drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit 03034013d9d6738e8a76a26151214c5e65174344
parent 072bb59ffc91f72c56b2380488f584f03809c9d1
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 01:14:32 +0100

woops

Diffstat:
common.py | 2--
common.py | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+), 2 deletions(-)

#!/usr/bin/python3
#coding: utf-8

"""Shared text helpers: accent stripping, normalization, phoneme notation.

Note from the commit diff: this module replaces a previous one-line
common.py whose only content was the path
/home/a3_nm/DOCUMENTS/poetlint/common.py (no trailing newline).
"""

import unicodedata
import re

vowels = 'aeiouyœæ'
consonants = "bcçdfghjklmnpqrstvwxz"

# a variant of x-sampa such that all french phonemes are one-character
# Each pair is (one-character code, x-sampa sequence).
SUBSTS = [
    ('#', 'A~'),
    ('$', 'O~'),
    (')', 'E~'),
    ('(', '9~'),
    ]

# Forbidden at the end of a hemistiche. "-ent" would also be forbidden
# in some cases but not others...
sure_end_fem = ['es', 'e', 'ë']

# Accented letters that strip_accents_one() leaves untouched when
# with_except is set.
_KEPT_ACCENTS = ('è', 'é')

# Raw strings: "\s" and "\w" in a plain string literal are invalid escape
# sequences and trigger a warning on recent Python versions.
# Whitespace run, optionally followed by dashes and more whitespace.
_SPACES_RE = re.compile(r"\s+-*\s*")
# Anything that is not an apostrophe, a word character, a space or a dash.
_PUNCT_RE = re.compile(r"[^'\w -]", re.UNICODE)


# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
def strip_accents_one(s, with_except=False):
    """Decompose the characters of s so accents become separate marks.

    Each character is replaced by its NFD decomposition (base character
    followed by its combining marks). The marks are NOT removed here; that
    is done by strip_accents().

    with_except keeps specifically 'é' and 'è' undecomposed.

    Returns a list of one-character strings.
    """
    decomposed = []
    for char in s:
        if with_except and char in _KEPT_ACCENTS:
            decomposed.append(char)
        else:
            # NFD may yield several code points; add them one by one.
            decomposed.extend(unicodedata.normalize('NFD', char))
    return decomposed


def strip_accents(s, with_except=False):
    """Return s without accents.

    Characters are decomposed by strip_accents_one() and every non-spacing
    mark (Unicode category 'Mn') is dropped. with_except keeps 'é' and 'è'
    as they are.
    """
    return ''.join(
        c for c in strip_accents_one(s, with_except)
        if unicodedata.category(c) != 'Mn')


def norm_spaces(text):
    """Collapse whitespace runs into a single space.

    Dashes that directly follow whitespace (and any whitespace after them)
    are swallowed as well, so "a - b" becomes "a b" while "a-b" is kept.
    """
    return _SPACES_RE.sub(' ', text)


def rm_punct(text, with_apostrophe=False):
    """Remove punctuation from text.

    Apostrophes are deleted outright unless with_apostrophe is true. Every
    other character that is not a word character, a space, a dash or an
    apostrophe is replaced by a space.
    """
    if not with_apostrophe:
        text = text.replace("'", '')
    #TODO rather: keep only good chars
    return _PUNCT_RE.sub(' ', text)


def is_vowels(chunk, with_h=False, with_y=True):
    """Test if a chunk is vowels

    with_h counts 'h' as vowel, with_y allows 'y'. Accents are ignored.
    Note that with_y=False only rejects the chunk that is exactly 'y'.
    An empty chunk is considered vowels.
    """
    if not with_y and chunk == 'y':
        return False
    for char in strip_accents(chunk):
        if char in vowels:
            continue
        if with_h and char == 'h':
            continue
        return False
    return True


def is_consonants(chunk):
    """Test if a chunk is consonants

    Accents (including the cedilla) are ignored. An empty chunk is
    considered consonants.
    """
    return all(char in consonants for char in strip_accents(chunk))


def normalize(text, with_apostrophe=False):
    """Normalize text, ie. lowercase, no useless punctuation or whitespace"""
    return norm_spaces(rm_punct(text.lower(), with_apostrophe)).strip()


def subst(string, subs):
    """Apply the replacements in subs to string, in order.

    subs is a sequence of pairs (old, new); each replacement operates on
    the result of the previous one. An empty subs returns string unchanged.
    """
    for pair in subs:
        string = string.replace(pair[0], pair[1])
    return string


def to_xsampa(s):
    """convert our modified format to x-sampa"""
    return subst(s, SUBSTS)


def from_xsampa(s):
    """convert x-sampa to our modified format"""
    return subst(s, [(pair[1], pair[0]) for pair in SUBSTS])