drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit b6937f858f4a525e8cd2915806d3b31cabb1afdc
parent 6559338d7e22464f7eafdd42b56fe2a74c6eda76
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Wed,  9 Nov 2011 18:30:28 +0100

Merge branch 'merge-requests/1'

Also perform some more cleanup.

Diffstat:
lexique2sql.py | 149 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
1 file changed, 79 insertions(+), 70 deletions(-)

diff --git a/lexique2sql.py b/lexique2sql.py
@@ -13,6 +13,22 @@ import metric
 from common import is_vowels, is_consonants, sure_end_fem
 import sys
 
+def string_type(max_len):
+  return ('string', {'max_len':max_len})
+sql_fields = [
+  ('word', string_type(100)),
+  ('phon', string_type(100)),
+  ('base', string_type(100)),
+  ('kind', string_type(10)),
+  ('freq', ('float', {})),
+  ('min_nsyl', ('int', {})),
+  ('max_nsyl', ('int', {})),
+  ('word_end', string_type(10)),
+  ('phon_end', string_type(10)),
+  ('elidable', ('bool', {})),
+  ('feminine', ('bool', {})),
+]
+
 seen = {}
 
 # phonetic vowel sounds
@@ -31,26 +47,15 @@ def escape(x):
 
 class Word:
   @property
-  def elidable(self):
-    """Can this word cause elision in the previous word?"""
-    return is_vowels(self.word[0]) or (self.word[0] == 'h' and
-        not haspirater.lookup(self.word))
+  def min_nsyl(self):
+    return self.nsyl[0]
 
   @property
-  def phon_ending(self):
-    """Compute minimal phonetic rhyme"""
-    l = []
-    w = list(self.phon)
-    w.reverse()
-    for x in w:
-      l.append(x)
-      if x in phon_vowels:
-        break
-    l.reverse()
-    return ''.join(l)
-
+  def max_nsyl(self):
+    return self.nsyl[1]
+
   @property
-  def ending(self):
+  def word_end(self):
     """Compute minimal visual rhyme"""
     l = []
     w = list(self.word)
@@ -66,52 +71,64 @@ class Word:
     return ''.join(l)
 
   @property
+  def phon_end(self):
+    """Compute minimal phonetic rhyme"""
+    l = []
+    w = list(self.phon)
+    w.reverse()
+    for x in w:
+      l.append(x)
+      if x in phon_vowels:
+        break
+    l.reverse()
+    return ''.join(l)
+
+  @property
+  def elidable(self):
+    """Can this word cause elision in the previous word?"""
+    return is_vowels(self.word[0]) or (self.word[0] == 'h' and
+        not haspirater.lookup(self.word))
+
+  @property
   def feminine(self):
     """Would this word be a feminine rhyme?"""
-    for end in sure_end_fem:
-      if self.word.endswith(end):
-        return True
-    if not self.word.endswith('ent'):
-      return False
-    # word ends in -ent, it's hard to tell from writing, so look at phon
+    # when word ends in -ent, it's hard to tell from writing, so look at phon
     # example: "tient" vs. "lient"
-    for end in phon_non_end_fem:
-      if self.phon.endswith(end):
-        return False
-    return True
-
-  @property
-  def render_sql(self):
-    return ('INSERT INTO words VALUES("'
-        + escape(self.word) + '", "'
-        + escape(self.phon) + '", "'
-        + escape(self.base) + '", "'
-        + escape(self.kind) + '", '
-        + escape(str(self.freq)) + ', '
-        + escape(str(self.nsyl[0])) + ', '
-        + escape(str(self.nsyl[1])) + ', "'
-        + escape(self.ending) + '", "'
-        + escape(self.phon_ending) + '", '
-        + escape(str(int(self.elidable))) + ', '
-        + escape(str(int(self.feminine))) + ');'
-        )
+    def endswith_any(x, ends): return any([x.endswith(end) for end in ends])
+    return (endswith_any(self.word, sure_end_fem)
+        or (self.word.endswith('ent')
+          and not endswith_any(self.phon, phon_non_end_fem)))
 
   @property
   def ok(self):
     # Remove words with no vowels
     for x in phon_vowels:
-      if x in self.phon_ending:
+      if x in self.phon_end:
         return True
     return False
 
+  @property
+  def sql(self):
+    render = {
+      'string': lambda s: '"'+s+'"', # no escaping: use parametrized queries!
+      'float': str,
+      'int': lambda s: str(int(s)),
+      'bool': lambda s: str(int(s)),
+    }
+    def sql_field(field):
+      (name, (ty, _)) = field
+      return render[ty](getattr(self, name))
+    return ('INSERT INTO words VALUES('
+        + ', '.join([sql_field(f) for f in sql_fields])
+        + ');')
+
   def __init__(self, word, phon, base, kind, freq):
     self.word = word
     self.phon = phon
     self.base = base
     self.kind = kind
-    self.freq = freq
+    self.freq = float(freq)
     self.nsyl = None
-    self.redundant = False
     self.do_extends()
 
   def align_sum(self, align):
@@ -128,38 +145,30 @@ class Word:
       self.extend(self.align_sum(align[0]))
 
   def extend(self, item):
-    if self.nsyl == None:
-      self.nsyl = [item, item]
-    else:
+    try:
       self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)]
+    except TypeError: # first execution
+      self.nsyl = [item, item]
+
+def create_table():
+  def decl(field):
+    (name, (ty, data)) = field
+    if ty == 'string':
+      ty = 'varchar(' + str(data['max_len']) + ')'
+    return name + ' ' + ty
+  return ('CREATE TABLE words('
+      + ', '.join([decl(field) for field in sql_fields])
+      + ');')
 
 
 if __name__ == '__main__':
-  print ("""CREATE TABLE words(
-    word varchar(100), -- word
-    phon varchar(100), -- pronunciation
-    base varchar(100), -- base word
-    kind varchar(10), -- grammatical category
-    freq float, -- frequency
-    min_nsyl int, -- lower bound on the number of syllabes
-    max_nsyl int, -- upper bound on the number of syllabes
-    word_end varchar(10), -- minimal word-level rhyme
-    phon_end varchar(10), -- minimal phon-level rhyme
-    elidable bool, -- can cause elision
-    feminine bool -- genre of the rhyme
-  );""")
+  print (create_table())
 
   while True:
     line = sys.stdin.readline()
     if not line:
       break
     l = line.rstrip().split("\t")
-    word = l.pop(0)
-    phon = l.pop(0)
-    base = l.pop(0)
-    kind = l.pop(0)
-    freq = float(l.pop(0))
-    assert(len(l) == 0)
-    w = Word(word, phon, base, kind, freq)
+    w = Word(*l)
     if w.ok:
-      print(w.render_sql)
+      print(w.sql)