commit b6937f858f4a525e8cd2915806d3b31cabb1afdc
parent 6559338d7e22464f7eafdd42b56fe2a74c6eda76
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Wed, 9 Nov 2011 18:30:28 +0100
Merge branch 'merge-requests/1'
Also perform some more cleanup.
Diffstat:
 lexique2sql.py | 149 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
1 file changed, 79 insertions(+), 70 deletions(-)
diff --git a/lexique2sql.py b/lexique2sql.py
@@ -13,6 +13,22 @@ import metric
from common import is_vowels, is_consonants, sure_end_fem
import sys
+def string_type(max_len):
+ return ('string', {'max_len':max_len})
+sql_fields = [
+ ('word', string_type(100)),
+ ('phon', string_type(100)),
+ ('base', string_type(100)),
+ ('kind', string_type(10)),
+ ('freq', ('float', {})),
+ ('min_nsyl', ('int', {})),
+ ('max_nsyl', ('int', {})),
+ ('word_end', string_type(10)),
+ ('phon_end', string_type(10)),
+ ('elidable', ('bool', {})),
+ ('feminine', ('bool', {})),
+]
+
seen = {}
# phonetic vowel sounds
@@ -31,26 +47,15 @@ def escape(x):
class Word:
@property
- def elidable(self):
- """Can this word cause elision in the previous word?"""
- return is_vowels(self.word[0]) or (self.word[0] == 'h' and
- not haspirater.lookup(self.word))
+ def min_nsyl(self):
+ return self.nsyl[0]
@property
- def phon_ending(self):
- """Compute minimal phonetic rhyme"""
- l = []
- w = list(self.phon)
- w.reverse()
- for x in w:
- l.append(x)
- if x in phon_vowels:
- break
- l.reverse()
- return ''.join(l)
-
+ def max_nsyl(self):
+ return self.nsyl[1]
+
@property
- def ending(self):
+ def word_end(self):
"""Compute minimal visual rhyme"""
l = []
w = list(self.word)
@@ -66,52 +71,64 @@ class Word:
return ''.join(l)
@property
+ def phon_end(self):
+ """Compute minimal phonetic rhyme"""
+ l = []
+ w = list(self.phon)
+ w.reverse()
+ for x in w:
+ l.append(x)
+ if x in phon_vowels:
+ break
+ l.reverse()
+ return ''.join(l)
+
+ @property
+ def elidable(self):
+ """Can this word cause elision in the previous word?"""
+ return is_vowels(self.word[0]) or (self.word[0] == 'h' and
+ not haspirater.lookup(self.word))
+
+ @property
def feminine(self):
"""Would this word be a feminine rhyme?"""
- for end in sure_end_fem:
- if self.word.endswith(end):
- return True
- if not self.word.endswith('ent'):
- return False
- # word ends in -ent, it's hard to tell from writing, so look at phon
+ # when word ends in -ent, it's hard to tell from writing, so look at phon
# example: "tient" vs. "lient"
- for end in phon_non_end_fem:
- if self.phon.endswith(end):
- return False
- return True
-
- @property
- def render_sql(self):
- return ('INSERT INTO words VALUES("'
- + escape(self.word) + '", "'
- + escape(self.phon) + '", "'
- + escape(self.base) + '", "'
- + escape(self.kind) + '", '
- + escape(str(self.freq)) + ', '
- + escape(str(self.nsyl[0])) + ', '
- + escape(str(self.nsyl[1])) + ', "'
- + escape(self.ending) + '", "'
- + escape(self.phon_ending) + '", '
- + escape(str(int(self.elidable))) + ', '
- + escape(str(int(self.feminine))) + ');'
- )
+ def endswith_any(x, ends): return any([x.endswith(end) for end in ends])
+ return (endswith_any(self.word, sure_end_fem)
+ or (self.word.endswith('ent')
+ and not endswith_any(self.phon, phon_non_end_fem)))
@property
def ok(self):
# Remove words with no vowels
for x in phon_vowels:
- if x in self.phon_ending:
+ if x in self.phon_end:
return True
return False
+ @property
+ def sql(self):
+ render = {
+ 'string': lambda s: '"'+s+'"', # no escaping: use parametrized queries!
+ 'float': str,
+ 'int': lambda s: str(int(s)),
+ 'bool': lambda s: str(int(s)),
+ }
+ def sql_field(field):
+ (name, (ty, _)) = field
+ return render[ty](getattr(self, name))
+ return ('INSERT INTO words VALUES('
+ + ', '.join([sql_field(f) for f in sql_fields])
+ + ');')
+
def __init__(self, word, phon, base, kind, freq):
self.word = word
self.phon = phon
self.base = base
self.kind = kind
- self.freq = freq
+ self.freq = float(freq)
self.nsyl = None
- self.redundant = False
self.do_extends()
def align_sum(self, align):
@@ -128,38 +145,30 @@ class Word:
self.extend(self.align_sum(align[0]))
def extend(self, item):
- if self.nsyl == None:
- self.nsyl = [item, item]
- else:
+ try:
self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)]
+ except TypeError: # first execution
+ self.nsyl = [item, item]
+
+def create_table():
+ def decl(field):
+ (name, (ty, data)) = field
+ if ty == 'string':
+ ty = 'varchar(' + str(data['max_len']) + ')'
+ return name + ' ' + ty
+ return ('CREATE TABLE words('
+ + ', '.join([decl(field) for field in sql_fields])
+ + ');')
if __name__ == '__main__':
- print ("""CREATE TABLE words(
- word varchar(100), -- word
- phon varchar(100), -- pronunciation
- base varchar(100), -- base word
- kind varchar(10), -- grammatical category
- freq float, -- frequency
- min_nsyl int, -- lower bound on the number of syllabes
- max_nsyl int, -- upper bound on the number of syllabes
- word_end varchar(10), -- minimal word-level rhyme
- phon_end varchar(10), -- minimal phon-level rhyme
- elidable bool, -- can cause elision
- feminine bool -- genre of the rhyme
- );""")
+ print (create_table())
while True:
line = sys.stdin.readline()
if not line:
break
l = line.rstrip().split("\t")
- word = l.pop(0)
- phon = l.pop(0)
- base = l.pop(0)
- kind = l.pop(0)
- freq = float(l.pop(0))
- assert(len(l) == 0)
- w = Word(word, phon, base, kind, freq)
+ w = Word(*l)
if w.ok:
- print(w.render_sql)
+ print(w.sql)