commit 6559338d7e22464f7eafdd42b56fe2a74c6eda76
parent 56a35afb5d21d5f51c62f6f685fff85859937970
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Wed, 9 Nov 2011 12:06:08 +0100
reorder code
Diffstat:
2 files changed, 39 insertions(+), 30 deletions(-)
diff --git a/lexique2sql.py b/lexique2sql.py
@@ -3,7 +3,7 @@
"""Prepare the rhyme database
Input should have tab-separated fields: word, pronunciation, base word,
-grammatical category, frequency. Output is a """
+grammatical category, frequency. Output is a sequence of SQL statements"""
# TODO frequencies are off
# TODO "bibliographe" number of syllables?!
@@ -20,20 +20,14 @@ phon_vowels = "()$#289aeEioOuy@"
# not a feminine ending, independently of spelling
phon_non_end_fem = ['#', ')']
-#
-print ("""CREATE TABLE words(
- word varchar(100), -- word
- phon varchar(100), -- pronunciation
- base varchar(100), -- base word
- kind varchar(10), -- grammatical category
- freq float, -- frequency
- min_nsyl int, -- lower bound on the number of syllabes
- max_nsyl int, -- upper bound on the number of syllabes
- word_end varchar(10), -- minimal word-level rhyme
- phon_end varchar(10), -- minimal phon-level rhyme
- elidable bool, -- can cause elision
- feminine bool -- genre of the rhyme
-);""")
+def escape(x):
+  """Escape for SQL
+
+  Return a copy of x in which every backslash and every single quote is
+  preceded by a backslash; all other characters are kept as they are."""
+  special = ("\\", "'")
+  return ''.join('\\' + c if c in special else c for c in x)
class Word:
@property
@@ -139,18 +133,33 @@ class Word:
else:
self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)]
-while True:
- line = sys.stdin.readline()
- if not line:
- break
- l = line.rstrip().split("\t")
- word = l.pop(0)
- phon = l.pop(0)
- base = l.pop(0)
- kind = l.pop(0)
- freq = float(l.pop(0))
- assert(len(l) == 0)
- w = Word(word, phon, base, kind, freq)
- if w.ok:
- print(w.render_sql)
+if __name__ == '__main__':
+  # the table definition is printed once, ahead of the per-word output
+  print ("""CREATE TABLE words(
+ word varchar(100), -- word
+ phon varchar(100), -- pronunciation
+ base varchar(100), -- base word
+ kind varchar(10), -- grammatical category
+ freq float, -- frequency
+ min_nsyl int, -- lower bound on the number of syllabes
+ max_nsyl int, -- upper bound on the number of syllabes
+ word_end varchar(10), -- minimal word-level rhyme
+ phon_end varchar(10), -- minimal phon-level rhyme
+ elidable bool, -- can cause elision
+ feminine bool -- genre of the rhyme
+ );""")
+
+  # readline() yields an empty string only at end of input, which stops the loop
+  for line in iter(sys.stdin.readline, ''):
+    fields = line.rstrip().split("\t")
+    # fields are consumed left to right: word, pronunciation, base word,
+    # grammatical category, then frequency
+    word, phon, base, kind = [fields.pop(0) for _ in range(4)]
+    freq = float(fields.pop(0))
+    # nothing may remain after the five expected fields
+    assert not fields
+    entry = Word(word, phon, base, kind, freq)
+    # only entries flagged as ok are written out
+    if entry.ok:
+      print(entry.render_sql)
diff --git a/lexique2sql.sh b/lexique2sql.sh
@@ -5,4 +5,4 @@ cd "$( dirname "$0" )"
cat - additions | # add custom exceptions
cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields
awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' | # aggregate frequencies
- ./make_db.py
+ ./lexique2sql.py