commit ca3371d52e21f826ffb465d3e89e25bc001a2010
parent 867643e14e7f8f50d58a7d05cfd3fe6723641947
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 29 Sep 2011 22:50:23 +0200
trying with mysql, fail
Diffstat:
make_db.py | | | 73 | +++++++++++++++++++++++++++++++++++-------------------------------------- |
make_db.sh | | | 4 | ++-- |
2 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/make_db.py b/make_db.py
@@ -1,6 +1,9 @@
-#!/usr/bin/python3 -O
+#!/usr/bin/python
-"""Prepare the rhyme database"""
+"""Prepare the rhyme database
+
+Input should have tab-separated fields: word, pronunciation, base word,
+grammatical category, frequency. Output is a series of SQL statements."""
# TODO frequencies are off
# TODO "bibliographe" number of syllables?!
@@ -9,6 +12,7 @@ import haspirater
import metric
from common import is_vowels, is_consonants, sure_end_fem
import sys
+import _mysql
seen = {}
@@ -17,6 +21,21 @@ phon_vowels = "()$#289aeEioOuy@"
# not a feminine ending, independently of spelling
phon_non_end_fem = ['#', ')']
+# Emit the schema of the words table before any row is generated
+print ("""CREATE TABLE words(
+ word varchar(100), -- word
+ phon varchar(100), -- pronunciation
+ base varchar(100), -- base word
+ kind varchar(10), -- grammatical category
+ freq float, -- frequency
+ min_nsyl int, -- lower bound on the number of syllabes
+ max_nsyl int, -- upper bound on the number of syllabes
+ word_end varchar(10), -- minimal word-level rhyme
+ phon_end varchar(10), -- minimal phon-level rhyme
+ elidable bool, -- can cause elision
+ feminine bool -- genre of the rhyme
+);""")
+
class Word:
@property
def elidable(self):
@@ -70,20 +89,23 @@ class Word:
@property
def render_sql(self):
- return ('INSERT INTO words VALUES("' + self.word + '", "'
- + self.phon + '", "'
- + self.base + '", "'
- + self.kind + '", '
- + str(self.freq) + ', '
- + str(self.nsyl[0]) + ', '
- + str(self.nsyl[1]) + ', "'
- + self.ending + '", "'
- + self.phon_ending + '", '
- + str(int(self.elidable)) + ', '
- + str(int(self.feminine)) + ');')
+ return ('INSERT INTO words VALUES("'
+ + _mysql.escape_string(self.word) + '", "'
+ + _mysql.escape_string(self.phon) + '", "'
+ + _mysql.escape_string(self.base) + '", "'
+ + _mysql.escape_string(self.kind) + '", '
+ + _mysql.escape_string(str(self.freq)) + ', '
+ + _mysql.escape_string(str(self.nsyl[0])) + ', '
+ + _mysql.escape_string(str(self.nsyl[1])) + ', "'
+ + _mysql.escape_string(self.ending) + '", "'
+ + _mysql.escape_string(self.phon_ending) + '", '
+ + _mysql.escape_string(str(int(self.elidable))) + ', '
+ + _mysql.escape_string(str(int(self.feminine))) + ');'
+ )
@property
def ok(self):
+ # Accept the word if its phonetic ending contains a vowel
for x in phon_vowels:
if x in self.phon_ending:
return True
@@ -118,26 +140,6 @@ class Word:
else:
self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)]
-bases = {} #TODO transitive to the topmost base
-
-def derives(a, b):
- #print ("SKIP derives %s %s" % (a, b))
- if a == b:
- return True
- if a not in bases.keys():
- return False
- for x in bases[a]:
- #print ("SKIP base is %s" % x)
- if x != a:
- if derives(x, b):
- return True
- return False
-
-print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base
-varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int,
-word_end varchar(10), phon_end varchar(10), elidable
-bool, feminine bool);""")
-
while True:
line = sys.stdin.readline()
if not line:
@@ -148,13 +150,8 @@ while True:
base = l.pop(0)
kind = l.pop(0)
freq = float(l.pop(0))
- #print ("DBG for %s: %d and %d" % (word, int(l[0]), 1+len([x for x in l[1]
- #if x == ' ' or x == '-'])))
assert(len(l) == 0)
w = Word(word, phon, base, kind, freq)
- if word not in bases.keys():
- bases[word] = []
- bases[word].append(base)
if w.ok:
print(w.render_sql)
diff --git a/make_db.sh b/make_db.sh
@@ -4,5 +4,5 @@ cd "$( dirname "$0" )"
cat - additions | # add custom exceptions
cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields
- awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' #| # aggregate frequencies
- #./make_db.py
+ awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' | # aggregate frequencies
+ ./make_db.py