drime

French rhyme dictionary with web and CLI interfaces
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit ca3371d52e21f826ffb465d3e89e25bc001a2010
parent 867643e14e7f8f50d58a7d05cfd3fe6723641947
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Thu, 29 Sep 2011 22:50:23 +0200

trying with mysql, fail

Diffstat:
make_db.py | 73+++++++++++++++++++++++++++++++++++--------------------------------------
make_db.sh | 4++--
2 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/make_db.py b/make_db.py @@ -1,6 +1,9 @@ -#!/usr/bin/python3 -O +#!/usr/bin/python -"""Prepare the rhyme database""" +"""Prepare the rhyme database + +Input should have tab-separated fields: word, pronunciation, base word, +grammatical category, frequency. Output is a """ # TODO frequencies are off # TODO "bibliographe" number of syllables?! @@ -9,6 +12,7 @@ import haspirater import metric from common import is_vowels, is_consonants, sure_end_fem import sys +import _mysql seen = {} @@ -17,6 +21,21 @@ phon_vowels = "()$#289aeEioOuy@" # not a feminine ending, independently of spelling phon_non_end_fem = ['#', ')'] +# +print ("""CREATE TABLE words( + word varchar(100), -- word + phon varchar(100), -- pronunciation + base varchar(100), -- base word + kind varchar(10), -- grammatical category + freq float, -- frequency + min_nsyl int, -- lower bound on the number of syllabes + max_nsyl int, -- upper bound on the number of syllabes + word_end varchar(10), -- minimal word-level rhyme + phon_end varchar(10), -- minimal phon-level rhyme + elidable bool, -- can cause elision + feminine bool -- genre of the rhyme +);""") + class Word: @property def elidable(self): @@ -70,20 +89,23 @@ class Word: @property def render_sql(self): - return ('INSERT INTO words VALUES("' + self.word + '", "' - + self.phon + '", "' - + self.base + '", "' - + self.kind + '", ' - + str(self.freq) + ', ' - + str(self.nsyl[0]) + ', ' - + str(self.nsyl[1]) + ', "' - + self.ending + '", "' - + self.phon_ending + '", ' - + str(int(self.elidable)) + ', ' - + str(int(self.feminine)) + ');') + return ('INSERT INTO words VALUES("' + + _mysql.escape_string(self.word) + '", "' + + _mysql.escape_string(self.phon) + '", "' + + _mysql.escape_string(self.base) + '", "' + + _mysql.escape_string(self.kind) + '", ' + + _mysql.escape_string(str(self.freq)) + ', ' + + _mysql.escape_string(str(self.nsyl[0])) + ', ' + + _mysql.escape_string(str(self.nsyl[1])) + ', "' + + _mysql.escape_string(self.ending) + '", 
"' + + _mysql.escape_string(self.phon_ending) + '", ' + + _mysql.escape_string(str(int(self.elidable))) + ', ' + + _mysql.escape_string(str(int(self.feminine))) + ');' + ) @property def ok(self): + # Remove words with no vowels for x in phon_vowels: if x in self.phon_ending: return True @@ -118,26 +140,6 @@ class Word: else: self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)] -bases = {} #TODO transitive to the topmost base - -def derives(a, b): - #print ("SKIP derives %s %s" % (a, b)) - if a == b: - return True - if a not in bases.keys(): - return False - for x in bases[a]: - #print ("SKIP base is %s" % x) - if x != a: - if derives(x, b): - return True - return False - -print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base -varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int, -word_end varchar(10), phon_end varchar(10), elidable -bool, feminine bool);""") - while True: line = sys.stdin.readline() if not line: @@ -148,13 +150,8 @@ while True: base = l.pop(0) kind = l.pop(0) freq = float(l.pop(0)) - #print ("DBG for %s: %d and %d" % (word, int(l[0]), 1+len([x for x in l[1] - #if x == ' ' or x == '-']))) assert(len(l) == 0) w = Word(word, phon, base, kind, freq) - if word not in bases.keys(): - bases[word] = [] - bases[word].append(base) if w.ok: print(w.render_sql) diff --git a/make_db.sh b/make_db.sh @@ -4,5 +4,5 @@ cd "$( dirname "$0" )" cat - additions | # add custom exceptions cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields - awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' #| # aggregate frequencies - #./make_db.py + awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' | # aggregate frequencies + ./make_db.py