drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit c929301f8362110d97c9e4a8427157dea151b6df
parent 6cb15b721d1db8d6da3b7fe5c0b3b7e25b6e88b8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 20 Aug 2011 14:34:14 +0200

keep class information, cleanup

Diffstat:
manage.py  | 50 +++++++++++---------------------------------------
prepare.sh |  2 +-
query.sh   |  4 ++--
reorder.py | 47 ++++++++++++++++++++++++++++++++++++-----------
4 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/manage.py b/manage.py @@ -64,20 +64,11 @@ class Word: return True @property - def render(self): - fields = [self.word, self.phon, self.base, self.freq, self.nsyl[0], - self.nsyl[1], self.ending, self.phon_ending, self.mult, - self.elidable, self.feminine, self.redundant] - return "\t".join([str(x) for x in fields]) - - @property def render_sql(self): - fields = [self.word, self.phon, self.base, self.freq, self.nsyl[0], - self.nsyl[1], self.mult, self.elidable, self.feminine, - self.redundant, self.ending, self.phon_ending, self.redundant] return ('INSERT INTO words VALUES("' + self.word + '", "' + self.phon + '", "' - + self.base + '", ' + + self.base + '", "' + + self.kind + '", ' + str(self.freq) + ', ' + str(self.nsyl[0]) + ', ' + str(self.nsyl[1]) + ', "' @@ -85,8 +76,7 @@ class Word: + self.phon_ending + '", ' + str(int(self.mult)) + ', ' + str(int(self.elidable)) + ', ' - + str(int(self.feminine)) + ', ' - + str(int(self.redundant)) + ');') + + str(int(self.feminine)) + ');') @property def ok(self): @@ -95,10 +85,11 @@ class Word: return True return False - def __init__(self, word, phon, base, freq, nsyl, mult): + def __init__(self, word, phon, base, kind, freq, nsyl, mult): self.word = word self.phon = phon self.base = base + self.kind = kind self.freq = freq self.nsyl = [nsyl, nsyl] self.mult = mult @@ -122,9 +113,7 @@ class Word: self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)] -seen = set() -bases = {} -phon_seen = {} +bases = {} #TODO transitive to the topmost base def derives(a, b): #print ("SKIP derives %s %s" % (a, b)) @@ -140,9 +129,9 @@ def derives(a, b): return False print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base -varchar(100), freq float, min_nsyl int, max_nsyl int, word_end -varchar(10), phon_end varchar(10), multiple bool, elidable bool, -feminine bool, redundant bool);""") +varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int, +word_end varchar(10), phon_end varchar(10), 
multiple bool, elidable +bool, feminine bool);""") while True: line = sys.stdin.readline() @@ -152,6 +141,7 @@ while True: word = l.pop(0) phon = l.pop(0) base = l.pop(0) + kind = l.pop(0) freq = ((float(l[0]) + float(l[1]))/2 + 100*(float(l[2]) + float(l[3]))/2) l.pop(0) @@ -166,28 +156,10 @@ while True: mult = ',' in l[0] l.pop(0) assert(len(l) == 0) - w = Word(word, phon, base, freq, nsyl, mult) - key = (word, phon) - if key in seen: - #continue - pass - else: - seen.add(key) - phon_key = (phon, w.feminine) - if phon_key not in phon_seen.keys(): - phon_seen[phon_key] = [] + w = Word(word, phon, base, kind, freq, nsyl, mult) if word not in bases.keys(): bases[word] = [] bases[word].append(base) - for candidate in phon_seen[phon_key]: - #print("SKIP candidate for %s %s is %s" % (phon_key[0], phon_key[1], - # candidate)) - # TODO replace by common ancestor, and perform tsort - if derives(word, candidate): - # word derives from a word with the same pronunciation, skip it - #print("SKIP ", word) - w.redundant = True - phon_seen[phon_key].append(word) if w.ok: print(w.render_sql) diff --git a/prepare.sh b/prepare.sh @@ -1,6 +1,6 @@ #!/bin/bash -cat Lexique3.txt additions | cut -f 1,2,3,7,8,9,10,24,28,29 | +cat Lexique3.txt additions | cut -f 1,2,3,4,7,8,9,10,24,28,29 | ~/DOCUMENTS/poetlint/rhyme/lexique/lexique_fix.sh | sort -k1,1 | ./manage.py diff --git a/query.sh b/query.sh @@ -3,8 +3,8 @@ cd "$( dirname "$0" )" sqlite dico.sqlite 'select t1.freq, t1.word, t1.phon, t2.word, t2.phon, -t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.base from words +t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.base, t2.kind from words as t1 inner join words as t2 on (t1.phon_end = t2.phon_end or t1.word_end = t2.word_end) and t1.feminine = t2.feminine where t1.word = -"'$1'" and (t2.word != t1.word or t2.multiple);' | ./reorder.py +"'$1'";' | ./reorder.py diff --git a/reorder.py b/reorder.py @@ -1,5 +1,7 @@ #!/usr/bin/python3 -O +# TODO test "suis", "lui"... 
no multiple interpretations! + import sys def lcs(x, y): @@ -24,9 +26,21 @@ names = {0: "pour l'œil", 1: "pauvre", 2: "suffisante", 3: "riche"} def key(l): # frequency of interpretation desc, phonemes desc, eye desc, same as base, frequency desc, alpha #print(l) - return (-float(l[0]), -l[10], -l[11], 0 if l[9] == l[3] else 1, -float(l[5]), l[3]) + return (-float(l[0]), -l[11], -l[12], 0 if l[9] == l[3] else 1, -float(l[5]), l[3]) + +mx = [0] * 13 + +def display(l): + global header + if header != None: + print(header) + header = None + print(mp(l, mx, 3) + ' w' + mp(l, mx, 12) + ' ' + + mp(l, mx, 6) + '-' + mp(l, mx, 7) + + ('+' if l[5] == '1' else ' ') + ' ' + + mp(l, mx, 4) + ' ' + mp(l, mx, 5)[0:9] + ' ' + + l[10] + (', from "' + l[9] + '"' if l[9] != l[3] else '')) -mx = [0] * 12 while True: line = sys.stdin.readline() if not line: @@ -35,6 +49,7 @@ while True: l.append(lcs(l[2], l[4])) l.append(lcs(l[1], l[3])) l[4] = '[' + l[4] + ']' + l[2] = '[' + l[2] + ']' for i in range(len(l)): mx[i] = max(mx[i], len(str(l[i]))) lines.append(l) @@ -42,22 +57,32 @@ while True: seen = set() last2 = None -last10 = None +last11 = None +header = None + for l in sorted(lines, key=key): if l[2] != last2: last2 = l[2] print ("## For %s [%s], freq %s" % (l[1], l[2], l[0][0:9])) - if l[10] != last10: - last10 = l[10] + cache = None + seen = set() + if l[11] != last11: + last11 = l[11] # TODO check if vowel is in there - print (" -- %d phonemes (%s)" % (l[10], names[min(3, l[10])] if min(3, - l[10]) in names.keys() else '')) + header = (" -- %d phonemes (%s)" % (l[11], names[min(3, l[11])] if min(3, + l[11]) in names.keys() else '')) + if l[3] == l[1] and l[2] == l[4]: + # this is the query word, only display if several exist + if cache == None: + cache = l + else: + display(cache) + display(l) + seen.add(l[9]) + continue if l[9] in seen: # skip words with a seen base continue seen.add(l[9]) #print(l[9]) - print(mp(l, mx, 3) + ' w' + mp(l, mx, 11) + ' ' - + mp(l, mx, 6) + '-' + 
mp(l, mx, 7) - + ('+' if l[5] == '1' else ' ') + ' ' - + mp(l, mx, 4) + ' ' + mp(l, mx, 5)[0:9]) + display(l)