commit 1e3e32d1068a76e43b5a1fa991ab05fcb7d917ce
parent 3937aa9cb6899ea053a09a3c2c111319cc37a93e
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 18 Aug 2011 15:38:05 -0400
more sensible duplicate elimination
Diffstat:
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/manage.py b/manage.py
@@ -1,6 +1,8 @@
#!/usr/bin/python3 -O
# TODO frequencies are off
+# TODO check the number of syllables computed for "bibliographe"?!
+# TODO do NOT remove "duplicates" like nuit(nuire) and nuit(nuit)
import haspirater
import metric
diff --git a/query.sh b/query.sh
@@ -3,7 +3,7 @@
cd "$( dirname "$0" )"
sqlite dico.sqlite 'select t1.freq, t1.word, t1.phon, t2.word, t2.phon,
-t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.redundant from words
+t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.base from words
as t1 inner join words as t2 on (t1.phon_end = t2.phon_end or
t1.word_end = t2.word_end) and t1.feminine = t2.feminine where t1.word =
"'$1'" and (t2.word != t1.word or t2.multiple);' | ./reorder.py
diff --git a/reorder.py b/reorder.py
@@ -22,10 +22,9 @@ lines = []
names = {0: "pour l'œil", 1: "pauvre", 2: "suffisante", 3: "riche"}
def key(l):
- # frequency of interpretation desc, phonemes desc, eye desc, frequency
- # desc, alpha
+ # sort key: frequency of interpretation desc, phonemes desc, eye desc, words differing from their base first (NOTE(review): -1 sorts before 0 -- confirm intended), frequency desc, alpha
#print(l)
- return (-float(l[0]), -l[10], -l[11], -float(l[5]), l[3])
+ return (-float(l[0]), -l[10], -l[11], 0 if l[9] == l[3] else -1, -float(l[5]), l[3])
mx = [0] * 12
while True:
@@ -53,11 +52,11 @@ for l in sorted(lines, key=key):
# TODO check if vowel is in there
print (" -- %d phonemes (%s)" % (l[10], names[min(3, l[10])] if min(3,
l[10]) in names.keys() else ''))
- if l[9] == '1' and l[4] in seen:
- # skip redundant where the same pronunciation was seen
- # keep for eye rhyme
+ if l[9] in seen:
+ # skip words whose base form has already been seen
continue
- seen.add(l[4])
+ seen.add(l[9])
+ #print(l[9])
print(mp(l, mx, 3) + ' w' + mp(l, mx, 11) + ' '
+ mp(l, mx, 6) + '-' + mp(l, mx, 7)
+ ('+' if l[5] == '1' else ' ') + ' '