keep class information, cleanup - drime - French rhyme dictionary with web and CLI interface

commit c929301f8362110d97c9e4a8427157dea151b6df
parent 6cb15b721d1db8d6da3b7fe5c0b3b7e25b6e88b8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 20 Aug 2011 14:34:14 +0200

keep class information, cleanup

Diffstat:
manage.py  | 50 +++++++++++---------------------------------------
prepare.sh  | 2 +-
query.sh  | 4 ++--
reorder.py  | 47 ++++++++++++++++++++++++++++++++++++-----------

4 files changed, 50 insertions(+), 53 deletions(-)
diff --git a/manage.py b/manage.py
@@ -64,20 +64,11 @@ class Word:
     return True
 
   @property
-  def render(self):
-    fields = [self.word, self.phon, self.base, self.freq, self.nsyl[0],
-        self.nsyl[1], self.ending, self.phon_ending, self.mult,
-        self.elidable, self.feminine, self.redundant]
-    return "\t".join([str(x) for x in fields])
-  
-  @property
   def render_sql(self):
-    fields = [self.word, self.phon, self.base, self.freq, self.nsyl[0],
-        self.nsyl[1], self.mult, self.elidable, self.feminine,
-        self.redundant, self.ending, self.phon_ending, self.redundant]
     return ('INSERT INTO words VALUES("' + self.word + '", "'
         + self.phon + '", "'
-        + self.base + '", '
+        + self.base + '", "'
+        + self.kind + '", '
         + str(self.freq) + ', '
         + str(self.nsyl[0]) + ', '
         + str(self.nsyl[1]) + ', "'
@@ -85,8 +76,7 @@ class Word:
         + self.phon_ending + '", '
         + str(int(self.mult)) + ', '
         + str(int(self.elidable)) + ', '
-        + str(int(self.feminine)) + ', '
-        + str(int(self.redundant)) + ');')
+        + str(int(self.feminine)) + ');')
 
   @property
   def ok(self):
@@ -95,10 +85,11 @@ class Word:
         return True
     return False
 
-  def __init__(self, word, phon, base, freq, nsyl, mult):
+  def __init__(self, word, phon, base, kind, freq, nsyl, mult):
     self.word = word
     self.phon = phon
     self.base = base
+    self.kind = kind
     self.freq = freq
     self.nsyl = [nsyl, nsyl]
     self.mult = mult
@@ -122,9 +113,7 @@ class Word:
     self.nsyl = [min(self.nsyl[0], item),
         max(self.nsyl[1], item)]
 
-seen = set()
-bases = {}
-phon_seen = {}
+bases = {} #TODO transitive to the topmost base
 
 def derives(a, b):
   #print ("SKIP derives %s %s" % (a, b))
@@ -140,9 +129,9 @@ def derives(a, b):
   return False
 
 print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base
-varchar(100), freq float, min_nsyl int, max_nsyl int, word_end
-varchar(10), phon_end varchar(10), multiple bool, elidable bool,
-feminine bool, redundant bool);""")
+varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int,
+word_end varchar(10), phon_end varchar(10), multiple bool, elidable
+bool, feminine bool);""")
 
 while True:
   line = sys.stdin.readline()
@@ -152,6 +141,7 @@ while True:
   word = l.pop(0)
   phon = l.pop(0)
   base = l.pop(0)
+  kind = l.pop(0)
   freq = ((float(l[0]) + float(l[1]))/2 +
         100*(float(l[2]) + float(l[3]))/2)
   l.pop(0)
@@ -166,28 +156,10 @@ while True:
   mult = ',' in l[0]
   l.pop(0)
   assert(len(l) == 0)
-  w = Word(word, phon, base, freq, nsyl, mult)
-  key = (word, phon)
-  if key in seen:
-    #continue
-    pass
-  else:
-    seen.add(key)
-  phon_key = (phon, w.feminine)
-  if phon_key not in phon_seen.keys():
-    phon_seen[phon_key] = []
+  w = Word(word, phon, base, kind, freq, nsyl, mult)
   if word not in bases.keys():
     bases[word] = []
   bases[word].append(base)
-  for candidate in phon_seen[phon_key]:
-    #print("SKIP candidate for %s %s is %s" % (phon_key[0], phon_key[1],
-      # candidate))
-    # TODO replace by common ancestor, and perform tsort
-    if derives(word, candidate):
-      # word derives from a word with the same pronunciation, skip it
-      #print("SKIP ", word)
-      w.redundant = True
-  phon_seen[phon_key].append(word)
   if w.ok:
     print(w.render_sql)
 
diff --git a/prepare.sh b/prepare.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-cat Lexique3.txt additions | cut -f 1,2,3,7,8,9,10,24,28,29 |
+cat Lexique3.txt additions | cut -f 1,2,3,4,7,8,9,10,24,28,29 |
   ~/DOCUMENTS/poetlint/rhyme/lexique/lexique_fix.sh |
   sort -k1,1 |
   ./manage.py
diff --git a/query.sh b/query.sh
@@ -3,8 +3,8 @@
 cd "$( dirname "$0" )"
 
 sqlite dico.sqlite 'select t1.freq, t1.word, t1.phon, t2.word, t2.phon,
-t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.base from words
+t2.freq, t2.min_nsyl, t2.max_nsyl, t2.elidable, t2.base, t2.kind from words
 as t1 inner join words as t2 on (t1.phon_end = t2.phon_end or
 t1.word_end = t2.word_end) and t1.feminine = t2.feminine where t1.word =
-"'$1'" and (t2.word != t1.word or t2.multiple);' | ./reorder.py
+"'$1'";' | ./reorder.py
 
diff --git a/reorder.py b/reorder.py
@@ -1,5 +1,7 @@
 #!/usr/bin/python3 -O
 
+# TODO test "suis", "lui"... no multiple interpretations!
+
 import sys
 
 def lcs(x, y):
@@ -24,9 +26,21 @@ names = {0: "pour l'œil", 1: "pauvre", 2: "suffisante", 3: "riche"}
 def key(l):
   # frequency of interpretation desc, phonemes desc, eye desc, same as base, frequency desc, alpha
   #print(l)
-  return (-float(l[0]), -l[10], -l[11], 0 if l[9] == l[3] else 1, -float(l[5]), l[3])
+  return (-float(l[0]), -l[11], -l[12], 0 if l[9] == l[3] else 1, -float(l[5]), l[3])
+
+mx = [0] * 13
+
+def display(l):
+  global header
+  if header != None:
+    print(header)
+    header = None
+  print(mp(l, mx, 3) + ' w' + mp(l, mx, 12) + '  '
+      + mp(l, mx, 6) + '-' + mp(l, mx, 7)
+      + ('+' if l[5] == '1' else ' ') + ' '
+      + mp(l, mx, 4) + ' ' + mp(l, mx, 5)[0:9] + ' '
+      + l[10] + (', from "' + l[9] + '"' if l[9] != l[3] else ''))
 
-mx = [0] * 12
 while True:
   line = sys.stdin.readline()
   if not line:
@@ -35,6 +49,7 @@ while True:
   l.append(lcs(l[2], l[4]))
   l.append(lcs(l[1], l[3]))
   l[4] = '[' + l[4] + ']'
+  l[2] = '[' + l[2] + ']'
   for i in range(len(l)):
     mx[i] = max(mx[i], len(str(l[i])))
   lines.append(l)
@@ -42,22 +57,32 @@ while True:
 seen = set()
 
 last2 = None
-last10 = None
+last11 = None
+header = None
+
 for l in sorted(lines, key=key):
   if l[2] != last2:
     last2 = l[2]
     print ("## For %s [%s], freq %s" % (l[1], l[2], l[0][0:9]))
-  if l[10] != last10:
-    last10 = l[10]
+    cache = None
+    seen = set()
+  if l[11] != last11:
+    last11 = l[11]
     # TODO check if vowel is in there
-    print ("  -- %d phonemes (%s)" % (l[10], names[min(3, l[10])] if min(3,
-      l[10]) in names.keys() else ''))
+    header = ("  -- %d phonemes (%s)" % (l[11], names[min(3, l[11])] if min(3,
+      l[11]) in names.keys() else ''))
+  if l[3] == l[1] and l[2] == l[4]:
+    # this is the query word, only display if several exist
+    if cache == None:
+      cache = l
+    else:
+      display(cache)
+      display(l)
+    seen.add(l[9])
+    continue
   if l[9] in seen:
     # skip words with a seen base
     continue
   seen.add(l[9])
   #print(l[9])
-  print(mp(l, mx, 3) + ' w' + mp(l, mx, 11) + '  '
-      + mp(l, mx, 6) + '-' + mp(l, mx, 7)
-      + ('+' if l[5] == '1' else ' ') + ' '
-      + mp(l, mx, 4) + ' ' + mp(l, mx, 5)[0:9])
+  display(l)

	drime French rhyme dictionary with web and CLI interface
	git clone https://a3nm.net/git/drime/
	Log | Files | Refs | README

manage.py  | 50 +++++++++++---------------------------------------
prepare.sh  | 2 +-
query.sh  | 4 ++--
reorder.py  | 47 ++++++++++++++++++++++++++++++++++++-----------