code cleanup - drime - French rhyme dictionary with web and CLI interface

commit d6e388394d025c2e0b137db29c469b2f61782cbf
parent 6ce5cb23a65b5ae19f10a2f4bd6f2dc24f612bb6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 01:59:28 +0100

code cleanup

Diffstat:
query.py  | 80 ++++++++++++++++++++++++++++++++++---------------------------------------------

1 file changed, 34 insertions(+), 46 deletions(-)
diff --git a/query.py b/query.py
@@ -78,8 +78,6 @@ def get_key(x):
         x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']')
 
 def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
-  print ((word, phon, minsyll, maxsyll, elide, gender,))
-  print ((offset, size,))
   cursor = run_query('''
     SELECT t1.word AS t1_word,
         t1.phon AS t1_phon
@@ -93,6 +91,9 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
     result[get_key(x)] = x
   if len(result.keys()) > 1 or result == {}:
     return result, 0 # require disambiguation or is empty
+  word = x['t1_word']
+  phon = x['t1_phon']
+  key = get_key(x)
 
   rest = ''' FROM words AS t1, words AS t2
     WHERE (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
@@ -102,14 +103,12 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
             OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?)))
     ORDER BY t2.freq, t1.phon, t1.word
     '''
-  limit = '''LIMIT ? OFFSET ?'''
+  #limit = '''LIMIT ? OFFSET ?'''
 
   args = (word, word == None, phon, phon == None,
         minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,)
   query = '''
-    SELECT t1.word AS t1_word,
-        t1.phon AS t1_phon,
-        t1.feminine AS t1_feminine,
+    SELECT t1.feminine AS t1_feminine,
         t2.word AS t2_word,
         t2.phon AS t2_phon,
         t2.freq AS t2_freq,
@@ -121,19 +120,13 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
       ''' + rest #+ limit
   print (query)
   cursor = run_query(query, args) #+ (size, offset,))
-  result = {}
+  print ("DONE")
+
+  result = []
   for x in cursor:
-    for k in x.keys():
-      if isinstance(x[k], str):
-        try:
-          x[k] = x[k].decode('utf8')
-        except UnicodeDecodeError:
-          x[k] = x[k].decode('latin1')
+    decode_all(x)
     if x['t1_feminine'] != x['t2_feminine'] and gender:
       continue
-    key = get_key(x)
-    if key not in result.keys():
-      result[key] = []
     row = dict([
         (k[3:], x[k]) for k in x.keys()
             if k.startswith('t2_')])
@@ -143,14 +136,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
           row[k] = row[k].decode('utf8')
         except UnicodeDecodeError:
           row[k] = row[k].decode('latin1')
-    if (row['word'].endswith('-'+x['t1_word'])):
+    if (row['word'].endswith('-'+word)):
       continue
-    if (row['word'] == x['t1_word'] and row['word'] == x['t1_word']
+    if (row['word'] == word and row['word'] == word
         and ',' not in row['orig']):
       continue # don't display the word if it has only one possible origin
     row['freq'] = float(row['freq'])
-    row['phon_rhyme'] = lcs(x['t1_phon'], row['phon'])
-    row['word_rhyme'] = lcs(x['t1_word'], row['word'])
+    row['phon_rhyme'] = lcs(phon, row['phon'])
+    row['word_rhyme'] = lcs(phon, row['word'])
     row['key'] = (
         -row['phon_rhyme'], # phon_rhyme desc
         -row['word_rhyme'], # eye_rhyme desc
@@ -159,38 +152,33 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
         row['word'] # alphabetical order
         )
     row['phon'] = to_xsampa(row['phon'])
-    result[key].append(row)
-
-  result2 = {}
-  seen = {}
-  for k in result.keys():
-    # TODO only display the word itself if multiple derivations are possible
-    result[k] = sorted(result[k], key=operator.itemgetter('key'))
-    result2[k] = []
-    seen[key] = set()
-    for row in result[k]:
-      bases = row['orig'].split(',')
-      ok = False
-      for i in range(len(bases)):
-        bases[i] = bases[i].split('|')
-        if bases[i][1] not in seen[key]:
-          ok = True
-        seen[key].add(bases[i][1])
-      if ok:
-        row['orig'] = ', '.join(
-              [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
-                for a in bases])
-        result2[k].append(row)
-        seen[key].add(row['word'])
-    count = len(result2[k])
-    result2[k] = result2[k][:PAGESIZE]
+    result.append(row)
+
+  result2 = []
+  seen = set()
+  for row in sorted(result, key=operator.itemgetter('key')):
+    bases = row['orig'].split(',')
+    ok = False
+    for i in range(len(bases)):
+      bases[i] = bases[i].split('|')
+      if bases[i][1] not in seen:
+        ok = True
+      seen.add(bases[i][1])
+    if ok:
+      row['orig'] = ', '.join(
+            [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
+              for a in bases])
+      result2.append(row)
+      seen.add(row['word'])
+  count = len(result2)
+  result2 = result2[:PAGESIZE]
 
   #cursor = run_query('''
     #SELECT count(t2.word)
     #''' + rest, args)
   #for x in cursor:
     #count = x[x.keys()[0]]
-  return result2, count
+  return {key: result2}, count
 
 if __name__ == '__main__':
     print(query(*sys.argv[1:]))

	drime - French rhyme dictionary with web and CLI interface
	git clone https://a3nm.net/git/drime/
	Log | Files | Refs | README