drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit d6e388394d025c2e0b137db29c469b2f61782cbf
parent 6ce5cb23a65b5ae19f10a2f4bd6f2dc24f612bb6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 01:59:28 +0100

code cleanup

Diffstat:
query.py | 80++++++++++++++++++++++++++++++++++---------------------------------------------
1 file changed, 34 insertions(+), 46 deletions(-)

diff --git a/query.py b/query.py
@@ -78,8 +78,6 @@ def get_key(x):
       x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']')
 
 def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
-  print ((word, phon, minsyll, maxsyll, elide, gender,))
-  print ((offset, size,))
   cursor = run_query('''
       SELECT t1.word AS t1_word, t1.phon AS t1_phon
@@ -93,6 +91,9 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
     result[get_key(x)] = x
   if len(result.keys()) > 1 or result == {}:
     return result, 0 # require disambiguation or is empty
+  word = x['t1_word']
+  phon = x['t1_phon']
+  key = get_key(x)
   rest = '''
       FROM words AS t1, words AS t2
       WHERE (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
@@ -102,14 +103,12 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
       OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?)))
       ORDER BY t2.freq, t1.phon, t1.word
   '''
-  limit = '''LIMIT ? OFFSET ?'''
+  #limit = '''LIMIT ? OFFSET ?'''
   args = (word, word == None, phon, phon == None, minsyll == None, minsyll,
       maxsyll == None, maxsyll, maxsyll, elide,)
 
   query = '''
-      SELECT t1.word AS t1_word,
-      t1.phon AS t1_phon,
-      t1.feminine AS t1_feminine,
+      SELECT t1.feminine AS t1_feminine,
       t2.word AS t2_word,
       t2.phon AS t2_phon,
       t2.freq AS t2_freq,
@@ -121,19 +120,13 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
   ''' + rest #+ limit
   print (query)
   cursor = run_query(query, args) #+ (size, offset,))
-  result = {}
+  print ("DONE")
+
+  result = []
   for x in cursor:
-    for k in x.keys():
-      if isinstance(x[k], str):
-        try:
-          x[k] = x[k].decode('utf8')
-        except UnicodeDecodeError:
-          x[k] = x[k].decode('latin1')
+    decode_all(x)
     if x['t1_feminine'] != x['t2_feminine'] and gender:
       continue
-    key = get_key(x)
-    if key not in result.keys():
-      result[key] = []
     row = dict([
       (k[3:], x[k])
       for k in x.keys() if k.startswith('t2_')])
@@ -143,14 +136,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
           row[k] = row[k].decode('utf8')
         except UnicodeDecodeError:
           row[k] = row[k].decode('latin1')
-    if (row['word'].endswith('-'+x['t1_word'])):
+    if (row['word'].endswith('-'+word)):
       continue
-    if (row['word'] == x['t1_word'] and row['word'] == x['t1_word']
+    if (row['word'] == word and row['word'] == word
         and ',' not in row['orig']):
       continue # don't display the word if it has only one possible origin
     row['freq'] = float(row['freq'])
-    row['phon_rhyme'] = lcs(x['t1_phon'], row['phon'])
-    row['word_rhyme'] = lcs(x['t1_word'], row['word'])
+    row['phon_rhyme'] = lcs(phon, row['phon'])
+    row['word_rhyme'] = lcs(phon, row['word'])
     row['key'] = (
       -row['phon_rhyme'], # phon_rhyme desc
       -row['word_rhyme'], # eye_rhyme desc
@@ -159,38 +152,33 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
       row['word'] # alphabetical order
     )
     row['phon'] = to_xsampa(row['phon'])
-    result[key].append(row)
-
-  result2 = {}
-  seen = {}
-  for k in result.keys():
-    # TODO only display the word itself if multiple derivations are possible
-    result[k] = sorted(result[k], key=operator.itemgetter('key'))
-    result2[k] = []
-    seen[key] = set()
-    for row in result[k]:
-      bases = row['orig'].split(',')
-      ok = False
-      for i in range(len(bases)):
-        bases[i] = bases[i].split('|')
-        if bases[i][1] not in seen[key]:
-          ok = True
-          seen[key].add(bases[i][1])
-      if ok:
-        row['orig'] = ', '.join(
-          [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
-          for a in bases])
-        result2[k].append(row)
-      seen[key].add(row['word'])
-    count = len(result2[k])
-    result2[k] = result2[k][:PAGESIZE]
+    result.append(row)
+
+  result2 = []
+  seen = set()
+  for row in sorted(result, key=operator.itemgetter('key')):
+    bases = row['orig'].split(',')
+    ok = False
+    for i in range(len(bases)):
+      bases[i] = bases[i].split('|')
+      if bases[i][1] not in seen:
+        ok = True
+        seen.add(bases[i][1])
+    if ok:
+      row['orig'] = ', '.join(
+        [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
+        for a in bases])
+      result2.append(row)
+    seen.add(row['word'])
+  count = len(result2)
+  result2 = result2[:PAGESIZE]
   #cursor = run_query('''
   #SELECT count(t2.word)
   #''' + rest, args)
   #for x in cursor:
   #count = x[x.keys()[0]]
 
-  return result2, count
+  return {key: result2}, count
 
 if __name__ == '__main__':
   print(query(*sys.argv[1:]))