drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit 43c21a35874f917deafeed522f919c0c2f26510c
parent ab0440119d597e8e4557b1e8279725b8e6e41586
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 21:18:20 +0100

cleanup, performance

Diffstat:
query.py            | 70 ++++++++++++++++++++++++++++++++--------------------------------------
templates/page.html |  4 ++--
2 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/query.py b/query.py @@ -68,22 +68,18 @@ pass def decode_all(x): for k in x.keys(): if isinstance(x[k], str): - try: - x[k] = x[k].decode('utf8') - except UnicodeDecodeError: - x[k] = x[k].decode('latin1') + x[k] = x[k].decode('latin1') def get_key(x): - return (x['t1_word'], x['t1_phon'], - x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']') + return (x['word'], x['phon'], + x['word'] + ' [' + to_xsampa(x['phon']) + ']') def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): cursor = run_query(''' - SELECT t1.word AS t1_word, - t1.phon AS t1_phon - FROM words AS t1 - WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?) - ORDER BY t1.freq DESC + SELECT word, phon, word_end, phon_end, feminine + FROM words + WHERE (word = ? OR ?) AND (phon = ? OR ?) + ORDER BY freq DESC ''', (word, word == None, phon, phon == None,)) result = {} for x in cursor: @@ -91,32 +87,25 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): result[get_key(x)] = x if len(result.keys()) > 1 or result == {}: return result, 0 # require disambiguation or is empty - word = x['t1_word'] - phon = x['t1_phon'] + word = x['word'] + phon = x['phon'] + word_end = x['word_end'] + phon_end = x['phon_end'] + feminine = x['feminine'] key = get_key(x) - rest = ''' FROM words AS t1, words AS t2 - WHERE (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end) - AND (t1.word = ?) AND (t1.phon = ?) - AND ((? OR t2.max_nsyl >= ?) - AND (? OR t2.min_nsyl <= ? - OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?))) - ORDER BY t2.freq, t1.phon, t1.word + rest = ''' FROM words + WHERE (phon_end = ? OR word_end = ?) + AND ((? OR max_nsyl >= ?) + AND (? OR min_nsyl <= ? + OR (elidable AND min_nsyl - 1 <= ? AND ?))) ''' #limit = '''LIMIT ? 
OFFSET ?''' - args = (word, phon, + args = (phon_end, word_end, minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,) query = ''' - SELECT t1.feminine AS t1_feminine, - t2.word AS word, - t2.phon AS phon, - t2.freq AS freq, - t2.min_nsyl AS min_nsyl, - t2.max_nsyl AS max_nsyl, - t2.elidable AS elidable, - t2.orig AS orig, - t2.feminine AS t2_feminine + SELECT word, phon, freq, min_nsyl, max_nsyl, elidable, orig, feminine ''' + rest #+ limit print (query) cursor = run_query(query, args) #+ (size, offset,)) @@ -125,7 +114,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): result = [] for row in cursor: decode_all(row) - if row['t1_feminine'] != row['t2_feminine'] and gender: + if feminine != row['feminine'] and gender: continue if (row['word'].endswith('-'+word)): continue @@ -135,10 +124,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): row['freq'] = float(row['freq']) row['phon_rhyme'] = lcs(phon, row['phon']) row['word_rhyme'] = lcs(word, row['word']) - bases = row['orig'].split(',') - for i in range(len(bases)): - bases[i] = bases[i].split('|') - row['orig'] = bases row['key'] = ( -row['phon_rhyme'], # phon_rhyme desc -row['word_rhyme'], # eye_rhyme desc @@ -149,10 +134,15 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): result.append(row) print ("DONE2") - + result.sort(key=operator.itemgetter('key')) result2 = [] seen = set() - for row in sorted(result, key=operator.itemgetter('key')): + c = 0 + for row in result: + bases = row['orig'].split(',') + for i in range(len(bases)): + bases[i] = bases[i].split('|') + row['orig'] = bases ok = False for i in range(len(row['orig'])): if row['orig'][i][1] not in seen: @@ -164,10 +154,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): for a in row['orig']]) row['phon'] = to_xsampa(row['phon']) result2.append(row) + c += 1 + if c > PAGESIZE: + break seen.add(row['word']) - count = len(result2) + count 
= len(result) result2 = result2[:PAGESIZE] + print ("DONE3") #cursor = run_query(''' #SELECT count(t2.word) #''' + rest, args) diff --git a/templates/page.html b/templates/page.html @@ -80,9 +80,9 @@ result{% if displayed != 1 %}s{% endif %} {{ page * pagesize + 1 }} to {{ (page+1) * pagesize }} #} {% if displayed < count %} {% if lang == 'fr' %} -sur +sur environ {% else %} -of +of about {% endif %} {{ count }} {% if lang == 'fr' %}