commit 43c21a35874f917deafeed522f919c0c2f26510c
parent ab0440119d597e8e4557b1e8279725b8e6e41586
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 27 Dec 2011 21:18:20 +0100
cleanup, performance
Diffstat:
2 files changed, 34 insertions(+), 40 deletions(-)
diff --git a/query.py b/query.py
@@ -68,22 +68,18 @@ pass
def decode_all(x):
for k in x.keys():
if isinstance(x[k], str):
- try:
- x[k] = x[k].decode('utf8')
- except UnicodeDecodeError:
- x[k] = x[k].decode('latin1')
+ x[k] = x[k].decode('latin1')
def get_key(x):
- return (x['t1_word'], x['t1_phon'],
- x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']')
+ return (x['word'], x['phon'],
+ x['word'] + ' [' + to_xsampa(x['phon']) + ']')
def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
cursor = run_query('''
- SELECT t1.word AS t1_word,
- t1.phon AS t1_phon
- FROM words AS t1
- WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?)
- ORDER BY t1.freq DESC
+ SELECT word, phon, word_end, phon_end, feminine
+ FROM words
+ WHERE (word = ? OR ?) AND (phon = ? OR ?)
+ ORDER BY freq DESC
''', (word, word == None, phon, phon == None,))
result = {}
for x in cursor:
@@ -91,32 +87,25 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
result[get_key(x)] = x
if len(result.keys()) > 1 or result == {}:
return result, 0 # require disambiguation or is empty
- word = x['t1_word']
- phon = x['t1_phon']
+ word = x['word']
+ phon = x['phon']
+ word_end = x['word_end']
+ phon_end = x['phon_end']
+ feminine = x['feminine']
key = get_key(x)
- rest = ''' FROM words AS t1, words AS t2
- WHERE (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
- AND (t1.word = ?) AND (t1.phon = ?)
- AND ((? OR t2.max_nsyl >= ?)
- AND (? OR t2.min_nsyl <= ?
- OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?)))
- ORDER BY t2.freq, t1.phon, t1.word
+ rest = ''' FROM words
+ WHERE (phon_end = ? OR word_end = ?)
+ AND ((? OR max_nsyl >= ?)
+ AND (? OR min_nsyl <= ?
+ OR (elidable AND min_nsyl - 1 <= ? AND ?)))
'''
#limit = '''LIMIT ? OFFSET ?'''
- args = (word, phon,
+ args = (phon_end, word_end,
minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,)
query = '''
- SELECT t1.feminine AS t1_feminine,
- t2.word AS word,
- t2.phon AS phon,
- t2.freq AS freq,
- t2.min_nsyl AS min_nsyl,
- t2.max_nsyl AS max_nsyl,
- t2.elidable AS elidable,
- t2.orig AS orig,
- t2.feminine AS t2_feminine
+ SELECT word, phon, freq, min_nsyl, max_nsyl, elidable, orig, feminine
''' + rest #+ limit
print (query)
cursor = run_query(query, args) #+ (size, offset,))
@@ -125,7 +114,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
result = []
for row in cursor:
decode_all(row)
- if row['t1_feminine'] != row['t2_feminine'] and gender:
+ if feminine != row['feminine'] and gender:
continue
if (row['word'].endswith('-'+word)):
continue
@@ -135,10 +124,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
row['freq'] = float(row['freq'])
row['phon_rhyme'] = lcs(phon, row['phon'])
row['word_rhyme'] = lcs(word, row['word'])
- bases = row['orig'].split(',')
- for i in range(len(bases)):
- bases[i] = bases[i].split('|')
- row['orig'] = bases
row['key'] = (
-row['phon_rhyme'], # phon_rhyme desc
-row['word_rhyme'], # eye_rhyme desc
@@ -149,10 +134,15 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
result.append(row)
print ("DONE2")
-
+ result.sort(key=operator.itemgetter('key'))
result2 = []
seen = set()
- for row in sorted(result, key=operator.itemgetter('key')):
+ c = 0
+ for row in result:
+ bases = row['orig'].split(',')
+ for i in range(len(bases)):
+ bases[i] = bases[i].split('|')
+ row['orig'] = bases
ok = False
for i in range(len(row['orig'])):
if row['orig'][i][1] not in seen:
@@ -164,10 +154,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
for a in row['orig']])
row['phon'] = to_xsampa(row['phon'])
result2.append(row)
+ c += 1
+ if c > PAGESIZE:
+ break
seen.add(row['word'])
- count = len(result2)
+ count = len(result)
result2 = result2[:PAGESIZE]
+ print ("DONE3")
#cursor = run_query('''
#SELECT count(t2.word)
#''' + rest, args)
diff --git a/templates/page.html b/templates/page.html
@@ -80,9 +80,9 @@ result{% if displayed != 1 %}s{% endif %}
{{ page * pagesize + 1 }} to {{ (page+1) * pagesize }} #}
{% if displayed < count %}
{% if lang == 'fr' %}
-sur
+sur environ
{% else %}
-of
+of about
{% endif %}
{{ count }}
{% if lang == 'fr' %}