commit d6e388394d025c2e0b137db29c469b2f61782cbf
parent 6ce5cb23a65b5ae19f10a2f4bd6f2dc24f612bb6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 27 Dec 2011 01:59:28 +0100
code cleanup
Diffstat:
query.py | | | 80 | ++++++++++++++++++++++++++++++++++--------------------------------------------- |
1 file changed, 34 insertions(+), 46 deletions(-)
diff --git a/query.py b/query.py
@@ -78,8 +78,6 @@ def get_key(x):
x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']')
def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
- print ((word, phon, minsyll, maxsyll, elide, gender,))
- print ((offset, size,))
cursor = run_query('''
SELECT t1.word AS t1_word,
t1.phon AS t1_phon
@@ -93,6 +91,9 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
result[get_key(x)] = x
if len(result.keys()) > 1 or result == {}:
return result, 0 # require disambiguation or is empty
+ word = x['t1_word']
+ phon = x['t1_phon']
+ key = get_key(x)
rest = ''' FROM words AS t1, words AS t2
WHERE (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
@@ -102,14 +103,12 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?)))
ORDER BY t2.freq, t1.phon, t1.word
'''
- limit = '''LIMIT ? OFFSET ?'''
+ #limit = '''LIMIT ? OFFSET ?'''
args = (word, word == None, phon, phon == None,
minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,)
query = '''
- SELECT t1.word AS t1_word,
- t1.phon AS t1_phon,
- t1.feminine AS t1_feminine,
+ SELECT t1.feminine AS t1_feminine,
t2.word AS t2_word,
t2.phon AS t2_phon,
t2.freq AS t2_freq,
@@ -121,19 +120,13 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
''' + rest #+ limit
print (query)
cursor = run_query(query, args) #+ (size, offset,))
- result = {}
+ print ("DONE")
+
+ result = []
for x in cursor:
- for k in x.keys():
- if isinstance(x[k], str):
- try:
- x[k] = x[k].decode('utf8')
- except UnicodeDecodeError:
- x[k] = x[k].decode('latin1')
+ decode_all(x)
if x['t1_feminine'] != x['t2_feminine'] and gender:
continue
- key = get_key(x)
- if key not in result.keys():
- result[key] = []
row = dict([
(k[3:], x[k]) for k in x.keys()
if k.startswith('t2_')])
@@ -143,14 +136,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
row[k] = row[k].decode('utf8')
except UnicodeDecodeError:
row[k] = row[k].decode('latin1')
- if (row['word'].endswith('-'+x['t1_word'])):
+ if (row['word'].endswith('-'+word)):
continue
- if (row['word'] == x['t1_word'] and row['word'] == x['t1_word']
+ if (row['word'] == word and row['word'] == word
and ',' not in row['orig']):
continue # don't display the word if it has only one possible origin
row['freq'] = float(row['freq'])
- row['phon_rhyme'] = lcs(x['t1_phon'], row['phon'])
- row['word_rhyme'] = lcs(x['t1_word'], row['word'])
+ row['phon_rhyme'] = lcs(phon, row['phon'])
+ row['word_rhyme'] = lcs(phon, row['word'])
row['key'] = (
-row['phon_rhyme'], # phon_rhyme desc
-row['word_rhyme'], # eye_rhyme desc
@@ -159,38 +152,33 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
row['word'] # alphabetical order
)
row['phon'] = to_xsampa(row['phon'])
- result[key].append(row)
-
- result2 = {}
- seen = {}
- for k in result.keys():
- # TODO only display the word itself if multiple derivations are possible
- result[k] = sorted(result[k], key=operator.itemgetter('key'))
- result2[k] = []
- seen[key] = set()
- for row in result[k]:
- bases = row['orig'].split(',')
- ok = False
- for i in range(len(bases)):
- bases[i] = bases[i].split('|')
- if bases[i][1] not in seen[key]:
- ok = True
- seen[key].add(bases[i][1])
- if ok:
- row['orig'] = ', '.join(
- [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
- for a in bases])
- result2[k].append(row)
- seen[key].add(row['word'])
- count = len(result2[k])
- result2[k] = result2[k][:PAGESIZE]
+ result.append(row)
+
+ result2 = []
+ seen = set()
+ for row in sorted(result, key=operator.itemgetter('key')):
+ bases = row['orig'].split(',')
+ ok = False
+ for i in range(len(bases)):
+ bases[i] = bases[i].split('|')
+ if bases[i][1] not in seen:
+ ok = True
+ seen.add(bases[i][1])
+ if ok:
+ row['orig'] = ', '.join(
+ [a[0] + (' ('+a[1]+')' if row['word'] != a[1] else '')
+ for a in bases])
+ result2.append(row)
+ seen.add(row['word'])
+ count = len(result2)
+ result2 = result2[:PAGESIZE]
#cursor = run_query('''
#SELECT count(t2.word)
#''' + rest, args)
#for x in cursor:
#count = x[x.keys()[0]]
- return result2, count
+ return {key: result2}, count
if __name__ == '__main__':
print(query(*sys.argv[1:]))