commit da8d869875cac049ed3f6fdba3ac2bcbac9e3624
parent 9e3a33cd6bf26a491c5a19bd7b60f87a67f906f6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Fri, 23 Dec 2011 19:52:18 +0100
start query.py
Diffstat:
query.py | | | 108 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 108 insertions(+), 0 deletions(-)
diff --git a/query.py b/query.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python3 -O
+
+import sqlite3
+import os
+import sys
+import operator
+
# Page size handed to do_query() by query().
PAGESIZE = 50

# The SQLite database file sits next to this script; realpath() resolves
# symlinks so the lookup works even when the script is invoked via a link.
DBPATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    'db.sqlite',
)

# One module-wide connection and cursor, opened at import time.
# sqlite3.Row lets result columns be read by name (row['t1_word']).
db = sqlite3.connect(DBPATH)
db.row_factory = sqlite3.Row
cursor = db.cursor()
+
+
def lcs(x, y):
    """Return the length of the longest common suffix of x and y.

    Both arguments are sequences (strings in this file).  The result is
    the number of trailing items they share, e.g. ``lcs('abc', 'xbc')``
    is 2.  An empty argument yields 0 instead of raising IndexError.
    """
    length = 0
    # Walk both sequences from the end; zip stops at the shorter one, so
    # no explicit bounds check is needed.
    for a, b in zip(reversed(x), reversed(y)):
        if a != b:
            break
        length += 1
    return length
+
+
def query(word, syll='', genre=True, page=0):
    """Parse a user query and forward it to do_query().

    Parameters:
        word: the query string.  It is split on single spaces; if the last
            token is wrapped in square brackets (``"mot [mo]"``) the text
            inside the brackets is passed on as ``phon`` and removed from
            the word, otherwise ``phon`` is None.
        syll: syllable-count constraint.  Empty means no constraint
            (both bounds None).  Otherwise either ``"N"`` (min = max = N)
            or ``"MIN-MAX"``; a trailing ``+`` on the last number sets the
            ``elide`` flag passed to do_query().
        genre: passed through unchanged to do_query().
        page: zero-based page index; converted with int() so a string
            coming from the command line works too.

    Returns:
        Whatever do_query() returns.

    Raises:
        ValueError: if ``syll`` has more than two '-'-separated parts or a
            part is not an integer.
    """
    # word => word, phon
    tokens = word.strip().split(' ')
    last = tokens[-1]
    if last.startswith('[') and last.endswith(']'):
        phon = last[1:-1]
        tokens = tokens[:-1]
    else:
        phon = None
    # NOTE(review): a phon-only query leaves word == '' (not None), so the
    # word filter in do_query() stays active -- confirm this is intended.
    word = ' '.join(tokens)

    syll = syll.strip()
    elide = False
    minsyll = None
    maxsyll = None
    if syll:
        bounds = syll.split('-')
        # Validate the shape first so malformed input gives ValueError
        # rather than an IndexError from peeking at a missing part.
        if len(bounds) > 2:
            raise ValueError('syll must be "N" or "MIN-MAX", got %r' % syll)
        if bounds[-1].endswith('+'):
            bounds[-1] = bounds[-1][:-1]
            elide = True
        minsyll = int(bounds[0])
        # With a single number, bounds[-1] is bounds[0]: min == max.
        maxsyll = int(bounds[-1])

    return do_query(word, phon, minsyll, maxsyll, elide, genre,
                    int(page) * PAGESIZE, PAGESIZE)
+
def do_query(word, phon, minsyll, maxsyll, elide, genre, offset, size):
    """Look up words sharing an ending with the queried word.

    The ``words`` table is joined with itself: ``t1`` is the queried entry,
    ``t2`` every entry whose ``phon_end`` or ``word_end`` equals t1's.

    Parameters:
        word: value ``t1.word`` must equal, or None to skip that filter.
        phon: value ``t1.phon`` must equal, or None to skip that filter.
        minsyll: keep t2 rows with ``max_nsyl >= minsyll``; None disables.
        maxsyll: keep t2 rows with ``min_nsyl <= maxsyll``, or, for rows
            flagged ``elidable``, ``min_nsyl - 1 <= maxsyll``; None
            disables.
        elide: currently unused.
        genre: when true, drop pairs whose ``feminine`` values differ.
        offset: currently unused.
        size: currently unused.

    Returns:
        A dict mapping ``(t1 word, t1 phon, t1 freq)`` to a list of dicts,
        one per matching t2 row.  Each dict holds the selected t2 columns
        (without the ``t2_`` prefix) plus ``phon_rhyme`` and ``word_rhyme``
        (common-suffix lengths with t1's phon and word) and ``key``, the
        tuple the list is sorted by.
    """
    # NOTE(review): elide, offset and size are accepted but never used --
    # the elidable test relies only on the t2.elidable column and no
    # paging is applied.  Presumably still to be implemented; confirm.
    #
    # Each optional filter is written as "(? OR condition)" with an
    # "is None" flag bound to the first placeholder, which turns the
    # condition off without building the SQL text dynamically.
    cursor.execute('''
        SELECT t1.freq AS t1_freq,
        t1.word AS t1_word,
        t1.phon AS t1_phon,
        t1.feminine AS t1_feminine,
        t2.word AS t2_word,
        t2.phon AS t2_phon,
        t2.freq AS t2_freq,
        t2.min_nsyl AS t2_min_nsyl,
        t2.max_nsyl AS t2_max_nsyl,
        t2.elidable AS t2_elidable,
        t2.base AS t2_base,
        t2.kind AS t2_kind,
        t2.feminine AS t2_feminine
        FROM words AS t1 INNER JOIN words AS t2 ON
        (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
        WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?)
        AND (? OR t2.max_nsyl >= ?)
        AND (? OR t2.min_nsyl <= ? OR (t2.elidable AND t2.min_nsyl - 1 <= ?))
        ORDER BY t1.freq, t1.phon, t1.word
        ''', (word, word is None, phon, phon is None,
              minsyll is None, minsyll,
              maxsyll is None, maxsyll, maxsyll))

    result = {}
    for x in cursor:
        if genre and x['t1_feminine'] != x['t2_feminine']:
            continue
        key = (x['t1_word'], x['t1_phon'], x['t1_freq'])
        # Register the t1 entry before building the row, so the group
        # exists as soon as a pair survives the genre filter.
        matches = result.setdefault(key, [])
        # Keep only the t2_* columns, stripping the 3-character prefix.
        row = {k[3:]: x[k] for k in x.keys() if k.startswith('t2_')}
        row['phon_rhyme'] = lcs(x['t1_phon'], row['phon'])
        row['word_rhyme'] = lcs(x['t1_word'], row['word'])
        row['key'] = (
            -row['phon_rhyme'],  # phon_rhyme desc
            -row['word_rhyme'],  # eye_rhyme desc
            row['base'] == row['word'],  # same as base
            -float(row['freq']),  # frequency desc
            row['word'],  # alphabetical order
        )
        matches.append(row)

    for matches in result.values():
        matches.sort(key=operator.itemgetter('key'))

    return result
+
if __name__ == '__main__':
    # Command line: WORD [SYLL [GENRE [PAGE]]], mapped positionally onto
    # query().  All arguments reach query() as strings, so any non-empty
    # GENRE counts as true.
    args = sys.argv[1:]
    if not 1 <= len(args) <= 4:
        # Fail with a readable message instead of a TypeError traceback.
        sys.exit('usage: %s WORD [SYLL [GENRE [PAGE]]]' % sys.argv[0])
    print(query(*args))