drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit da8d869875cac049ed3f6fdba3ac2bcbac9e3624
parent 9e3a33cd6bf26a491c5a19bd7b60f87a67f906f6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 23 Dec 2011 19:52:18 +0100

start query.py

Diffstat:
query.py | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 108 insertions(+), 0 deletions(-)

#!/usr/bin/python3 -O
# query.py, as added by: diff --git a/query.py b/query.py @@ -0,0 +1,108 @@

"""Look up rhymes for a word in the SQLite word database."""

import sqlite3
import os
import sys
import operator

# Number of results per page; used to turn a page number into an offset.
PAGESIZE = 50
# The database file is expected next to this script.
DBPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                      'db.sqlite')
db = sqlite3.connect(DBPATH)
# Rows must be addressable by column name (see do_query); the factory has
# to be set before the cursor is created.
db.row_factory = sqlite3.Row
cursor = db.cursor()


def lcs(x, y):
    """Return the length of the longest common suffix of x and y.

    Either sequence may be empty, in which case the result is 0.
    """
    length = 0
    # Walk both sequences from the end; zip stops at the shorter one.
    for a, b in zip(reversed(x), reversed(y)):
        if a != b:
            break
        length += 1
    return length


def _parse_syll(syll):
    """Parse a syllable-count specification.

    Accepted forms are '' (no constraint), 'N', 'N-M', each optionally
    followed by '+' on the last number to request elision.

    Returns a (minsyll, maxsyll, elide) tuple; the bounds are None when
    the specification is empty.

    Raises ValueError when there are more than two '-'-separated parts or
    when a part is not an integer.
    """
    syll = syll.strip()
    if not syll:
        return None, None, False
    bounds = syll.split('-')
    if len(bounds) > 2:
        raise ValueError('invalid syllable specification: %r' % syll)
    # The '+' marker sits on the last bound, whether or not a range is given.
    elide = bounds[-1].endswith('+')
    if elide:
        bounds[-1] = bounds[-1][:-1]
    # With a single number, bounds[0] and bounds[-1] are the same element.
    return int(bounds[0]), int(bounds[-1]), elide


def query(word, syll='', genre=True, page=0):
    """Parse a user query and return the matching rhymes.

    word  -- the word to rhyme with; a trailing '[...]' token gives its
             pronunciation, e.g. 'chat [Sa]'.  A query made only of the
             bracketed token searches by pronunciation alone.
    syll  -- syllable-count constraint, see _parse_syll.
    genre -- when true, only keep rhymes whose 'feminine' flag matches.
    page  -- page number, multiplied by PAGESIZE to obtain an offset.

    Returns the dictionary built by do_query.
    Raises ValueError on a malformed syllable specification.
    """
    # word => word, phon
    tokens = word.strip().split(' ')
    if tokens[-1].startswith('[') and tokens[-1].endswith(']'):
        phon = tokens[-1][1:-1]
        tokens = tokens[:-1]
    else:
        phon = None
    word = ' '.join(tokens)
    if not word and phon is not None:
        # Pronunciation-only query: do_query treats None as "any word".
        word = None

    minsyll, maxsyll, elide = _parse_syll(syll)

    # int() because the page number is a string when it comes from argv.
    return do_query(word, phon, minsyll, maxsyll, elide, genre,
                    int(page) * PAGESIZE, PAGESIZE)


def do_query(word, phon, minsyll, maxsyll, elide, genre, offset, size):
    """Run the rhyme query and group the results by source word.

    word, phon       -- restrict the source entries; None means "any".
    minsyll, maxsyll -- syllable bounds on the rhymes; None means "any".
    genre            -- when true, drop rhymes whose 'feminine' flag
                        differs from the source entry's.
    elide, offset, size -- accepted but not used by the query yet.

    Returns a dict mapping (word, phon, freq) of each matching source
    entry to a list of rhyme dicts, sorted by their 'key' entry.  Each
    rhyme dict holds the t2 columns (without the 't2_' prefix) plus
    'phon_rhyme', 'word_rhyme' and 'key'.
    """
    # Each "? OR ..." / "... OR ?" pair takes a flag that disables the
    # corresponding condition when the Python value is None.
    cursor.execute('''
        SELECT t1.freq AS t1_freq,
               t1.word AS t1_word,
               t1.phon AS t1_phon,
               t1.feminine AS t1_feminine,
               t2.word AS t2_word,
               t2.phon AS t2_phon,
               t2.freq AS t2_freq,
               t2.min_nsyl AS t2_min_nsyl,
               t2.max_nsyl AS t2_max_nsyl,
               t2.elidable AS t2_elidable,
               t2.base AS t2_base,
               t2.kind AS t2_kind,
               t2.feminine AS t2_feminine
        FROM words AS t1 INNER JOIN words AS t2 ON
          (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
        WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?)
          AND (? OR t2.max_nsyl >= ?)
          AND (? OR t2.min_nsyl <= ?
            OR (t2.elidable AND t2.min_nsyl - 1 <= ?))
        ORDER BY t1.freq, t1.phon, t1.word
        ''', (word, word is None, phon, phon is None,
              minsyll is None, minsyll, maxsyll is None, maxsyll, maxsyll,))
    result = {}
    for x in cursor:
        if genre and x['t1_feminine'] != x['t2_feminine']:
            continue
        key = (x['t1_word'], x['t1_phon'], x['t1_freq'])
        # Strip the 't2_' prefix from the rhyme's columns.
        row = {k[3:]: x[k] for k in x.keys() if k.startswith('t2_')}
        row['phon_rhyme'] = lcs(x['t1_phon'], row['phon'])
        row['word_rhyme'] = lcs(x['t1_word'], row['word'])
        row['key'] = (
            -row['phon_rhyme'],          # phon_rhyme desc
            -row['word_rhyme'],          # eye_rhyme desc
            row['base'] == row['word'],  # same as base
            -float(row['freq']),         # frequency desc
            row['word'],                 # alphabetical order
        )
        result.setdefault(key, []).append(row)
    for rows in result.values():
        rows.sort(key=operator.itemgetter('key'))

    return result


if __name__ == '__main__':
    print(query(*sys.argv[1:]))