drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit e6b424a2503b9d1e8ecbaa3bd0548ce462877208
parent 6b3e556ced2fe137360ad87870a8a0419329f20b
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 26 Oct 2013 12:57:18 +0200

infer pronunciation of unknown words

Diffstat:
drime.py | 3++-
query.py | 35++++++++++++++++++++++++++++-------
2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drime.py b/drime.py @@ -45,12 +45,13 @@ def q(): except ValueError: d['page'] = 0 try: - r, count = query.query(**d) + r, count, sure = query.query(**d) d['lang'] = get_locale() d['pagesize'] = query.PAGESIZE d['mode'] = 'query' d['title'] = get_title() d['count'] = count + d['sure'] = sure d['displayed'] = min(d['pagesize'], count) d['keys'] = r['keys'] if len(r['keys']) == 0: diff --git a/query.py b/query.py @@ -6,7 +6,9 @@ import codecs import operator from db_mysql import run_query from common import from_xsampa, to_xsampa -from rhyme import Rhyme, Constraint +from rhyme import Rhyme, Constraint, escape +import frhyme +from lexique2sql import Word PAGESIZE = 500 @@ -113,8 +115,24 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz decode_all(x) key = get_key(x) keys.append(key) - if len(keys) > 1 or keys == []: - return {'keys': keys}, 0 # require disambiguation or is empty + sure = True + if len(keys) > 1: + return {'keys': keys}, 0, True # require disambiguation or is empty + if len(keys) == 0: + if not word: + return {'keys': keys}, 0, True # we need a word to infer anything + if not phon: + # infer from what was given + sure = False + s = word.split(' ')[-3:] + prons = [frhyme.lookup(escape(w))[0][1] for w in s] + phon = ''.join(prons) + # now, create a dummy entry for what was provided + w = Word(word.encode('utf-8'), phon.encode('utf-8'), '', '', '1', + do_extends=False) + x = {'word': w.word, 'phon': w.phon, 'word_end': w.word_end, 'phon_end': + w.phon_end, 'feminine': w.feminine, 'orig': '|'} + key = get_key(x) word = x['word'] phon = x['phon'] word_end = x['word_end'] @@ -137,13 +155,14 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz cursor = run_query(query, args) #+ (size, offset,)) result = [] + bword = word.split(' ')[-1] for row in cursor: decode_all(row) if feminine != row['feminine'] and gender: continue - if (row['word'].endswith('-'+word)): + if 
(row['word'].endswith('-'+bword)): continue - if (row['word'] == word and row['word'] == word + if (row['word'] == bword and row['word'] == bword and ',' not in row['orig']): continue # don't display the word if it has only one possible origin if classical: @@ -192,7 +211,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz #''' + rest, args) #for x in cursor: #count = x[x.keys()[0]] - return {'keys': [key], 'result': result2}, count + return {'keys': [key], 'result': result2}, count, sure if __name__ == '__main__': # work around encoding issues @@ -205,7 +224,7 @@ if __name__ == '__main__': for p in [3, 4]: if p < len(sys.argv): sys.argv[p] = convert(sys.argv[p]) - r, c = query(*sys.argv[1:]) + r, c, sure = query(*sys.argv[1:]) except BadValues: print ("Bad values passed as arguments.") usage() @@ -222,6 +241,8 @@ if __name__ == '__main__': print (" - %s" % k[-1] + ' -- ' + k[2]) print ("Please rerun with a more specific query") sys.exit(2) + if not sure: + print ("Warning: word is unknown, pronunciation is inferred, please check") result = [["word", "phon", "pr", "wr", "freq", "orig"]] + [ (x['word'], x['phon'],