drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit f4cd6d730fe9f0f7a42cf1f0f91c9bd95041aa21
parent d2aa5011536d26ac30ba03d0bbcc667f0b8d7290
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 26 Oct 2013 14:08:27 +0200

move everything to python3, get rid of encoding issues

Diffstat:
README | 5 +++++
common.py | 6 ------
db_mysql.py | 10 +++++-----
drime.py | 4 ++--
query.py | 25 +++++++------------------
5 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/README b/README @@ -11,12 +11,17 @@ frequency, syllable count, and rhyme gender. == 2. Requirements == +drime requires a working Python3 installation. + drime requires the haspirater module <http://gitorious.org/haspirater/>. Just place the "haspirater.py" and "haspirater.json" file in the same folder as the other files. It also requires the "rhyme.py" module from plint <http://gitorious.org/plint/> that you should put in the same folder, and also "frhyme.py" (see plint's documentation). +drime also requires PyMySQL <http://www.pymysql.org> and Flask +<http://flask.pocoo.org/> for Python3. + == 3. Generating the DB == The program database isn't shipped, but scripts are provided to build it diff --git a/common.py b/common.py @@ -26,8 +26,6 @@ def strip_accents_one(s, with_except=False): with_except keeps specifically 'é' and 'è'""" r = [] for x in s: - if isinstance(x, str): - x = x.decode('utf-8') if with_except and x in ['è', 'é']: r.append(x) else: @@ -59,8 +57,6 @@ def is_vowels(chunk, with_h=False, with_y=True): if not with_y and chunk == 'y': return False for char in strip_accents(chunk): - if isinstance(char, unicode): - char = char.encode('utf-8') if char not in vowels: if char != 'h' or not with_h: return False @@ -70,8 +66,6 @@ def is_consonants(chunk): """Test if a chunk is consonants""" for char in strip_accents(chunk): - if isinstance(char, unicode): - char = char.encode('utf-8') if char not in consonants: return False return True diff --git a/db_mysql.py b/db_mysql.py @@ -1,16 +1,16 @@ -#!/usr/bin/python -O +#!/usr/bin/python3 -O -import MySQLdb -import MySQLdb.cursors +import pymysql +import pymysql.cursors from db_mysql_config import config def run_query(r, v): - db = MySQLdb.connect( + db = pymysql.connect( host=config['host'], user=config['user'], passwd=config['passwd'], db=config['db'], - cursorclass=MySQLdb.cursors.DictCursor, + cursorclass=pymysql.cursors.DictCursor, charset='utf8', use_unicode=True) cursor = db.cursor() diff --git 
a/drime.py b/drime.py @@ -1,4 +1,4 @@ -#!/usr/bin/python -O +#!/usr/bin/python3 -O import query from flask import Flask, render_template, request, jsonify @@ -78,5 +78,5 @@ def q(): return render_template('error.html', **d) if __name__ == '__main__': - app.run() + app.run(debug=True) diff --git a/query.py b/query.py @@ -1,4 +1,4 @@ -#!/usr/bin/python -O +#!/usr/bin/python3 -O # -*- coding: utf-8 -*- import sys @@ -54,7 +54,7 @@ def query(q, nsyl='', gender=True, classical=True, page=0): else: phon = None word = ' '.join(word) - word = word.replace("œ".decode('utf-8'), "oe").replace("æ".decode('utf-8'), "ae") + word = word.replace("œ", "oe").replace("æ", "ae") if word == '': word = None elide = False @@ -82,11 +82,6 @@ def query(q, nsyl='', gender=True, classical=True, page=0): return do_query(word, phon, minsyll, maxsyll, elide, gender, classical, page*PAGESIZE, PAGESIZE) -def decode_all(x): - for k in x.keys(): - if isinstance(x[k], str): - x[k] = x[k].decode('utf8') - def get_key(x): x['orig'] = decode_orig(x['orig']) return (x['word'], to_xsampa(x['phon']), render_orig(x), @@ -114,7 +109,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz keys = [] constraint = Constraint(1, True) for x in cursor: - decode_all(x) key = get_key(x) keys.append(key) sure = True @@ -130,7 +124,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz prons = [frhyme.lookup(escape(w))[0][1] for w in s] phon = ''.join(prons)[-LIMIT:] # now, create a dummy entry for what was provided - w = Word(word.encode('utf-8'), phon.encode('utf-8'), '', '', '1', + w = Word(word, phon, '', '', '1', do_extends=False) x = {'word': w.word, 'phon': w.phon, 'word_end': w.word_end, 'phon_end': w.phon_end, 'feminine': w.feminine, 'orig': '|'} @@ -159,7 +153,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz result = [] bword = word.split(' ')[-1] for row in cursor: - decode_all(row) if feminine != 
row['feminine'] and gender: continue if (row['word'].endswith('-'+bword)): @@ -168,10 +161,10 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz and ',' not in row['orig']): continue # don't display the word if it has only one possible origin if classical: - rhyme = Rhyme(word.encode('utf-8'), constraint, - phon=[phon.encode('utf-8')]) - rhyme.restrict(Rhyme(row['word'].encode('utf-8'), constraint, - phon=[row['phon'].encode('utf-8')])) + rhyme = Rhyme(word, constraint, + phon=[phon]) + rhyme.restrict(Rhyme(row['word'], constraint, + phon=[row['phon']])) if not rhyme.satisfied(): continue # discard words following classical rules row['freq'] = float(row['freq']) @@ -216,10 +209,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz return {'keys': [key], 'result': result2}, count, sure if __name__ == '__main__': - # work around encoding issues - sys.argv = [x.decode('utf-8') for x in sys.argv] - sys.stdout = codecs.getwriter('utf8')(sys.stdout) - def usage(): print ("Usage: %s QUERY [NSYL [GENDER [CLASSICAL]]]" % sys.argv[0]) try: