commit f4cd6d730fe9f0f7a42cf1f0f91c9bd95041aa21
parent d2aa5011536d26ac30ba03d0bbcc667f0b8d7290
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 26 Oct 2013 14:08:27 +0200
move everything to python3, get rid of encoding issues
Diffstat:
5 files changed, 19 insertions(+), 31 deletions(-)
diff --git a/README b/README
@@ -11,12 +11,17 @@ frequency, syllable count, and rhyme gender.
== 2. Requirements ==
+drime requires a working Python 3 installation.
+
drime requires the haspirater module <http://gitorious.org/haspirater/>. Just
place the "haspirater.py" and "haspirater.json" file in the same folder as the
other files. It also requires the "rhyme.py" module from plint
<http://gitorious.org/plint/> that you should put in the same folder, and also
"frhyme.py" (see plint's documentation).
+drime also requires PyMySQL <http://www.pymysql.org> and Flask
+<http://flask.pocoo.org/> for Python 3.
+
== 3. Generating the DB ==
The program database isn't shipped, but scripts are provided to build it
diff --git a/common.py b/common.py
@@ -26,8 +26,6 @@ def strip_accents_one(s, with_except=False):
with_except keeps specifically 'é' and 'è'"""
r = []
for x in s:
- if isinstance(x, str):
- x = x.decode('utf-8')
if with_except and x in ['è', 'é']:
r.append(x)
else:
@@ -59,8 +57,6 @@ def is_vowels(chunk, with_h=False, with_y=True):
if not with_y and chunk == 'y':
return False
for char in strip_accents(chunk):
- if isinstance(char, unicode):
- char = char.encode('utf-8')
if char not in vowels:
if char != 'h' or not with_h:
return False
@@ -70,8 +66,6 @@ def is_consonants(chunk):
"""Test if a chunk is consonants"""
for char in strip_accents(chunk):
- if isinstance(char, unicode):
- char = char.encode('utf-8')
if char not in consonants:
return False
return True
diff --git a/db_mysql.py b/db_mysql.py
@@ -1,16 +1,16 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
-import MySQLdb
-import MySQLdb.cursors
+import pymysql
+import pymysql.cursors
from db_mysql_config import config
def run_query(r, v):
- db = MySQLdb.connect(
+ db = pymysql.connect(
host=config['host'],
user=config['user'],
passwd=config['passwd'],
db=config['db'],
- cursorclass=MySQLdb.cursors.DictCursor,
+ cursorclass=pymysql.cursors.DictCursor,
charset='utf8',
use_unicode=True)
cursor = db.cursor()
diff --git a/drime.py b/drime.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
import query
from flask import Flask, render_template, request, jsonify
@@ -78,5 +78,5 @@ def q():
return render_template('error.html', **d)
if __name__ == '__main__':
- app.run()
+ app.run(debug=True)
diff --git a/query.py b/query.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
# -*- coding: utf-8 -*-
import sys
@@ -54,7 +54,7 @@ def query(q, nsyl='', gender=True, classical=True, page=0):
else:
phon = None
word = ' '.join(word)
- word = word.replace("œ".decode('utf-8'), "oe").replace("æ".decode('utf-8'), "ae")
+ word = word.replace("œ", "oe").replace("æ", "ae")
if word == '':
word = None
elide = False
@@ -82,11 +82,6 @@ def query(q, nsyl='', gender=True, classical=True, page=0):
return do_query(word, phon, minsyll, maxsyll, elide, gender, classical,
page*PAGESIZE, PAGESIZE)
-def decode_all(x):
- for k in x.keys():
- if isinstance(x[k], str):
- x[k] = x[k].decode('utf8')
-
def get_key(x):
x['orig'] = decode_orig(x['orig'])
return (x['word'], to_xsampa(x['phon']), render_orig(x),
@@ -114,7 +109,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
keys = []
constraint = Constraint(1, True)
for x in cursor:
- decode_all(x)
key = get_key(x)
keys.append(key)
sure = True
@@ -130,7 +124,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
prons = [frhyme.lookup(escape(w))[0][1] for w in s]
phon = ''.join(prons)[-LIMIT:]
# now, create a dummy entry for what was provided
- w = Word(word.encode('utf-8'), phon.encode('utf-8'), '', '', '1',
+ w = Word(word, phon, '', '', '1',
do_extends=False)
x = {'word': w.word, 'phon': w.phon, 'word_end': w.word_end, 'phon_end':
w.phon_end, 'feminine': w.feminine, 'orig': '|'}
@@ -159,7 +153,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
result = []
bword = word.split(' ')[-1]
for row in cursor:
- decode_all(row)
if feminine != row['feminine'] and gender:
continue
if (row['word'].endswith('-'+bword)):
@@ -168,10 +161,10 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
and ',' not in row['orig']):
continue # don't display the word if it has only one possible origin
if classical:
- rhyme = Rhyme(word.encode('utf-8'), constraint,
- phon=[phon.encode('utf-8')])
- rhyme.restrict(Rhyme(row['word'].encode('utf-8'), constraint,
- phon=[row['phon'].encode('utf-8')]))
+ rhyme = Rhyme(word, constraint,
+ phon=[phon])
+ rhyme.restrict(Rhyme(row['word'], constraint,
+ phon=[row['phon']]))
if not rhyme.satisfied():
continue # discard words following classical rules
row['freq'] = float(row['freq'])
@@ -216,10 +209,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
return {'keys': [key], 'result': result2}, count, sure
if __name__ == '__main__':
- # work around encoding issues
- sys.argv = [x.decode('utf-8') for x in sys.argv]
- sys.stdout = codecs.getwriter('utf8')(sys.stdout)
-
def usage():
print ("Usage: %s QUERY [NSYL [GENDER [CLASSICAL]]]" % sys.argv[0])
try: