move everything to python3, get rid of encoding issues - drime - French rhyme dictionary with web and CLI interface

commit f4cd6d730fe9f0f7a42cf1f0f91c9bd95041aa21
parent d2aa5011536d26ac30ba03d0bbcc667f0b8d7290
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 26 Oct 2013 14:08:27 +0200

move everything to python3, get rid of encoding issues

Diffstat:
README  | 5 +++++
common.py  | 6 ------
db_mysql.py  | 10 +++++-----
drime.py  | 4 ++--
query.py  | 25 +++++++------------------

5 files changed, 19 insertions(+), 31 deletions(-)
diff --git a/README b/README
@@ -11,12 +11,17 @@ frequency, syllable count, and rhyme gender.
 
 == 2. Requirements ==
 
+drime requires a working Python3 installation.
+
 drime requires the haspirater module <http://gitorious.org/haspirater/>. Just
 place the "haspirater.py" and "haspirater.json" file in the same folder as the
 other files. It also requires the "rhyme.py" module from plint
 <http://gitorious.org/plint/> that you should put in the same folder, and also
 "frhyme.py" (see plint's documentation).
 
+drime also requires PyMySQL <http://www.pymysql.org> and Flask
+<http://flask.pocoo.org/> for Python3.
+
 == 3. Generating the DB ==
 
 The program database isn't shipped, but scripts are provided to build it
diff --git a/common.py b/common.py
@@ -26,8 +26,6 @@ def strip_accents_one(s, with_except=False):
   with_except keeps specifically 'é' and 'è'"""
   r = []
   for x in s:
-    if isinstance(x, str):
-      x = x.decode('utf-8')
     if with_except and x in ['è', 'é']:
       r.append(x)
     else:
@@ -59,8 +57,6 @@ def is_vowels(chunk, with_h=False, with_y=True):
   if not with_y and chunk == 'y':
     return False
   for char in strip_accents(chunk):
-    if isinstance(char, unicode):
-      char = char.encode('utf-8')
     if char not in vowels:
       if char != 'h' or not with_h:
         return False
@@ -70,8 +66,6 @@ def is_consonants(chunk):
   """Test if a chunk is consonants"""
 
   for char in strip_accents(chunk):
-    if isinstance(char, unicode):
-      char = char.encode('utf-8')
     if char not in consonants:
       return False
   return True
diff --git a/db_mysql.py b/db_mysql.py
@@ -1,16 +1,16 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
 
-import MySQLdb
-import MySQLdb.cursors
+import pymysql
+import pymysql.cursors
 from db_mysql_config import config
 
 def run_query(r, v):
-  db = MySQLdb.connect(
+  db = pymysql.connect(
       host=config['host'],
       user=config['user'],
       passwd=config['passwd'],
       db=config['db'],
-      cursorclass=MySQLdb.cursors.DictCursor,
+      cursorclass=pymysql.cursors.DictCursor,
       charset='utf8',
       use_unicode=True)
   cursor = db.cursor()
diff --git a/drime.py b/drime.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
 
 import query
 from flask import Flask, render_template, request, jsonify
@@ -78,5 +78,5 @@ def q():
     return render_template('error.html', **d)
 
 if __name__ == '__main__':
-  app.run()
+  app.run(debug=True)
 
diff --git a/query.py b/query.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python -O
+#!/usr/bin/python3 -O
 # -*- coding: utf-8 -*-
 
 import sys
@@ -54,7 +54,7 @@ def query(q, nsyl='', gender=True, classical=True, page=0):
   else:
       phon = None
   word = ' '.join(word)
-  word = word.replace("œ".decode('utf-8'), "oe").replace("æ".decode('utf-8'), "ae")
+  word = word.replace("œ", "oe").replace("æ", "ae")
   if word == '':
     word = None
   elide = False
@@ -82,11 +82,6 @@ def query(q, nsyl='', gender=True, classical=True, page=0):
   return do_query(word, phon, minsyll, maxsyll, elide, gender, classical,
       page*PAGESIZE, PAGESIZE)
 
-def decode_all(x):
-  for k in x.keys():
-    if isinstance(x[k], str):
-      x[k] = x[k].decode('utf8')
-
 def get_key(x):
   x['orig'] = decode_orig(x['orig'])
   return (x['word'], to_xsampa(x['phon']), render_orig(x),
@@ -114,7 +109,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
   keys = []
   constraint = Constraint(1, True)
   for x in cursor:
-    decode_all(x)
     key = get_key(x)
     keys.append(key)
   sure = True
@@ -130,7 +124,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
       prons = [frhyme.lookup(escape(w))[0][1] for w in s]
       phon = ''.join(prons)[-LIMIT:]
     # now, create a dummy entry for what was provided
-    w = Word(word.encode('utf-8'), phon.encode('utf-8'), '', '', '1',
+    w = Word(word, phon, '', '', '1',
             do_extends=False)
     x = {'word': w.word, 'phon': w.phon, 'word_end': w.word_end, 'phon_end':
         w.phon_end, 'feminine': w.feminine, 'orig': '|'}
@@ -159,7 +153,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
   result = []
   bword = word.split(' ')[-1]
   for row in cursor:
-    decode_all(row)
     if feminine != row['feminine'] and gender:
       continue
     if (row['word'].endswith('-'+bword)):
@@ -168,10 +161,10 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
         and ',' not in row['orig']):
       continue # don't display the word if it has only one possible origin
     if classical:
-      rhyme = Rhyme(word.encode('utf-8'), constraint,
-          phon=[phon.encode('utf-8')])
-      rhyme.restrict(Rhyme(row['word'].encode('utf-8'), constraint,
-          phon=[row['phon'].encode('utf-8')]))
+      rhyme = Rhyme(word, constraint,
+          phon=[phon])
+      rhyme.restrict(Rhyme(row['word'], constraint,
+          phon=[row['phon']]))
       if not rhyme.satisfied():
         continue # discard words following classical rules
     row['freq'] = float(row['freq'])
@@ -216,10 +209,6 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, classical, offset, siz
   return {'keys': [key], 'result': result2}, count, sure
 
 if __name__ == '__main__':
-  # work around encoding issues
-  sys.argv = [x.decode('utf-8') for x in sys.argv]
-  sys.stdout = codecs.getwriter('utf8')(sys.stdout)
-
   def usage():
     print ("Usage: %s QUERY [NSYL [GENDER [CLASSICAL]]]" % sys.argv[0])
   try:

	drime French rhyme dictionary with web and CLI interface
	git clone https://a3nm.net/git/drime/
	Log | Files | Refs | README

README	|	5	+++++
common.py	|	6	------
db_mysql.py	|	10	+++++-----
drime.py	|	4	++--
query.py	|	25	+++++++------------------