more cleanup - frhyme - guess the last phonemes of a French word

commit 7b5acb5f891130182002b1949baf758fd4aac8f1
parent 86f7140b3130908fd72a02a8bfd0262398e34fd9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 13 Mar 2012 12:58:13 +0100

more cleanup

Diffstat:
README  | 6 ++++--
frhyme.py  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pron.py  | 70 ----------------------------------------------------------------------
rhyme.py  | 4 ++--

4 files changed, 66 insertions(+), 74 deletions(-)
diff --git a/README b/README
@@ -29,10 +29,12 @@ It is trained on a list of words with associated pronunciation, and will
 infer a few likely possibilities for unseen words using known words with
 the longest common prefix, using a trie for internal representation.
 
-TODO
 == 2. Usage ==
 
-To avoid licensing headaches, no training data is included. 
+To avoid licensing headaches, and because the data file is quite big, no
+pronunciation data is included; you have to generate it yourself. See section 3.
+
+Once you have pronunciation data ready in 
 If you just want to use the included training data, you can either run
 haspirater.py, giving one word per line in stdin and getting the
 annotation on stdout, or you can import it in a Python file and call
diff --git a/frhyme.py b/frhyme.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3 -O
+
+#TODO
+"""description"""
+
+import os
+import json
+import sys
+from pprint import pprint
+
+f = open(os.path.join(os.path.dirname(
+  os.path.realpath(__file__)), 'frhyme.json'))
+trie = json.load(f)
+f.close()
+
+def to_list(d, rev=True):
+  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
+
+def trie2list(trie):
+  v, c = trie
+  if c == {}:
+    return to_list(v)
+  else:
+    d = {}
+    for child in c.keys():
+      l = trie2list(c[child])
+      for x in l:
+        if x[1] not in d.keys():
+          d[x[1]] = 0
+        d[x[1]] += x[0]
+    return to_list(d, False)
+
+def add_dict(a, b):
+  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
+
+def do_lookup(trie, key):
+  #print(key)
+  if len(key) == 0 or key[0] not in trie[1].keys():
+    return trie2list(trie)
+  return do_lookup(trie[1][key[0]], key[1:])
+
+def nbest(l, t):
+  l = sorted(l)[-t:]
+  l.reverse()
+  return l
+
+def lookup(key):
+  """Return pronunciations for key"""
+  if key.rstrip() == '':
+    raise ValueError # TODO this is debug
+  return nbest(do_lookup(trie, key[::-1] + '  '), 5)
+
+if __name__ == '__main__':
+  while True:
+    line = sys.stdin.readline()
+    if not line:
+      break
+    line = line.lower().lstrip().rstrip()
+    pprint(lookup(line))
+
diff --git a/pron.py b/pron.py
@@ -1,70 +0,0 @@
-#!/usr/bin/python3 -O
-
-import os
-import json
-import sys
-from pprint import pprint
-
-f = open(os.path.join(os.path.dirname(
-  os.path.realpath(__file__)), 'data.json'))
-trie = json.load(f)
-f.close()
-
-def to_list(d, rev=True):
-  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
-
-def trie2list(trie):
-  v, c = trie
-  if c == {}:
-    return to_list(v)
-  else:
-    d = {}
-    for child in c.keys():
-      l = trie2list(c[child])
-      for x in l:
-        if x[1] not in d.keys():
-          d[x[1]] = 0
-        d[x[1]] += x[0]
-    return to_list(d, False)
-
-def add_dict(a, b):
-  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
-
-#def trie2list(trie):
-#  l = [trie]
-#  d = {}
-#  while len(l) > 0:
-#    print(l[0])
-#    v, c = l.pop()
-#    if c == {}:
-#      d = add_dict(dict(to_list(v)), d)
-#    else:
-#      for child in c.values():
-#        l.append(c)
-#  return d
-
-def do_lookup(trie, key):
-  #print(key)
-  if len(key) == 0 or key[0] not in trie[1].keys():
-    return trie2list(trie)
-  return do_lookup(trie[1][key[0]], key[1:])
-   
-def nbest(l, t):
-  l = sorted(l)[-t:]
-  l.reverse()
-  return l
-
-def lookup(key):
-  """Return pronunciations for key"""
-  if key.rstrip() == '':
-    raise ValueError # TODO this is debug
-  return nbest(do_lookup(trie, key[::-1] + '  '), 5)
-
-if __name__ == '__main__':
-  while True:
-    line = sys.stdin.readline()
-    if not line:
-      break
-    line = line.lower().lstrip().rstrip()
-    pprint(lookup(line))
-
diff --git a/rhyme.py b/rhyme.py
@@ -3,7 +3,7 @@
 import re
 import sys
 from pprint import pprint
-import pron
+import frhyme
 import functools
 
 vowel = list("Eeaio592O#@y%u")
@@ -45,7 +45,7 @@ def lookup(s):
   s = s.split(' ')[-3:]
   #pprint(s)
   sets = list(map((lambda a : set([x[1] for x in
-    pron.lookup(escape(a))])), s))
+    frhyme.lookup(escape(a))])), s))
   #print("HERE")
   #pprint(sets)
   return functools.reduce(concat_couples, sets, set(['']))

	frhyme guess the last phonemes of a French word
	git clone https://a3nm.net/git/frhyme/
	Log \| Files \| Refs \| README

README	\|	6	++++--
frhyme.py	\|	60	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pron.py	\|	70	----------------------------------------------------------------------
rhyme.py	\|	4	++--