commit 7b5acb5f891130182002b1949baf758fd4aac8f1
parent 86f7140b3130908fd72a02a8bfd0262398e34fd9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 13 Mar 2012 12:58:13 +0100
more cleanup
Diffstat:
README | | | 6 | ++++-- |
frhyme.py | | | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
pron.py | | | 70 | ---------------------------------------------------------------------- |
rhyme.py | | | 4 | ++-- |
4 files changed, 66 insertions(+), 74 deletions(-)
diff --git a/README b/README
@@ -29,10 +29,12 @@ It is trained on a list of words with associated pronunciation, and will
infer a few likely possibilities for unseen words using known words with
the longest common prefix, using a trie for internal representation.
-TODO
== 2. Usage ==
-To avoid licensing headaches, no training data is included.
+To avoid licensing headaches, and because the data file is quite big, no
+pronunciation data is included; you have to generate it yourself. See section 3.
+
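+Once you have pronunciation data ready in frhyme.json (in the same directory as frhyme.py), you can run frhyme.py, giving one word per line on stdin.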
If you just want to use the included training data, you can either run
haspirater.py, giving one word per line in stdin and getting the
annotation on stdout, or you can import it in a Python file and call
diff --git a/frhyme.py b/frhyme.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3 -O
+
+#TODO
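+"""Look up the most frequent pronunciations for a word by walking a trie (loaded from frhyme.json) along the reversed word."""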
+
+import os
+import json
+import sys
+from pprint import pprint
+
+f = open(os.path.join(os.path.dirname(
+ os.path.realpath(__file__)), 'frhyme.json'))
+trie = json.load(f)
+f.close()
+
+def to_list(d, rev=True):
+ return [(d[a], a[::-1] if rev else a) for a in d.keys()]
+
+def trie2list(trie):
+ v, c = trie
+ if c == {}:
+ return to_list(v)
+ else:
+ d = {}
+ for child in c.keys():
+ l = trie2list(c[child])
+ for x in l:
+ if x[1] not in d.keys():
+ d[x[1]] = 0
+ d[x[1]] += x[0]
+ return to_list(d, False)
+
+def add_dict(a, b):
+ return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
+
+def do_lookup(trie, key):
+ #print(key)
+ if len(key) == 0 or key[0] not in trie[1].keys():
+ return trie2list(trie)
+ return do_lookup(trie[1][key[0]], key[1:])
+
+def nbest(l, t):
+ l = sorted(l)[-t:]
+ l.reverse()
+ return l
+
+def lookup(key):
+ """Return pronunciations for key"""
+ if key.rstrip() == '':
+ raise ValueError # TODO this is debug
+ return nbest(do_lookup(trie, key[::-1] + ' '), 5)
+
+if __name__ == '__main__':
+ while True:
+ line = sys.stdin.readline()
+ if not line:
+ break
+ line = line.lower().lstrip().rstrip()
+ pprint(lookup(line))
+
diff --git a/pron.py b/pron.py
@@ -1,70 +0,0 @@
-#!/usr/bin/python3 -O
-
-import os
-import json
-import sys
-from pprint import pprint
-
-f = open(os.path.join(os.path.dirname(
- os.path.realpath(__file__)), 'data.json'))
-trie = json.load(f)
-f.close()
-
-def to_list(d, rev=True):
- return [(d[a], a[::-1] if rev else a) for a in d.keys()]
-
-def trie2list(trie):
- v, c = trie
- if c == {}:
- return to_list(v)
- else:
- d = {}
- for child in c.keys():
- l = trie2list(c[child])
- for x in l:
- if x[1] not in d.keys():
- d[x[1]] = 0
- d[x[1]] += x[0]
- return to_list(d, False)
-
-def add_dict(a, b):
- return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
-
-#def trie2list(trie):
-# l = [trie]
-# d = {}
-# while len(l) > 0:
-# print(l[0])
-# v, c = l.pop()
-# if c == {}:
-# d = add_dict(dict(to_list(v)), d)
-# else:
-# for child in c.values():
-# l.append(c)
-# return d
-
-def do_lookup(trie, key):
- #print(key)
- if len(key) == 0 or key[0] not in trie[1].keys():
- return trie2list(trie)
- return do_lookup(trie[1][key[0]], key[1:])
-
-def nbest(l, t):
- l = sorted(l)[-t:]
- l.reverse()
- return l
-
-def lookup(key):
- """Return pronunciations for key"""
- if key.rstrip() == '':
- raise ValueError # TODO this is debug
- return nbest(do_lookup(trie, key[::-1] + ' '), 5)
-
-if __name__ == '__main__':
- while True:
- line = sys.stdin.readline()
- if not line:
- break
- line = line.lower().lstrip().rstrip()
- pprint(lookup(line))
-
diff --git a/rhyme.py b/rhyme.py
@@ -3,7 +3,7 @@
import re
import sys
from pprint import pprint
-import pron
+import frhyme
import functools
vowel = list("Eeaio592O#@y%u")
@@ -45,7 +45,7 @@ def lookup(s):
s = s.split(' ')[-3:]
#pprint(s)
sets = list(map((lambda a : set([x[1] for x in
- pron.lookup(escape(a))])), s))
+ frhyme.lookup(escape(a))])), s))
#print("HERE")
#pprint(sets)
return functools.reduce(concat_couples, sets, set(['']))