frhyme

guess the last phonemes of a French word
git clone https://a3nm.net/git/frhyme/
Log | Files | Refs | README

commit 7b5acb5f891130182002b1949baf758fd4aac8f1
parent 86f7140b3130908fd72a02a8bfd0262398e34fd9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 13 Mar 2012 12:58:13 +0100

more cleanup

Diffstat:
README | 6++++--
frhyme.py | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pron.py | 70----------------------------------------------------------------------
rhyme.py | 4++--
4 files changed, 66 insertions(+), 74 deletions(-)

diff --git a/README b/README
@@ -29,10 +29,12 @@ It is trained on a list of words with associated pronunciation, and will infer a
 few likely possibilities for unseen words using known words with the longest
 common prefix, using a trie for internal representation.
 
-TODO
 == 2. Usage ==
 
-To avoid licensing headaches, no training data is included.
+To avoid licensing headaches, and because the data file is quite big, no
+pronunciation data is included, you have to generate it yourself. See section 3.
+
+Once you have pronunciation data ready in
 If you just want to use the included training data, you can either run
 haspirater.py, giving one word per line in stdin and getting the annotation on
 stout, or you can import it in a Python file and call
diff --git a/frhyme.py b/frhyme.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3 -O
+
+#TODO
+"""description"""
+
+import os
+import json
+import sys
+from pprint import pprint
+
+f = open(os.path.join(os.path.dirname(
+  os.path.realpath(__file__)), 'frhyme.json'))
+trie = json.load(f)
+f.close()
+
+def to_list(d, rev=True):
+  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
+
+def trie2list(trie):
+  v, c = trie
+  if c == {}:
+    return to_list(v)
+  else:
+    d = {}
+    for child in c.keys():
+      l = trie2list(c[child])
+      for x in l:
+        if x[1] not in d.keys():
+          d[x[1]] = 0
+        d[x[1]] += x[0]
+    return to_list(d, False)
+
+def add_dict(a, b):
+  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
+
+def do_lookup(trie, key):
+  #print(key)
+  if len(key) == 0 or key[0] not in trie[1].keys():
+    return trie2list(trie)
+  return do_lookup(trie[1][key[0]], key[1:])
+
+def nbest(l, t):
+  l = sorted(l)[-t:]
+  l.reverse()
+  return l
+
+def lookup(key):
+  """Return pronunciations for key"""
+  if key.rstrip() == '':
+    raise ValueError # TODO this is debug
+  return nbest(do_lookup(trie, key[::-1] + ' '), 5)
+
+if __name__ == '__main__':
+  while True:
+    line = sys.stdin.readline()
+    if not line:
+      break
+    line = line.lower().lstrip().rstrip()
+    pprint(lookup(line))
+
diff --git a/pron.py b/pron.py
@@ -1,70 +0,0 @@
-#!/usr/bin/python3 -O
-
-import os
-import json
-import sys
-from pprint import pprint
-
-f = open(os.path.join(os.path.dirname(
-  os.path.realpath(__file__)), 'data.json'))
-trie = json.load(f)
-f.close()
-
-def to_list(d, rev=True):
-  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
-
-def trie2list(trie):
-  v, c = trie
-  if c == {}:
-    return to_list(v)
-  else:
-    d = {}
-    for child in c.keys():
-      l = trie2list(c[child])
-      for x in l:
-        if x[1] not in d.keys():
-          d[x[1]] = 0
-        d[x[1]] += x[0]
-    return to_list(d, False)
-
-def add_dict(a, b):
-  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
-
-#def trie2list(trie):
-#  l = [trie]
-#  d = {}
-#  while len(l) > 0:
-#    print(l[0])
-#    v, c = l.pop()
-#    if c == {}:
-#      d = add_dict(dict(to_list(v)), d)
-#    else:
-#      for child in c.values():
-#        l.append(c)
-#  return d
-
-def do_lookup(trie, key):
-  #print(key)
-  if len(key) == 0 or key[0] not in trie[1].keys():
-    return trie2list(trie)
-  return do_lookup(trie[1][key[0]], key[1:])
-
-def nbest(l, t):
-  l = sorted(l)[-t:]
-  l.reverse()
-  return l
-
-def lookup(key):
-  """Return pronunciations for key"""
-  if key.rstrip() == '':
-    raise ValueError # TODO this is debug
-  return nbest(do_lookup(trie, key[::-1] + ' '), 5)
-
-if __name__ == '__main__':
-  while True:
-    line = sys.stdin.readline()
-    if not line:
-      break
-    line = line.lower().lstrip().rstrip()
-    pprint(lookup(line))
-
diff --git a/rhyme.py b/rhyme.py
@@ -3,7 +3,7 @@
 import re
 import sys
 from pprint import pprint
-import pron
+import frhyme
 import functools
 
 vowel = list("Eeaio592O#@y%u")
@@ -45,7 +45,7 @@ def lookup(s):
   s = s.split(' ')[-3:]
   #pprint(s)
   sets = list(map((lambda a : set([x[1] for x in
-    pron.lookup(escape(a))])), s))
+    frhyme.lookup(escape(a))])), s))
   #print("HERE")
   #pprint(sets)
   return functools.reduce(concat_couples, sets, set(['']))