songleash

generate chansons en laisse
git clone https://a3nm.net/git/songleash/
Log | Files | Refs

commit 50379b128bc817f4d7d18eb03556f3bf3c03a506
parent 1672857fa9897ecd847992501ba142c11f33a7bf
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue,  4 Aug 2015 18:32:38 +0200

TODOs

Diffstat:
common.py | 1+
cycle.py | 4++++
graph.py | 18+++++++-----------
script.sh | 3+++
4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/common.py b/common.py
@@ -6,6 +6,7 @@ vowels = 'io9@2EeaOy#$u()'
 vowels_script = "aeiouy"
 semivowels = 'j8w'
 
+# TODO replace by startswith
 def is_pref(u, v):
   if len(v) < len(u):
     return False
diff --git a/cycle.py b/cycle.py
@@ -3,6 +3,8 @@ import sys
 
 from common import fem, vowels_script, rmacc, semivowels
 
+# TODO use defaultdict
+
 g = {}
 START = "sEl"
 bestlen = 0
@@ -55,6 +57,8 @@ def print_list(l):
   print("------------------")
   print("")
 
+# TODO: do something more clever
+
 def dfs(l):
   global g
   global bestlen
diff --git a/graph.py b/graph.py
@@ -3,6 +3,8 @@ import sys
 
 from common import vowels, is_pref, semivowels, fem
 
+# TODO use defaultdict
+
 g = {}
 
 def nedd(myg):
@@ -55,6 +57,8 @@ for l in sys.stdin.readlines():
   p = p.split(' ')
   for i in range(len(p[2])+1):
     f = p[0] + p[1] + p[2][:i]
+    # TODO: this cut is non-optimal: should be "radio/gramme", "géo/graphe",
+    # but how to tell?
     t = ((p[-2][-2:] if p[-2][-1] in semivowels else p[-2][-1]) if p[-1][0] in vowels else ((p[-3][-2:] if p[-3][-1] in semivowels else p[-3][-1]) + p[-2])) + p[-1]
     # print ("%s : %s -> %s" % (w, f, t))
 
@@ -77,6 +81,9 @@ for l in sys.stdin.readlines():
       continue
     g[f][t].add(w)
 
+
+# TODO: replace this by a true SCC
+
 ned = nedd(g)
 print(ned, file=sys.stderr)
 
@@ -96,15 +103,4 @@ for f in g.keys():
     for w in g[f][t]:
       print ("%s %s %s" % (f, t, w))
 
-#
-#for j in range(100):
-# print("-----------------")
-# f = list(g.keys())[j]
-# print(f)
-# for i in range(100):
-# t = list(g[f])[0]
-# if t[0] not in g.keys():
-# continue
-# print("%s -[%s]-> %s" % (f, t[1], t[0]))
-# f = t[0]
 
diff --git a/script.sh b/script.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+# TODO: add n-grams from wikipedia article titles with unambiguous
+# pronunciation, known words, and suitable POS
+
 #cat Lexique371/Bases+Scripts/Lexique3.txt |
 # ./lexique_fix.sh| cut -f1,2,4 | uniq > lexique.txt
 pv lexique.txt| ./only3.py > lexique3