commit 50379b128bc817f4d7d18eb03556f3bf3c03a506
parent 1672857fa9897ecd847992501ba142c11f33a7bf
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 4 Aug 2015 18:32:38 +0200
TODOs
Diffstat:
4 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/common.py b/common.py
@@ -6,6 +6,7 @@ vowels = 'io9@2EeaOy#$u()'
vowels_script = "aeiouy"
semivowels = 'j8w'
+# TODO: replace by str.startswith
def is_pref(u, v):
if len(v) < len(u):
return False
diff --git a/cycle.py b/cycle.py
@@ -3,6 +3,8 @@
import sys
from common import fem, vowels_script, rmacc, semivowels
+# TODO: use collections.defaultdict
+
g = {}
START = "sEl"
bestlen = 0
@@ -55,6 +57,8 @@ def print_list(l):
print("------------------")
print("")
+# TODO: do something more clever
+
def dfs(l):
global g
global bestlen
diff --git a/graph.py b/graph.py
@@ -3,6 +3,8 @@
import sys
from common import vowels, is_pref, semivowels, fem
+# TODO: use collections.defaultdict
+
g = {}
def nedd(myg):
@@ -55,6 +57,8 @@ for l in sys.stdin.readlines():
p = p.split(' ')
for i in range(len(p[2])+1):
f = p[0] + p[1] + p[2][:i]
+ # TODO: this cut is non-optimal: should be "radio/gramme", "géo/graphe",
+ # but how to tell?
t = ((p[-2][-2:] if p[-2][-1] in semivowels else p[-2][-1]) if p[-1][0] in vowels else ((p[-3][-2:] if p[-3][-1] in
semivowels else p[-3][-1]) + p[-2])) + p[-1]
# print ("%s : %s -> %s" % (w, f, t))
@@ -77,6 +81,9 @@ for l in sys.stdin.readlines():
continue
g[f][t].add(w)
+
+# TODO: replace this by a true SCC (strongly connected components) computation
+
ned = nedd(g)
print(ned, file=sys.stderr)
@@ -96,15 +103,4 @@ for f in g.keys():
for w in g[f][t]:
print ("%s %s %s" % (f, t, w))
-#
-#for j in range(100):
-# print("-----------------")
-# f = list(g.keys())[j]
-# print(f)
-# for i in range(100):
-# t = list(g[f])[0]
-# if t[0] not in g.keys():
-# continue
-# print("%s -[%s]-> %s" % (f, t[1], t[0]))
-# f = t[0]
diff --git a/script.sh b/script.sh
@@ -1,5 +1,8 @@
#!/bin/bash
+# TODO: add n-grams from wikipedia article titles with unambiguous
+# pronunciation, known words, and suitable POS
+
#cat Lexique371/Bases+Scripts/Lexique3.txt |
# ./lexique_fix.sh| cut -f1,2,4 | uniq > lexique.txt
pv lexique.txt| ./only3.py > lexique3