commit 2d3ffd72edd9f15960976d9be4779e84fe02314f
parent 320f950c9cc6d5fea94b1538bbf0d10b51ff6a79
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 27 Aug 2015 00:41:53 +0200
oops
Diffstat:
graph2.py | | | 81 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 81 insertions(+), 0 deletions(-)
diff --git a/graph2.py b/graph2.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+
+import sys
+from common import vowels, semivowels, fem, start
+from collections import defaultdict
+
# Edge-labelled graph: g[source][target] is the set of labels on that edge.
# Missing nodes and edges are created on first access.
g = defaultdict(lambda: defaultdict(set))
+
def explore(graph, fr):
    """Return the set of nodes reachable from `fr` in `graph`.

    `graph` maps each node to a mapping (or any iterable) of its successor
    nodes.  `fr` itself is always part of the result.  Nodes without an
    entry in `graph` are treated as having no successors; the lookup never
    inserts anything into `graph`, even when it is a defaultdict.
    """
    seen = set()
    # Traversal order does not matter for reachability, so use the list as a
    # stack: list.pop() is O(1) whereas list.pop(0) is O(n).
    pending = [fr]
    while pending:
        pos = pending.pop()
        if pos in seen:
            continue
        seen.add(pos)
        # .get() avoids both a KeyError on plain dicts and the silent
        # creation of empty entries on defaultdicts.
        pending.extend(t for t in graph.get(pos, ()) if t not in seen)
    return seen
+
+
# Build the graph from stdin.  Each line holds tab-separated fields: a word,
# a hyphen-separated string p (presumably its pronunciation cut into
# syllables -- confirm against the input format), and an optional third
# field syl that is carried along unchanged into the edge label.
for l in sys.stdin:
    f = l.strip().split('\t')
    if f == ['']:
        # blank line: nothing to parse
        continue
    if len(f) < 3:
        # the third field is optional
        f.append('')
    w, p, syl = f
    # str.strip() returns a new string, so the result must be kept
    syl = syl.strip()
    p = p.split('-')
    allp = ''.join(p)
    fems = fem(w)
    # w allows us to go from its first syllable to its last, toggling fem
    # status; we can also start from something that requires fewer
    # consonants, hence one source node per prefix ending between the first
    # vowel (included) and the second vowel (excluded)
    seenvow = False
    for i, c in enumerate(allp):
        if c in vowels:
            if seenvow:
                break
            seenvow = True
        if not seenvow:
            continue
        fr = (not fems, allp[:i + 1])
        to = (fems, p[-1])
        entries = g[fr][to]
        todel = set()
        toadd = True
        # use names distinct from w/syl: the current line's values are
        # still needed after this loop
        for entry in entries:
            ww = entry[0]
            if ww.startswith(w) and w != ww:
                # we will kill something longer; record the whole entry,
                # since the set holds (word, syl) tuples, not bare words
                todel.add(entry)
            elif w.startswith(ww):
                # we already have something shorter (or w itself)
                toadd = False
                break
        entries.difference_update(todel)
        if toadd:
            entries.add((w, syl))
+
# Reverse graph: same edges as g with source and target swapped.  The label
# sets are shared with g, not copied.
rg = defaultdict(lambda: defaultdict(set))
for src, successors in g.items():
    for dst, labels in successors.items():
        rg[dst][src] = labels
+
# Keep only the nodes that are reachable from start and from which start is
# reachable (i.e. reachable from start in the reversed graph).
access = explore(g, start)
coaccess = explore(rg, start)
useful = (access & coaccess) | {start}
+
# Print every labelled edge whose two endpoints are both useful, as six
# tab-separated columns: source flag (f/m), source string, target flag
# (f/m), target string, then the two components of the label.
for src, successors in g.items():
    if src not in useful:
        continue
    for dst, labels in successors.items():
        if dst not in useful:
            continue
        src_flag = 'f' if src[0] else 'm'
        dst_flag = 'f' if dst[0] else 'm'
        for label in labels:
            row = (src_flag, src[1], dst_flag, dst[1], label[0], label[1])
            print("%s\t%s\t%s\t%s\t%s\t%s" % row)
+
+