
generate chansons en laisse
git clone
Log | Files | Refs (2467B)

      1 #!/usr/bin/python3
import sys
from collections import defaultdict, deque

from common import vowels, semivowels, fem, start
      7 g = defaultdict(lambda: defaultdict(lambda: []))
      9 def explore(graph, fr):
     10     seen = set()
     11     queue = [fr]
     12     while len(queue) > 0:
     13         pos = queue.pop(0)
     14         if pos in seen:
     15             continue
     16         seen.add(pos)
     17         for t in graph[pos].keys():
     18             queue.append(t)
     19     return seen
     22 for l in sys.stdin.readlines():
     23     f = l.strip().split('\t')
     24     if len(f) < 3:
     25         f.append('')
     26     w, p, syl = f
     27     syl.strip()
     28     p = p.split('-')
     29     allp = ''.join(p)
     30     fems = fem(w)
     31     # w allows us to go from its first syllabe to its last, toggling fem status
     32     # we can go to something that requires less consonants
     33     seenvow = False
     34     for i in range(len(allp)):
     35         if allp[i] in vowels:
     36             if seenvow:
     37                 break
     38             seenvow = True
     39         if not seenvow:
     40             continue
     41         # we are between first vowel included and second vowel excluded
     42         fr = (not fems, allp[:i+1])
     43         to = (fems, p[-1])
     44         if allp.startswith(to[1]):
     45             # loop (e.g., trinitrine), not very pretty
     46             continue
     47         #if (w.startswith('mandib')):
     48             #print(w, fr, to)
     49         g[fr][to].append((w, syl))
     50 #        todel = set()
     51 #        toadd = True
     52 #        for (ww, syl) in g[fr][to]:
     53 #            if ww.startswith(w) and w != ww:
     54 #                # we will kill something longer
     55 #                todel.add(ww)
     56 #            elif w.startswith(ww):
     57 #                # we already have something shorter
     58 #                toadd = False
     59 #                break
     60 #        g[fr][to] -= todel
     61 #        if toadd:
     62 #            g[fr][to].add((w, syl))
     64 # compute reverse graph
     65 rg = defaultdict(lambda: defaultdict(lambda: set()))
     66 for f in g.keys():
     67     for t in g[f].keys():
     68         rg[t][f] = g[f][t]
     70 # only keep what's accessible from start
     71 access = explore(g, start)
     72 coaccess = explore(rg, start)
     73 useful = access & coaccess
     74 useful.add(start)
     76 for f in g.keys():
     77     for t in g[f].keys():
     78         #if (f[1] == 'm@' and t[1] == 'by'):
     79             #print ("HOHOHO", t, start, f in useful, t in useful)
     80         if f not in useful or t not in useful:
     81             continue
     82         # only print the most frequent word, for now
     83         for w in g[f][t][:1]:
     84             print ("%s\t%s\t%s\t%s\t%s\t%s" % ('f' if f[0] else 'm', f[1], 'f' if t[0] else 'm', t[1], w[0], w[1]))