reorder.py (2126B)
#!/usr/bin/env python3
"""Sort and pretty-print '|'-separated rhyme records read from standard input.

Each input line is split on '|' into 11 fields (indices 0-10); two derived
fields are then appended:

* index 11 -- length of the common suffix of fields 2 and 4
* index 12 -- length of the common suffix of fields 1 and 3

Field meanings as far as this script shows (NOTE(review): inferred from the
sort-key comment and the output format -- confirm against the producer):
0 = frequency of the interpretation, 1 = query word, 2 = query pronunciation,
3 = candidate word, 4 = candidate pronunciation, 5 = candidate frequency,
9 = base form of the candidate, 10 = free-text description.
"""

# TODO test "suis", "lui"... no multiple interpretations!
# TODO assonance

import sys


def lcs(x, y):
    """Return the length of the longest common suffix of x and y.

    Returns 0 when either sequence is empty (the bounds are checked before
    indexing, so empty input no longer raises IndexError).
    """
    limit = min(len(x), len(y))
    n = 0
    while n < limit and x[-1 - n] == y[-1 - n]:
        n += 1
    return n


def pad(x, n):
    """Right-pad string x with spaces to at least n characters."""
    return x + ' ' * max(0, n - len(x))


def mp(items, lens, field):
    """Return str(items[field]) padded to the column width lens[field]."""
    return pad(str(items[field]), lens[field])


# Unused by this script; kept so the module's global names stay unchanged.
by_pron = {}
keys = []
lines = []

# Label for a rhyme sharing 0, 1, 2 or 3+ trailing phonemes.
names = {0: "pour l'œil", 1: "pauvre", 2: "suffisante", 3: "riche"}


def key(l):
    """Return the sort key of record l.

    Order: frequency of interpretation desc, phonemes desc, eye desc,
    same as base first, frequency desc, then alphabetical.
    """
    return (-float(l[0]), -l[11], -l[12], 0 if l[9] == l[3] else 1,
            -float(l[5]), l[3])


# Widest string seen per column (11 input fields + 2 derived fields).
mx = [0] * 13

# Sub-header waiting to be printed before the next displayed record.
header = None


def display(l):
    """Print record l as one aligned line, preceded by any pending header."""
    global header
    if header is not None:
        print(header)
        header = None
    # The base form is only mentioned when it differs from the word itself.
    origin = ', from "' + l[9] + '"' if l[9] != l[3] else ''
    print(mp(l, mx, 3) + ' w' + mp(l, mx, 12) + ' '
          + mp(l, mx, 6) + '-' + mp(l, mx, 7)
          + ('+' if l[5] == '1' else ' ') + ' '
          + mp(l, mx, 4) + ' ' + mp(l, mx, 5)[0:9] + ' '
          + l[10] + origin)


def _read_records(stream):
    """Parse every line of stream into a record and update the widths in mx."""
    records = []
    for line in stream:
        rec = line.rstrip().split('|')
        # The suffix lengths are computed on the raw strings, before the
        # pronunciations are wrapped in brackets for display.
        rec.append(lcs(rec[2], rec[4]))
        rec.append(lcs(rec[1], rec[3]))
        rec[4] = '[' + rec[4] + ']'
        rec[2] = '[' + rec[2] + ']'
        for i, value in enumerate(rec):
            mx[i] = max(mx[i], len(str(value)))
        records.append(rec)
    return records


def _report(records):
    """Print records in key order, grouped by query pronunciation."""
    global header
    header = None
    seen = set()
    cache = None
    cache_shown = False
    last2 = None
    last11 = None

    for rec in sorted(records, key=key):
        if rec[2] != last2:
            last2 = rec[2]
            # NOTE(review): rec[2] already carries brackets, so this prints
            # "[[...]]"; left as-is to keep the output format unchanged.
            print("## For %s [%s], freq %s" % (rec[1], rec[2], rec[0][0:9]))
            cache = None
            cache_shown = False
            seen = set()
            # Force a fresh sub-header for the new group, even when its first
            # record has the same phoneme count as the previous group's last.
            last11 = None
        if rec[11] != last11:
            last11 = rec[11]
            # TODO check if vowel is in there
            header = " -- %d phonemes (%s)" % (
                rec[11], names.get(min(3, rec[11]), ''))
        if rec[3] == rec[1] and rec[2] == rec[4]:
            # this is the query word, only display if several exist
            if cache is None:
                cache = rec
            else:
                if not cache_shown:
                    # Show the held-back first occurrence exactly once.
                    display(cache)
                    cache_shown = True
                display(rec)
            seen.add(rec[9])
            continue
        if rec[9] in seen:
            # skip words with a seen base
            continue
        seen.add(rec[9])
        display(rec)


def main(stream=None):
    """Read records from stream (default: sys.stdin) and print the report."""
    if stream is None:
        stream = sys.stdin
    _report(_read_records(stream))


if __name__ == "__main__":
    main()