homophones.py (1210B)
#!/usr/bin/python3
"""Print groups of homophones found in a tab-separated lexicon.

Inputs:
  * ``tlf_not_exist`` -- one word per line; these spellings are ignored.
  * ``tlf_defs``      -- ``<definition>\t<word>`` per line.
  * standard input    -- seven tab-separated columns per line:
    ortho, phon, lemme, cgam, genre, nombre, freq
    (presumably spelling, pronunciation, lemma, grammatical category,
    gender, number and frequency -- confirm against the lexicon used).

Entries sharing the same (phon, cgam) pair are grouped together.  A group
is printed only when it contains at least two distinct definitions and at
least two distinct lemmas.  Groups are printed in ascending order of the
smallest frequency found among their members, followed by a summary line.
"""

from collections import defaultdict, namedtuple
import sys

# One lexicon row, augmented with the definition looked up for its spelling.
# ``freq`` is kept as the raw string read from the input.
Entry = namedtuple(
    "Entry", ["ortho", "definition", "cgam", "genre", "nombre", "freq", "lemme"]
)


def load_blacklist(lines):
    """Return the set of lower-cased, stripped words found in *lines*."""
    return {line.lower().strip() for line in lines}


def load_defs(lines):
    """Return a ``word -> definition`` dict built from *lines*.

    Each line is lower-cased and must hold ``<definition>\\t<word>``.
    Blank lines are skipped; any other malformed line raises ``ValueError``.
    """
    defs = {}
    for line in lines:
        cleaned = line.lower().strip()
        if not cleaned:
            continue
        definition, word = cleaned.split("\t")
        defs[word] = definition
    return defs


def freq_value(freq):
    """Return *freq* as a float so frequencies compare numerically.

    Comparing the raw strings would order "10.2" before "9.5".  Values that
    cannot be parsed (e.g. a header row) sort last.
    """
    try:
        return float(freq)
    except ValueError:
        return float("inf")


def group_entries(lines, blacklist, defs):
    """Group lexicon rows from *lines* by their ``(phon, cgam)`` pair.

    Rows whose lower-cased spelling is in *blacklist* are dropped.  The
    definition of each spelling is taken from *defs* (empty string when
    missing).  Blank lines are skipped; a row that does not have exactly
    seven tab-separated fields raises ``ValueError``.
    """
    phons = defaultdict(list)
    for line in lines:
        cleaned = line.strip()
        if not cleaned:
            continue
        ortho, phon, lemme, cgam, genre, nombre, freq = cleaned.split("\t")
        ortho = ortho.lower()
        lemme = lemme.lower()
        if ortho in blacklist:
            continue
        phons[(phon, cgam)].append(
            Entry(ortho, defs.get(ortho, ""), cgam, genre, nombre, freq, lemme)
        )
    return phons


def homophone_groups(phons):
    """Yield the entry lists of *phons* that qualify as homophone groups.

    Groups are visited in ascending order of their smallest numeric
    frequency.  A group is kept only if its members have at least two
    distinct definitions and at least two distinct lemmas.
    """
    ordered_keys = sorted(
        phons, key=lambda key: min(freq_value(e.freq) for e in phons[key])
    )
    for key in ordered_keys:
        entries = phons[key]
        if len({e.definition for e in entries}) < 2:
            continue
        if len({e.lemme for e in entries}) < 2:
            continue
        yield entries


def format_group(entries):
    """Return the one-line textual form of a homophone group."""
    return " + ".join(
        "%s (%s %s %s)" % (e.ortho, e.cgam, e.genre, e.nombre) for e in entries
    )


def main():
    """Read the word lists and stdin, then print every homophone group."""
    with open("tlf_not_exist", "r") as f:
        blacklist = load_blacklist(f)

    with open("tlf_defs", "r") as f:
        defs = load_defs(f)

    phons = group_entries(sys.stdin, blacklist, defs)

    total_words = 0
    total_groups = 0
    for entries in homophone_groups(phons):
        print(format_group(entries))
        total_words += len(entries)
        total_groups += 1

    print("Total: %d words in %d groups" % (total_words, total_groups))


if __name__ == "__main__":
    main()