# nplus7.py (8721B)
#!/usr/bin/python3 -uO

"""Undocumented hack to play oulipo's dictionary game with plint"""

import copy
import localization
from template import Template
from rhyme import Rhyme
import re
import sys
import time
from common import normalize, apostrophes, strip_accents_one, vowels, consonants
from verse import elision, remove_trivial
from pos import postag
# `query` is imported from the ../drime directory, so put it on the path first.
sys.path.insert(0, "../drime")
from query import query

# Column indices into one tab-separated row of the lexicon file (sys.argv[1]).
ORTHO = 0
CGRAM = 3
GENRE = 4
NOMBRE = 5
FREQ = 7

localization.init_locale()

# Lexicon categories (values of the CGRAM column) that may be substituted.
cats = ['ADV', 'NOM', 'ADJ']
# Part-of-speech tags (values produced by postag) that may be substituted.
posses = ['A', 'N', 'ADV']
# Maps each lexicon category of `cats` to the matching tag of `posses`.
corr = {
    'ADV': 'ADV',
    'NOM': 'N',
    'ADJ': 'A'}
varcats = ['NOM', 'ADJ']
# Values used when a lexicon row leaves GENRE / NOMBRE empty (see adj()).
genres = ['m', 'f']
nombres = ['s', 'p']
# TODO options to favor frequent, nonfrequent words, words from a certain theme
# TODO verbs
# Minimum FREQ value for a reading to be recorded among the "frequent" ones.
fthresh = 1
# Words that are never indexed as replacement candidates.
exclude = ['travers', 'loin', 'ainsi', 'assez', 'guère', 'pas', 'partout', 'ni',
           'ne', 'là-bas', 'tant', 'est-ce', 'beau', 'fois', 'milieu', 'présent',
           'peu', 'peur', 'très', 'enfin', 'tous', 'tout', 'toute', 'toutes', 'bien',
           'peine', 'autre', 'million', 'millier', 'plus', 'seul', 'puis', 'côté',
           'encore', 'encor', 'plus', 'point', 'quelque']
# Time budget, in seconds (compared against time.time() deltas), spent
# looking for a replacement of a single word.
mdur = 5

# Lexicon file; it is read and closed by the loading code further down.
f = open(sys.argv[1], 'r')

# Default rank of the acceptable replacement to pick (N in "N+7").
offset = int(sys.argv[3])

# ORTHO -> list of lexicon rows sharing that spelling.
words = {}
# Ordered list of (word, readings, frequent readings) tuples.
mwords = []
# word -> position of its tuple in mwords.
idx = {}


def adj(x, y):
    """Return `y` when `x` is the empty string, else the one-item list `[x]`.

    Used to expand an unspecified GENRE/NOMBRE field into every allowed
    value while keeping a specified field as the single choice.
    """
    if x == '':
        return y
    return [x]


def cutword(word):
    """Split `word` into a `(before, main, after)` triple of strings.

    Every character listed in `apostrophes` is first rewritten to "'".
    When the result contains an apostrophe, everything up to and including
    the last apostrophe is prepended to `before`, and the remainder is cut
    recursively.  Otherwise `before` collects the leading characters that
    are not letters, `main` collects the following run of letters (a '-'
    counts as a letter once `main` has started), and `after` collects the
    non-letter characters that follow.

    NOTE(review): letters that occur after the first trailing non-letter
    are discarded, as in the original code -- confirm this is intended.
    """
    x = re.sub("[" + apostrophes + "]", "'", word)
    if "'" in x:
        s = x.split("'")
        before, main, after = cutword(s[-1])
        return "'".join(s[:-1]) + "'" + before, main, after
    before = ""
    main = ""
    after = ""
    started = False
    finished = False
    for c in x:
        letters = vowels + consonants + ('-' if started else '')
        if strip_accents_one(c)[0].lower() not in letters:
            if started:
                # first non-letter after the word body closes `main`
                finished = True
                after = after + c
            else:
                before = before + c
        elif not finished:
            started = True
            main = main + c
    return before, main, after

def sure(poss):
    """Return True when every reading in `poss` has a category in `cats`.

    `poss` is an iterable of (category, gender, number) triples; an empty
    iterable yields True.
    """
    return all(cat in cats for (cat, _genre, _nombre) in poss)


def possible(poss, tag):
    """Return True when `tag` is in `posses` and some reading of `poss`
    has a category in `cats`."""
    if tag not in posses:
        return False
    return any(cat in cats for (cat, _genre, _nombre) in poss)


def ok_extends(w, w2, tag):
    """Tell whether `w2` may stand in for `w`.

    `w2` (lowercased) must be indexed, must offer every replaceable reading
    of `w`, must differ from `w`, and the set of `elision(w)` values must be
    included in the set of `elision(w2)` values.  A word `w` missing from
    the index is given the readings of a singular noun of either gender.
    `tag` is currently unused (the condition using it is disabled).
    """
    try:
        p = mwords[idx[w]][1]
    except KeyError:
        p = [('NOM', 'm', 's'), ('NOM', 'f', 's')]
    w2 = w2.lower()
    if w2 not in idx:
        return False
    p2 = mwords[idx[w2]][1]
    for (cat, a, b) in p:
        # and corr[cat] == tag
        if cat in cats and (cat, a, b) not in p2:
            return False
    return w2 != w and set(elision(w)) <= set(elision(w2))


def valid_word(w, tag):
    """Return True when `w` is indexed and is either surely replaceable
    (see sure()) or possibly replaceable given its tag (see possible())."""
    if w not in idx:
        return False
    p = mwords[idx[w]][1]
    return sure(p) or possible(p, tag)


def change(w, tag):
    """Yield the candidates for `w`, walking `mwords` circularly.

    The walk starts at the position of `w` in `mwords`, or, when `w` is not
    indexed, at the number of indexed words that compare lower than `w`.
    Only words accepted by ok_extends() are yielded.  `w` itself is yielded
    last, which the caller uses as an end-of-candidates marker.
    """
    try:
        i = idx[w]
    except KeyError:
        i = len([w2 for w2 in idx if w2 < w])
    for (w2, _readings, _frequent) in mwords[i:] + mwords[:i]:
        if ok_extends(w, w2, tag):
            yield w2
    yield w


# --- Load the lexicon (file object `f`, opened above on sys.argv[1]) ---
try:
    next(f, None)  # skip the header line
    for row_text in f:
        row = row_text.split('\t')
        words.setdefault(row[ORTHO], []).append(row)
finally:
    f.close()

# --- Load the plint template describing the expected verse form ---
with open(sys.argv[2], 'r') as template_file:
    template = Template(template_file.read())
template.options['phon_supposed_ok'] = False
template.reject_errors = True

# --- Build the sorted candidate list `mwords` and its index `idx` ---
lwords = sorted(words)

for w in lwords:
    if w in exclude:
        continue
    poss = set()
    oposs = set()
    for entry in words[w]:
        frequent = float(entry[FREQ]) >= fthresh
        for cat in entry[CGRAM].split(','):
            for genre in adj(entry[GENRE], genres):
                for nombre in adj(entry[NOMBRE], nombres):
                    reading = (cat, genre, nombre)
                    poss.add(reading)
                    if frequent and cat in cats:
                        oposs.add(reading)
    if poss:
        idx[w] = len(mwords)
        mwords.append((w, poss, oposs))

# `\s+` rather than `\s*`: since Python 3.7 re.split() also splits on empty
# matches, so a pattern that can match nothing would cut between every
# pair of characters.
whitespace_regexp = re.compile(r"(\s+)")
blank_regexp = re.compile(r"^\s*$")

# --- Rewrite every line read from standard input ---
for raw in iter(sys.stdin.readline, ''):
    raw = raw.strip()
    if not raw:
        print(raw)
        continue
    # Tokens alternate between words and the whitespace separating them.
    s = whitespace_regexp.split(raw)
    # A trailing integer on the line overrides the global offset.
    try:
        loffset = int(s[-1])
    except ValueError:
        loffset = offset
    else:
        s = s[:-1]
        # Also drop the separator that preceded the offset, so that the
        # last token (used below as the rhyming word) is a real word.
        while s and not s[-1].strip():
            s.pop()
    errors = template.check(' '.join(s))
    template.back()  # rewind: the final line is checked again below
    if errors:
        print("PROBLEM with ORIGINAL")
        print(errors.report())
        continue
    lw = s[-1]
    s = remove_trivial(s, (lambda tok: blank_regexp.match(tok) or
                           len(normalize(tok, rm_all=True)) == 0))
    r = []  # rewritten tokens, collected from last to first
    constraint = template.template[
        template.position % len(template.template)].constraint
    rhyme = Rhyme(lw, constraint, template.mergers, template.options)
    scut = [cutword(wfull) for wfull in s]
    tags = postag(scut)
    first = True
    # Words are processed from the end of the line towards its beginning.
    for i, (before, ow, after) in reversed(list(enumerate(scut))):
        w = ow.lower()
        started = time.time()
        ok = False
        acceptable = 0
        # Reset for every word so the rhyme pre-check below can neither be
        # read before assignment nor leak from the line-final word.
        was_first = False
        capitalized = len(ow) > 0 and ow[0] == ow[0].upper()
        if valid_word(w, tags[i]) or (capitalized and i > 0):
            if first and len(normalize(w)) > 0:
                # Line-final word: take the candidates from query(), keeping
                # those sorted after `w`.
                first = False
                was_first = True
                rr, _c, _sur = query(w)
                try:
                    lrhymes = sorted([x['word'] for x in rr['result']] + [w])
                    it = lrhymes[lrhymes.index(w) + 1:]
                except KeyError:
                    it = change(w, tags[i])
            else:
                it = change(w, tags[i])
            for w2 in it:
                if not ok_extends(w, w2, tags[i]):
                    continue
                if time.time() - started > mdur:
                    break  # timeout
                if w2.lower() == w.lower():
                    break
                line = ' '.join(
                    s[:i] + [before + w2 + after] + list(reversed(r)))
                # NOTE(review): as in the original, the rhyme pre-check only
                # applies to the first candidate of the line-final word;
                # confirm whether every candidate should be checked.
                if was_first:
                    was_first = False
                    nrhyme = copy.deepcopy(rhyme)
                    nrhyme.feed(w2, constraint)
                    if not nrhyme.satisfied():
                        continue
                errors = template.check(line, quiet=True)
                template.back()
                if not errors:
                    acceptable += 1
                    # Keep the loffset-th candidate that fits the template.
                    if acceptable == loffset:
                        r.append(w2)
                        ok = True
                        break
        if not ok:
            r.append(w)
        if capitalized:
            r[-1] = r[-1][0].upper() + r[-1][1:]
        r[-1] = before + r[-1] + after
    final = ''.join(reversed(r))
    errors = template.check(final)
    if errors:
        print("PROBLEM")
        print(errors.report())
        break
    print(final)