nplus7.py - plint_extra - various extra tools around plint

nplus7.py (8721B)
      1 #!/usr/bin/python3 -uO
      2 
      3 """Undocumented hack to play oulipo's dictionary game with plint"""
      4 
      5 import copy
      6 import localization
      7 from template import Template
      8 from rhyme import Rhyme
      9 import re
     10 import sys
     11 import time
     12 from common import normalize, apostrophes, strip_accents_one, vowels, consonants
     13 from verse import elision, remove_trivial
     14 from pos import postag
     15 sys.path.insert(0, "../drime")
     16 from query import query
     17 
# Column indices into one tab-split row of the lexicon file (sys.argv[1]).
# ORTHO is the key under which rows are grouped in `words`; CGRAM is split on
# ',' to get the categories; empty GENRE / NOMBRE fields are expanded to every
# value of `genres` / `nombres`; FREQ is parsed with float() and compared to
# `fthresh`.
ORTHO = 0
CGRAM = 3
GENRE = 4
NOMBRE = 5
FREQ = 7

# NOTE(review): presumably sets up the locale used by the plint modules --
# confirm against the `localization` module.
localization.init_locale()

# Lexicon categories that are candidates for substitution.
cats = ['ADV', 'NOM', 'ADJ']
# Tag values accepted by possible(); presumably the tagset returned by
# pos.postag -- TODO confirm.
posses = ['A', 'N', 'ADV']
# Lexicon category -> tag name.  Only referenced from a commented-out condition
# in ok_extends().
corr = {
    'ADV': 'ADV',
    'NOM': 'N',
    'ADJ': 'A'}
# Only referenced from commented-out code in this file.
varcats = ['NOM', 'ADJ']
# Values substituted for an empty GENRE / NOMBRE field when building `mwords`.
genres = ['m', 'f']
nombres = ['s', 'p']
# TODO options to favor frequent, nonfrequent words, words from a certain theme
# TODO verbs
# Minimum FREQ value for a reading to be recorded in the third element of an
# `mwords` entry.
fthresh = 1
# Lexicon keys that are never indexed, hence never valid for substitution.
# ('plus' is listed twice; harmless for a membership test.)
exclude = ['travers', 'loin', 'ainsi', 'assez', 'guère', 'pas', 'partout', 'ni',
    'ne', 'là-bas', 'tant', 'est-ce', 'beau', 'fois', 'milieu', 'présent',
    'peu', 'peur', 'très', 'enfin', 'tous', 'tout', 'toute', 'toutes', 'bien',
    'peine', 'autre', 'million', 'millier', 'plus', 'seul', 'puis', 'côté',
    'encore', 'encor', 'plus', 'point', 'quelque']
# Time budget, in seconds, for the candidate search of a single word.
mdur = 5

# Lexicon file handle; it is read and closed further down in the script.
f = open(sys.argv[1], 'r')

# Default rank of the acceptable candidate to pick; an input line may override
# it with a trailing integer.
offset = int(sys.argv[3])

# words:  ORTHO value -> list of tab-split lexicon rows sharing that value.
# mwords: list of (word, readings, frequent_readings) tuples, filled in sorted
#         word order.
# idx:    word -> position of that word in `mwords`.
words = {}
mwords = []
idx = {}
     52 
     53 def adj(x, y):
     54   if x == '':
     55     return y
     56   return [x]
     57 
     58 def cutword(word):
     59   x = re.sub("[" + apostrophes + "]", "'", word)
     60   if "'" in x:
     61     s = x.split("'")
     62     before, main, after = cutword(s[-1])
     63     return "'".join(s[:-1]) + "'" + before, main, after
     64   before = ""
     65   main = ""
     66   after = ""
     67   started = False
     68   finished = False
     69   for c in x:
     70     if not strip_accents_one(c)[0].lower() in vowels + consonants + ('-' if
     71         started else ''):
     72       if started:
     73         finished = True
     74         after = after + c
     75         continue
     76       before = before + c
     77       continue
     78     if not finished:
     79       started = True
     80       main = main + c
     81   return before, main, after
     82 
     83 def sure(poss):
     84   for (cat, x, y) in poss:
     85     if cat not in cats:
     86       return False
     87   return True
     88 
     89 def possible(poss, tag):
     90   for (cat, x, y) in poss:
     91     if cat in cats:
     92       if tag in posses:
     93         return True
     94   return False
     95 
     96 def ok_extends(w, w2, tag):
     97   try:
     98     p = mwords[idx[w]][1]
     99   except KeyError:
    100     p = [('NOM', 'm', 's'), ('NOM', 'f', 's')]
    101   w2 = w2.lower()
    102   if w2 not in idx.keys():
    103     return False
    104   p2 = mwords[idx[w2]][1]
    105   for (cat, a, b) in p:
    106     # and corr[cat] == tag 
    107     if cat in cats and (cat, a, b) not in p2:
    108       return False
    109   if w2 != w and set(elision(w)) <= set(elision(w2)):
    110     return True
    111   return False
    112 
    113 def valid_word(w, tag):
    114   global words, lists, idx
    115   if w not in idx.keys():
    116     return False
    117   p = mwords[idx[w]][1]
    118   if not sure(p) and not possible(p, tag):
    119     return False
    120   return True
    121 
    122 def change(w, tag):
    123   #print(w, sure(p), tag, possible(p, tag))
    124   try:
    125     i = idx[w]
    126   except KeyError:
    127     i = len([w2 for w2 in idx.keys() if w2 < w])
    128   for (w2, rare, p2) in mwords[i:] + mwords[:i]:
    129     if ok_extends(w, w2, tag):
    130       yield w2
    131   yield w
    132   # p = idx[cat][genre][nombre][w]
    133   # n = len(lists[cat][genre][nombre])
    134   # return lists[cat][genre][nombre][(p+offset) % n]
    135 
    136   # if w not in words.keys():
    137   #   return w
    138   # if len(words[w]) > 1:
    139   #   return w
    140   # entry = words[w][0]
    141   # if entry[CGRAM] not in cats:
    142   #   return w
    143   # cat = entry[CGRAM]
    144   # genre = entry[GENRE]
    145   # nombre = entry[NOMBRE]
    146   # if cat in varcats and (genre not in genres or nombre not in nombres):
    147   #   return w
    148   # #print(cat, genre, nombre, w)
    149   # p = idx[cat][genre][nombre][w]
    150   # n = len(lists[cat][genre][nombre])
    151   # return lists[cat][genre][nombre][(p+offset) % n]
    152 
    153 first = True
    154 while True:
    155   l = f.readline()
    156   if not l:
    157     break
    158   # split header line
    159   if first:
    160     first = False
    161     continue
    162   s = l.split('\t')
    163   if s[ORTHO] not in words.keys():
    164     words[s[ORTHO]] = []
    165   words[s[ORTHO]].append(s)
    166 
    167 f.close()
    168 f = open(sys.argv[2], 'r')
    169 x = f.read()
    170 template = Template(x)
    171 template.options['phon_supposed_ok'] = False
    172 f.close()
    173 template.reject_errors = True
    174 
    175 lwords = sorted(list(words.keys()))
    176 
    177 for w in lwords:
    178   if w in exclude:
    179     continue
    180   poss = set()
    181   oposs = set()
    182   ok = True
    183   for entry in words[w]:
    184     for cat in entry[CGRAM].split(','):
    185       #if cat not in cats:
    186         #ok = False
    187         #break
    188       for genre in adj(entry[GENRE], genres):
    189         for nombre in adj(entry[NOMBRE], nombres):
    190             poss.add((cat, genre, nombre))
    191             if float(entry[FREQ]) >= fthresh and cat in cats:
    192               oposs.add((cat, genre, nombre))
    193   if ok and len(poss) >= 1:
    194     idx[w] = len(mwords)
    195     mwords.append((w, poss, oposs))
    196 
    197 
    198 # for cat in cats:
    199 #   if cat not in lists.keys():
    200 #     lists[cat] = {}
    201 #     idx[cat] = {}
    202 #   for genre in (genres if cat in varcats else ['']):
    203 #     if genre not in lists[cat].keys():
    204 #       lists[cat][genre] = {}
    205 #       idx[cat][genre] = {}
    206 #     for nombre in (nombres if cat in varcats else ['']):
    207 #       if nombre not in lists[cat][genre].keys():
    208 #         lists[cat][genre][nombre] = []
    209 #         idx[cat][genre][nombre] = {}
    210 #       for w in lwords:
    211 #         if len(words[w]) == 1 and ',' not in words[w][0][CGRAM]:
    212 #           entry = words[w][0]
    213 #           if (entry[CGRAM] == cat and entry[GENRE] == genre and entry[NOMBRE] ==
    214 #               nombre):
    215 #             if float(entry[FREQ]) > fthresh:
    216 #               idx[cat][genre][nombre][w] = len(lists[cat][genre][nombre])
    217 #               lists[cat][genre][nombre].append(w)
    218 
    219 whitespace_regexp = re.compile("(\s*)")
    220 
    221 while True:
    222   l = sys.stdin.readline()
    223   if not l:
    224     break
    225   l = l.strip()
    226   if len(l) == 0:
    227     print(l)
    228     continue
    229   s = re.split(whitespace_regexp, l)
    230   try:
    231     loffset = int(s[-1])
    232     s = s[:-1]
    233   except ValueError:
    234     loffset = offset
    235   #print("before init:", template.position)
    236   errors = template.check(' '.join(s))
    237   template.back()
    238   #print("after init:", template.position)
    239   if errors:
    240     print ("PROBLEM with ORIGINAL")
    241     print (errors.report())
    242     continue
    243   lw = s[-1]
    244   s = remove_trivial(s, (lambda w: re.match("^\s*$", w) or
    245           len(normalize(w, rm_all=True)) == 0))
    246   r = []
    247   #print ("INIT rhyme: ", l)
    248   constraint = template.template[template.position % len(template.template)].constraint
    249   rhyme = Rhyme(lw, constraint, template.mergers, template.options)
    250   scut = [cutword(wfull) for wfull in s]
    251   #print(scut)
    252   tags = postag(scut)
    253   #print(tags)
    254   #print(scut)
    255   first = True
    256   for i, (before, ow, after) in reversed(list(enumerate(scut))):
    257     #print ("<%s|%s|%s>" % (before, w, after))
    258     w = ow.lower()
    259     started = time.time()
    260     ok = False
    261     tried = 0
    262     acceptable = 0
    263     if valid_word(w, tags[i]) or (ow[0] == ow[0].upper() and i > 0):
    264       if first and len(normalize(w)) > 0:
    265         first = False
    266         was_first = True
    267         rr, c, sur = query(w)
    268         try:
    269           lrhymes = sorted([x['word'] for x in rr['result']] + [w])
    270           it = lrhymes
    271           wpos = it.index(w)
    272           it = it[wpos+1:]
    273         except KeyError:
    274           it = change(w, tags[1])
    275       else:
    276         it = change(w, tags[i])
    277       for w2 in it:
    278         if not (ok_extends(w, w2, tags[i])):
    279           continue
    280         if time.time() - started > mdur:
    281           break #timeout
    282         if w2.lower() == w.lower():
    283           break
    284         tried += 1
    285         #print (w2, "try:" + ' '.join(r + [w2] + s[i+1:]))
    286         line = ' '.join(s[:i] + [before + w2 + after] + list(reversed(r)))
    287         #print ("CONSIDER: " + line)
    288         if was_first:
    289           was_first = False
    290           nrhyme = copy.deepcopy(rhyme)
    291           #print(lw, rhyme.phon, rhyme.eye)
    292           nrhyme.feed(w2, constraint)
    293           #print(normalize(line), nrhyme.phon, nrhyme.eye)
    294           if not nrhyme.satisfied():
    295             #print(nrhyme.phon, nrhyme.eye)
    296             #print ("... NO RHYME")
    297             continue
    298         #print ("TRY: " + line)
    299         #print("before inter:", template.position)
    300         #print ("check...")
    301         errors = template.check(line, quiet=True)
    302         #print ("...done")
    303         template.back()
    304         #print("after inter:", template.position)
    305         if not errors:
    306           acceptable += 1
    307           if acceptable == loffset:
    308             r.append(w2)
    309             ok = True
    310             break
    311         else:
    312           pass
    313           #print (errors.report())
    314     if not ok:
    315       r.append(w)
    316     if len(w) > 0 and ow[0] == ow[0].upper():
    317       r[-1] = r[-1][0].upper() + r[-1][1:]
    318     r[-1] = before + r[-1] + after
    319   final = ''.join(reversed(r))
    320   #print("before final:", template.position)
    321   errors = template.check(final)
    322   #print("after final:", template.position)
    323   if errors:
    324     print ("PROBLEM")
    325     print (errors.report())
    326     break
    327   print (final)
	plint_extra various extra tools around plint
	git clone https://a3nm.net/git/plint_extra/
	Log \| Files \| Refs \| README