drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

metric.py (4038B)


      1 #!/usr/bin/env python3
      2 #coding: utf-8
      3 
      4 # this file is pretty generic because it is part of a larger project that I
      5 # haven't released yet; I should clean this up someday
      6 
      7 # TODO: use verse.py from plint instead
      8 
      9 import re
     10 from common import normalize, is_vowels, consonants, sure_end_fem
     11 from vowels import possible_weights_approx
     12 import haspirater
     13 
     14 def annotate_aspirated(word):
     15   """Annotate aspirated 'h'"""
     16   if word[0] != 'h':
     17     return word
     18   if haspirater.lookup(word):
     19     return '*'+word
     20   else:
     21     return word
     22 
     23 def fit(chunks, pos, left):
     24   """bruteforce exploration of all possible vowel cluster weghting,
     25   within a maximum total of left"""
     26   if pos >= len(chunks):
     27     return [[]] # the only possibility is the empty list
     28   if left < 0:
     29     return [] # no possibilities
     30   # skip consonants
     31   if (not is_vowels(chunks[pos])):
     32     return [[chunks[pos]] + x for x in fit(chunks, pos+1, left)]
     33   else:
     34     if (pos >= len(chunks) - 2 and chunks[pos] == 'e'):
     35       # special case for verse endings, which can get elided (or not)
     36       if pos == len(chunks) - 1:
     37         weights = [0] # ending 'e' is elided
     38       elif chunks[pos+1] == 's':
     39         weights = [0] # ending 'es' is elided
     40       elif chunks[pos+1] == 'nt':
     41         # ending 'ent' is sometimes elided
     42         # actually, this will have an influence on the rhyme's gender
     43         weights = [0, 1]
     44       else:
     45         weights = possible_weights_approx(chunks[pos])
     46     else:
     47       weights = possible_weights_approx(chunks[pos])
     48     result = []
     49     for weight in weights:
     50       # combine all possibilities
     51       result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1,
     52         left - weight)]
     53     return result
     54 
     55 def feminine(align, verse):
     56   for a in sure_end_fem:
     57     if verse.endswith(a):
     58       return ['F']
     59   if not verse.endswith('ent'):
     60     return ['M']
     61   # verse ends with 'ent'
     62   if align[-2][1] == 0:
     63     return ['F'] # mute -ent
     64   if align[-2][1] > 0 and align[-2][0] == 'e':
     65     return ['M'] # non-mute "-ent" by the choice of metric
     66   # and now, what? "tient" vs. "lient" for instance, 
     67   # TODO check pronunciation? :-/
     68   return ['M', 'F']
     69 
     70 def parse(text, bound):
     71   """Return possible aligns for text, bound is an upper bound on the
     72   align length to limit running time"""
     73 
     74   original_text = normalize(text)
     75 
     76   # avoid some vowel problems
     77   text = re.sub("qu", 'q', original_text)
     78   text = re.sub("gue", 'ge', text)
     79   text = re.sub("gué", 'gé', text)
     80   text = re.sub("guè", 'gè', text)
     81   text = re.sub("gua", 'ga', text)
     82 
     83   # split in words
     84   words = text.split(' ')
     85   words = [annotate_aspirated(word) for word in words if word != '']
     86 
     87   all_consonants = consonants + consonants.upper()
     88   pattern = re.compile(r'([^' + all_consonants + '*-]+)', re.UNICODE)
     89 
     90   # cut each word in chunks of vowels and consonants, with some specific
     91   # kludges
     92   for i in range(len(words)):
     93     words[i] = re.split(pattern, words[i])
     94     words[i] = [chunk for chunk in words[i] if chunk != '']
     95     nwords = []
     96     # the case of 'y' is special
     97     for chunk in words[i]:
     98       if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y':
     99         nwords.append(chunk)
    100       else:
    101         a = chunk.split('y')
    102         nwords.append(a[0])
    103         nwords.append('Y')
    104         if a[1] != '':
    105           nwords.append(a[1])
    106         else:
    107           # the case of "pays" is very special :-(
    108           if words[i] == ['p', 'ay', 's']:
    109             nwords.append('y')
    110     words[i] = nwords
    111     # remove mute 'e'
    112     if i > 0:
    113       if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
    114         if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
    115           words[i-1].pop(-1)
    116           words[i-1][-1] = words[i-1][-1]+"'"
    117 
    118   # group back words
    119   for word in words:
    120     word.append(' ')
    121   chunks = sum(words, [])[:-1]
    122  
    123   # return all possibilities to weigh the vowel clusters, annotated by
    124   # the femininity of the align (depending both on the align and
    125   # original text)
    126   return list(map((lambda x : (x, feminine(x, original_text))),
    127     fit(chunks, 0, bound)))
    128