metric.py (4038B)
#!/usr/bin/env python3
#coding: utf-8

# this file is pretty generic, because it's part of a larger project I haven't
# released yet, i should clean this up someday

# TODO: use verse.py from plint instead

import re
from common import normalize, is_vowels, consonants, sure_end_fem
from vowels import possible_weights_approx
import haspirater


def annotate_aspirated(word):
    """Annotate aspirated 'h'.

    Return `word` prefixed with '*' when it starts with 'h' and
    `haspirater.lookup` reports a truthy result for it; otherwise return
    `word` unchanged. An empty string is returned unchanged.
    """
    # startswith (rather than word[0]) keeps the empty string from raising.
    if not word.startswith('h'):
        return word
    if haspirater.lookup(word):
        return '*' + word
    return word


def fit(chunks, pos, left):
    """Bruteforce exploration of all possible vowel cluster weightings.

    `chunks` is a list of strings, `pos` the index to start from, and `left`
    the maximum total weight still available for the chunks from `pos` on.

    Return a list of alignments. Each alignment is a list mirroring
    `chunks[pos:]`: chunks for which `is_vowels` is false are kept as plain
    strings, the others become `(chunk, weight)` tuples. Only alignments
    whose total weight does not exceed `left` are returned.
    """
    # The budget check must come first: otherwise an alignment whose very
    # last vowel cluster overshoots the budget would still be accepted.
    if left < 0:
        return []  # no possibilities
    if pos >= len(chunks):
        return [[]]  # the only possibility is the empty list

    chunk = chunks[pos]

    # skip consonants
    if not is_vowels(chunk):
        return [[chunk] + rest for rest in fit(chunks, pos + 1, left)]

    # special case for verse endings, which can get elided (or not)
    last = len(chunks) - 1
    if chunk == 'e' and pos == last:
        weights = [0]  # ending 'e' is elided
    elif chunk == 'e' and pos == last - 1 and chunks[pos + 1] == 's':
        weights = [0]  # ending 'es' is elided
    elif chunk == 'e' and pos == last - 1 and chunks[pos + 1] == 'nt':
        # ending 'ent' is sometimes elided
        # actually, this will have an influence on the rhyme's gender
        weights = [0, 1]
    else:
        weights = possible_weights_approx(chunk)

    # combine all possibilities
    result = []
    for weight in weights:
        for rest in fit(chunks, pos + 1, left - weight):
            result.append([(chunk, weight)] + rest)
    return result


def feminine(align, verse):
    """Return the possible rhyme genders of `verse` for the alignment `align`.

    The result is `['F']`, `['M']`, or `['M', 'F']` when the gender cannot be
    decided. `verse` is the text whose ending is inspected; `align` is one
    alignment as produced by `fit`, and is only consulted when `verse` ends
    with 'ent' (its second-to-last item is then expected to be a
    `(chunk, weight)` tuple).
    """
    if verse.endswith(tuple(sure_end_fem)):
        return ['F']
    if not verse.endswith('ent'):
        return ['M']
    # verse ends with 'ent'
    cluster, weight = align[-2][0], align[-2][1]
    if weight == 0:
        return ['F']  # mute -ent
    if weight > 0 and cluster == 'e':
        return ['M']  # non-mute "-ent" by the choice of metric
    # and now, what? "tient" vs. "lient" for instance,
    # TODO check pronunciation? :-/
    return ['M', 'F']


def parse(text, bound):
    """Return possible aligns for text.

    `bound` is an upper bound on the align length to limit running time.

    The result is a list of `(align, genders)` pairs, where `align` is one
    alignment returned by `fit` and `genders` is the value returned by
    `feminine` for that alignment and the normalized text.
    """
    original_text = normalize(text)

    # avoid some vowel problems (plain literal replacements, applied in order)
    text = original_text
    for source, target in (("qu", 'q'), ("gue", 'ge'), ("gué", 'gé'),
                           ("guè", 'gè'), ("gua", 'ga')):
        text = text.replace(source, target)

    # split in words
    words = [annotate_aspirated(word)
             for word in text.split(' ') if word != '']

    all_consonants = consonants + consonants.upper()
    # The capturing group makes split() keep the matched runs as chunks too.
    pattern = re.compile(r'([^' + all_consonants + '*-]+)', re.UNICODE)

    # cut each word in chunks of vowels and consonants, with some specific
    # kludges
    for i, word in enumerate(words):
        raw_chunks = [chunk for chunk in pattern.split(word) if chunk != '']
        # the case of "pays" is very special :-(
        is_pays = raw_chunks == ['p', 'ay', 's']
        nwords = []
        # the case of 'y' is special
        for chunk in raw_chunks:
            if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y':
                nwords.append(chunk)
                continue
            # Replace every inner 'y' by a separate 'Y' chunk, keeping all
            # the surrounding letters (none are dropped when the chunk
            # holds more than one 'y').
            parts = chunk.split('y')
            for k, part in enumerate(parts):
                if part != '':
                    nwords.append(part)
                if k < len(parts) - 1:
                    nwords.append('Y')
            if parts[-1] == '' and is_pays:
                nwords.append('y')
        words[i] = nwords

        # remove mute 'e': only when the previous word has more than one
        # vowel chunk, ends with 'e', and the current word's first chunk
        # satisfies is_vowels(..., True)
        if i > 0:
            previous = words[i - 1]
            vowel_count = sum(1 for chunk in previous if is_vowels(chunk))
            if (vowel_count > 1 and previous[-1] == 'e'
                    and is_vowels(nwords[0], True)):
                previous.pop()
                previous[-1] += "'"

    # group back words, separated by a single ' ' chunk
    chunks = []
    for i, word in enumerate(words):
        if i > 0:
            chunks.append(' ')
        chunks.extend(word)

    # return all possibilities to weigh the vowel clusters, annotated by
    # the femininity of the align (depending both on the align and
    # original text)
    return [(align, feminine(align, original_text))
            for align in fit(chunks, 0, bound)]