commit 8fcced6ac814fd9091d265755682a12e2d17f54d
parent 8c82ee9e591bb8dcbda275a32e964d197e66c2d8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 27 Dec 2011 01:13:18 +0100
add missing file metric.py
Diffstat:
metric.py | | | 125 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 125 insertions(+), 0 deletions(-)
diff --git a/metric.py b/metric.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+#coding: utf-8
+
+# This file is pretty generic because it is part of a larger project that I
+# haven't released yet; I should clean this up someday.
+
+import re
+from common import normalize, is_vowels, consonants, sure_end_fem
+from vowels import possible_weights
+import haspirater
+
def annotate_aspirated(word):
    """Annotate aspirated 'h' by prefixing the word with '*'.

    word -- a single word (string)

    Words that do not start with 'h', including the empty string, are
    returned unchanged without consulting haspirater. A word starting
    with 'h' is returned as '*' + word when haspirater.lookup(word) is
    truthy, and unchanged otherwise.
    """
    # startswith() instead of word[0]: an empty word is then a no-op
    # rather than an IndexError
    if not word.startswith('h'):
        return word
    if haspirater.lookup(word):
        return '*' + word
    return word
+
def fit(chunks, pos, left):
    """Bruteforce exploration of all possible vowel cluster weightings,
    within a maximum total of left.

    chunks -- list of chunks (strings), vowel clusters and consonants
    pos    -- index of the first chunk that still has to be processed
    left   -- remaining weight budget; a weighting whose total exceeds it
              is discarded

    Returns the list of possible alignments for chunks[pos:]. In an
    alignment, chunks for which is_vowels() is false are kept as plain
    strings, and the other chunks become (chunk, weight) tuples.
    """
    # Check the budget before the end-of-chunks test: otherwise an
    # overshoot caused by the very last chunk would be accepted, whereas
    # the same overshoot followed by a consonant chunk is rejected.
    if left < 0:
        return []  # no possibilities
    if pos >= len(chunks):
        return [[]]  # the only possibility is the empty list

    chunk = chunks[pos]

    # skip consonants: they are copied as-is and consume no budget
    if not is_vowels(chunk):
        return [[chunk] + rest for rest in fit(chunks, pos + 1, left)]

    last = len(chunks) - 1
    weights = None
    if chunk == 'e' and pos >= last - 1:
        # special case for verse endings, which can get elided (or not)
        if pos == last:
            weights = [0]  # ending 'e' is elided
        elif chunks[pos + 1] == 's':
            weights = [0]  # ending 'es' is elided
        elif chunks[pos + 1] == 'nt':
            # ending 'ent' is sometimes elided
            # actually, this will have an influence on the rhyme's gender
            weights = [0, 1]
    if weights is None:
        weights = possible_weights(chunk)

    # combine every weight of this cluster with every alignment of the rest
    result = []
    for weight in weights:
        result.extend([(chunk, weight)] + rest
                      for rest in fit(chunks, pos + 1, left - weight))
    return result
+
def feminine(align, verse):
    """Return the possible genders of the rhyme at the end of verse.

    align -- one alignment, as built by fit()
    verse -- the text of the verse (parse() passes the normalized text)

    The result is a list containing 'F', 'M', or both when the gender
    cannot be decided here.
    """
    if any(verse.endswith(suffix) for suffix in sure_end_fem):
        return ['F']

    if verse.endswith('ent'):
        # the decision depends on the weight given to the chunk located
        # just before the last one in the alignment
        before_last = align[-2]
        weight = before_last[1]
        if weight == 0:
            return ['F']  # mute -ent
        if weight > 0 and before_last[0] == 'e':
            return ['M']  # non-mute "-ent" by the choice of metric
        # and now, what? "tient" vs. "lient" for instance,
        # TODO check pronunciation? :-/
        return ['M', 'F']

    return ['M']
+
def parse(text, bound):
    """Return possible aligns for text.

    text  -- the verse to analyse; it is passed through normalize() first
    bound -- upper bound on the align length, to limit running time

    Returns a list of (align, genders) pairs, where align is one result
    of fit() and genders is the result of feminine() for that align and
    the normalized text.
    """
    original_text = normalize(text)

    # avoid some vowel problems: drop the 'u' of these letter groups so
    # that it is not chunked together with the vowels that follow
    text = original_text
    for old, new in (("qu", 'q'), ("gue", 'ge'), ("gué", 'gé'),
                     ("guè", 'gè'), ("gua", 'ga')):
        text = text.replace(old, new)

    # split in words
    words = [annotate_aspirated(word) for word in text.split(' ') if word]

    # '+' and not '*': a pattern that can match the empty string makes
    # re.split() cut between every character on Python >= 3.7, which
    # would break each vowel cluster into single letters
    pattern = re.compile('([' + consonants + '*-]+)', re.UNICODE)

    # cut each word in chunks of vowels and consonants, with some specific
    # kludges
    for i in range(len(words)):
        raw = [chunk for chunk in pattern.split(words[i]) if chunk != '']
        pieces = []
        # the case of 'y' is special: a 'y' that is inside a chunk (neither
        # alone nor leading) splits the chunk around a 'Y' marker
        for chunk in raw:
            if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y':
                pieces.append(chunk)
                continue
            parts = chunk.split('y')
            pieces.append(parts[0])
            pieces.append('Y')
            if parts[1] != '':
                pieces.append(parts[1])
            elif raw == ['p', 'ay', 's']:
                # the case of "pays" is very special :-(
                pieces.append('y')
        words[i] = pieces

        # remove mute 'e': when the previous word has more than one vowel
        # chunk and ends with 'e', and this word starts with a vowel chunk,
        # the 'e' is dropped and an apostrophe marks the elision
        if i > 0:
            previous = words[i - 1]
            if sum(1 for chunk in previous if is_vowels(chunk)) > 1:
                if previous[-1] == 'e' and is_vowels(pieces[0], True):
                    previous.pop()
                    previous[-1] = previous[-1] + "'"

    # group back words, with a ' ' chunk between consecutive words
    chunks = []
    for word in words:
        chunks.extend(word)
        chunks.append(' ')
    chunks = chunks[:-1]  # no separator after the last word

    # return all possibilities to weigh the vowel clusters, annotated by
    # the femininity of the align (depending both on the align and
    # original text)
    return [(align, feminine(align, original_text))
            for align in fit(chunks, 0, bound)]
+