plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 651cc9b9cfcfb465e2972ba4a67518da5ad38235
parent a2a27f077454a37a8997d92f0f1b5618aeed9924
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 27 Apr 2012 18:08:47 +0200

handle lone words, lone letters, elision exceptions

Diffstat:
metric.py | 47+++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/metric.py b/metric.py @@ -15,6 +15,9 @@ def annotate_aspirated(word): else: return word +def contains_break(chunk): + return ' ' in chunk or '-' in chunk + def fit(chunks, pos, left): """bruteforce exploration of all possible vowel cluster weghting, within a maximum total of left""" @@ -26,8 +29,11 @@ def fit(chunks, pos, left): if (not is_vowels(chunks[pos])): return [[chunks[pos]] + x for x in fit(chunks, pos+1, left)] else: - if (pos >= len(chunks) - 2 and chunks[pos] == 'e'): + if ((pos >= len(chunks) - 2 and chunks[pos] == 'e') and ( + pos <= 0 or not contains_break(chunks[pos-1])) and ( + pos <= 1 or not contains_break(chunks[pos-2]))): # special case for verse endings, which can get elided (or not) + # but we don't elide lone syllables ("prends-le", etc.) if pos == len(chunks) - 1: weights = [0] # ending 'e' is elided elif chunks[pos+1] == 's': @@ -39,7 +45,12 @@ def fit(chunks, pos, left): else: weights = possible_weights(chunks[pos]) else: - weights = possible_weights(chunks[pos]) + if (pos >= len(chunks) - 1 and chunks[pos] == 'e' and + pos > 0 and (chunks[pos-1].endswith('-c') or + chunks[pos-1].endswith('-j'))): + weights = [0] # -ce and -je are elided + else: + weights = possible_weights(chunks[pos]) result = [] for weight in weights: # combine all possibilities @@ -77,6 +88,38 @@ def parse(text, bound): # split in words words = text.split(' ') + + # other exceptions + for i in range(len(words)): + # no elision on y- words except "ypérite", "yeuse", "yeux" + if words[i].startswith('y') and words[i] != "y" and not ( + words[i].startswith('yp') or words[i].startswith('yeu')): + words[i] = "*" + words[i] + + # no elision for "oui", "ouis", "ouistitis" + # but elision for "ouighour" + # TODO boileau writes: + # "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute." + # so it's unclear what should be done here + # if (words[i] == "oui" or words[i] == "ouis" or + # words[i].startswith("ouistiti")): + # words[i] = "*" + words[i] + + # no elision on those numerals + # TODO "un" or "une" are sometimes elidable and sometimes non-elidable + # Belle, une fois encor, réponds à mon appel. + # Mon journal, il est vrai, a une belle une. + if (words[i] == "onze"): + words[i] = "*" + words[i] + + if len(words[i]) == 1 and words[i][0] in consonants: + if (words[i] == 'w'): + words[i] = "doublevé" + else: + words[i] = words[i] + "a" + + + # aspirated words = [annotate_aspirated(word) for word in words if word != ''] pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)