plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 63f02cd2424269acf84b0d7a63501e1f41ef1384
parent 7079f072bd1db3f28bc43ede33c6520d2abf2d6b
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  6 Sep 2015 23:26:40 +0200

improve disjunction/elision handling

Diffstat:
verse.py     | 38 +++++++++++++++++++++++++++++++-------
versetest.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/verse.py b/verse.py
@@ -28,22 +28,44 @@ letters = {
   'z': 'zaide'
 }

-def elision(word):
-  if (word.startswith('y') and not word == 'y' and not word.startswith("yp") and
-      not word.startswith("yeu")):
-    return [False]
-  if word in ["oui", "ouis"] or word.startswith("ouistiti"):
+def elision(word, was_cap):
+  if word.startswith('y'):
+    if word == 'y':
+      return [True]
+    if was_cap:
+      if word == 'york':
+        return [False]
+      # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
+      # depends on whether it's French or foreign...
+      return [True, False]
+    else:
+      exc = ["york", "yeux", "yeuse", "ypérite"]
+      for w in exc:
+        if word.startswith(w):
+          return [True]
+      # otherwise, no elision
+      return [False]
+  if word in ["oui", "ouis"]:
     # elision for those words, but beware, no elision for "ouighour"
     # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
-    # so elission sometimes
+    # so elision sometimes
     return [True, False]
+  if word.startswith("ouistiti") or word.startswith("ouagadougou"):
+    return [False]
   # "un", "une" are non-elided as nouns ("cette une")
   if word in ["un", "une"]:
     return [True, False]
   # "onze" is not elided
   if word == "onze":
     return [False]
+  if word.startswith('ulul'):
+    return [False] # ululement, ululer, etc.
+  if word.startswith('uhlan'):
+    return [False] # uhlan
   if word[0] == 'h':
+    if word == "huis":
+      # special case, "huis" is elided but "huis clos" isn't
+      return [True, False]
     return list(map((lambda s: not s), haspirater.lookup(word)))
   if is_vowels(word[0]):
     return [True]
@@ -216,7 +238,9 @@ class Verse:
     # vowel elision problems
     for w in self.chunks:
       if 'elision' not in w[0].keys():
-        w[0]['elision'] = elision(''.join(x['text'] for x in w))
+        first_letter = common.rm_punct(w[0]['original'].strip())
+        w[0]['elision'] = elision(''.join(x['text'] for x in w),
+            first_letter == first_letter.upper())

     # case of 'y'
     ys_regexp = re.compile("(y*)")
diff --git a/versetest.py b/versetest.py
@@ -189,6 +189,56 @@ class PoemCounts(Counts):
         possible = self.runCount(self.v3, limit="6/6")
         self.assertTrue(self.achievesPossibility(possible, 12))

+class Disjunct(Counts):
+    # inspired by Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
+    d = {
+        "hiérarchie": 4,
+        "yeux": 1,
+        "yeuse": 1,
+        "yodel": 3,
+        "yacht": 2,
+        "York": 1,
+        "yole": 2,
+        "Yourcenar": 4,
+        "Yvelines": 3,
+        "Ypres": 1,
+        "ypérite": 3,
+        "Ysaÿe": 3,
+        "Ionesco": 4,
+        "Yahvé": 3,
+        "Yungfrau": 3,
+        "yodler": 3,
+        "oui": 2,
+        "ouïe": 2,
+        "ouïr": 2,
+        "ouest": 1,
+        "Ouagadougou": 6,
+        "oisif": 2,
+        "huis": 2,
+        "huit": 2,
+        "huissier": 2,
+        "uhlan": 3,
+        "ululer": 4,
+        "ululement": 5,
+        "onze": 2,
+        "onzième": 3,
+        # both are possible for 'un' and 'une'
+        "Un": 2,
+        "un": 2,
+        "Une": 2,
+        "une": 1,
+        # too weird to figure out correct counts in poems
+        #"Yolande"
+        #"ouistiti"
+    }
+
+    def testDisjunct(self):
+        for k in self.d.keys():
+            v = self.d[k] + 1
+            vv = "belle " + k
+            possible = self.runCount(vv, limit=v)
+            self.assertTrue(self.achievesPossibility(possible, v))
+
 class SanityCheck(unittest.TestCase):
     def testSimple(self):
         text = "Patati patata patata tata vies"