commit 63f02cd2424269acf84b0d7a63501e1f41ef1384
parent 7079f072bd1db3f28bc43ede33c6520d2abf2d6b
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 6 Sep 2015 23:26:40 +0200
improve disjunction/elision handling
Diffstat:
verse.py     | 38 +++++++++++++++++++++++++++++++-------
versetest.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 81 insertions(+), 7 deletions(-)
diff --git a/verse.py b/verse.py
@@ -28,22 +28,44 @@ letters = {
'z': 'zaide'
}
-def elision(word):
- if (word.startswith('y') and not word == 'y' and not word.startswith("yp") and
- not word.startswith("yeu")):
- return [False]
- if word in ["oui", "ouis"] or word.startswith("ouistiti"):
+def elision(word, was_cap):
+ if word.startswith('y'):
+ if word == 'y':
+ return [True]
+ if was_cap:
+ if word == 'york':
+ return [False]
+ # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
+ # depends on whether it's French or foreign...
+ return [True, False]
+ else:
+ exc = ["york", "yeux", "yeuse", "ypérite"]
+ for w in exc:
+ if word.startswith(w):
+ return [True]
+ # otherwise, no elision
+ return [False]
+ if word in ["oui", "ouis"]:
# elision for those words, but beware, no elision for "ouighour"
# boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
- # so elission sometimes
+ # so elision sometimes
return [True, False]
+ if word.startswith("ouistiti") or word.startswith("ouagadougou"):
+ return [False]
# "un", "une" are non-elided as nouns ("cette une")
if word in ["un", "une"]:
return [True, False]
# "onze" is not elided
if word == "onze":
return [False]
+ if word.startswith('ulul'):
+ return [False] # ululement, ululer, etc.
+ if word.startswith('uhlan'):
+ return [False] # uhlan
if word[0] == 'h':
+ if word == "huis":
+ # special case, "huis" is elided but "huis clos" isn't
+ return [True, False]
return list(map((lambda s: not s), haspirater.lookup(word)))
if is_vowels(word[0]):
return [True]
@@ -216,7 +238,9 @@ class Verse:
# vowel elision problems
for w in self.chunks:
if 'elision' not in w[0].keys():
- w[0]['elision'] = elision(''.join(x['text'] for x in w))
+ first_letter = common.rm_punct(w[0]['original'].strip())
+ w[0]['elision'] = elision(''.join(x['text'] for x in w),
+ first_letter == first_letter.upper())
# case of 'y'
ys_regexp = re.compile("(y*)")
diff --git a/versetest.py b/versetest.py
@@ -189,6 +189,56 @@ class PoemCounts(Counts):
possible = self.runCount(self.v3, limit="6/6")
self.assertTrue(self.achievesPossibility(possible, 12))
+class Disjunct(Counts):
+ # inspired by Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
+ d = {
+ "hiérarchie": 4,
+ "yeux": 1,
+ "yeuse": 1,
+ "yodel": 3,
+ "yacht": 2,
+ "York": 1,
+ "yole": 2,
+ "Yourcenar": 4,
+ "Yvelines": 3,
+ "Ypres": 1,
+ "ypérite": 3,
+ "Ysaÿe": 3,
+ "Ionesco": 4,
+ "Yahvé": 3,
+ "Yungfrau": 3,
+ "yodler": 3,
+ "oui": 2,
+ "ouïe": 2,
+ "ouïr": 2,
+ "ouest": 1,
+ "Ouagadougou": 6,
+ "oisif": 2,
+ "huis": 2,
+ "huit": 2,
+ "huissier": 2,
+ "uhlan": 3,
+ "ululer": 4,
+ "ululement": 5,
+ "onze": 2,
+ "onzième": 3,
+ # both are possible for 'un' and 'une'
+ "Un": 2,
+ "un": 2,
+ "Une": 2,
+ "une": 1,
+ # too weird to figure out correct counts in poems
+ #"Yolande"
+ #"ouistiti"
+ }
+
+ def testDisjunct(self):
+ for k in self.d.keys():
+ v = self.d[k] + 1
+ vv = "belle " + k
+ possible = self.runCount(vv, limit=v)
+ self.assertTrue(self.achievesPossibility(possible, v))
+
class SanityCheck(unittest.TestCase):
def testSimple(self):
text = "Patati patata patata tata vies"