commit f726bfffe714b25118092cb16b9e7360d7c12232
parent f57cc84f75061ee7ad554c555f600faf4b88db51
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 10 Jul 2012 21:58:08 +0200
add hiatus, improve forbidden reporting
Diffstat:
3 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/error.py b/error.py
@@ -43,12 +43,19 @@ class ErrorBadCharacters(Error):
% ', '.join(["'" + a + "'" for a in self.characters]), short)
class ErrorForbiddenPattern(Error):
- def __init__(self):
- # TODO give more info
- pass
+ def __init__(self, pattern):
+ self.pattern = pattern
+
+ def report(self, short=False):
+ return Error.report(self, "Illegal ambiguous pattern: %s" % self.pattern,
+ short)
+
+class ErrorHiatus(Error):
+ def __init__(self, hiatus):
+ self.hiatus = hiatus
def report(self, short=False):
- return Error.report(self, "Illegal ambiguous pattern", short)
+ return Error.report(self, "Illegal hiatus: %s" % self.hiatus, short)
class ErrorBadRhyme(Error):
def __init__(self, expected, inferred):
diff --git a/metric.py b/metric.py
@@ -7,6 +7,9 @@ import vowels
import haspirater
+no_hiatus = ["oui"]
+
+
def annotate_aspirated(word):
"""Annotate aspirated 'h'"""
if word[0] != 'h':
@@ -90,7 +93,7 @@ def feminine(align, verse, phon):
return possible
-def parse(text, phon, bound, forbidden_ok, diaeresis):
+def parse(text, phon, bound, forbidden_ok, hiatus_ok, diaeresis):
"""Return possible aligns for text, bound is an upper bound on the align
length to limit running time, phon is the pronunciation to help for gender,
forbidden_ok is true if we allow classically forbidden patterns"""
@@ -145,7 +148,8 @@ def parse(text, phon, bound, forbidden_ok, diaeresis):
pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)
- forbidden = False
+ forbidden = None
+ hiatus = None
# cut each word in chunks of vowels and consonants, with some specific
# kludges
@@ -175,12 +179,16 @@ def parse(text, phon, bound, forbidden_ok, diaeresis):
[1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
words[i-1].pop(-1)
words[i-1][-1] = words[i-1][-1]+"`"
+ if (is_vowels(words[i-1][-1]) and not words[i-1][-1][-1] == 'e'
+ and not (''.join(words[i]) in no_hiatus
+ and ''.join(words[i-1]) in no_hiatus)):
+ hiatus = words[i-1][-1] + ' ' + words[i][0]
else:
if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie':
- forbidden = True
+ forbidden = words[i-1][-1]
if words[i-1][-1] == 's' and len(words[i-1]):
if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie':
- forbidden = True
+ forbidden = words[i-1][-2]
# TODO there are arcane rules for "aient"
# case of "soient"
# TODO there are a lot of "oient" in Boileau and Malherbe
@@ -191,7 +199,9 @@ def parse(text, phon, bound, forbidden_ok, diaeresis):
# forbidden = True
if forbidden and not forbidden_ok:
- return None
+ return ("forbidden", forbidden)
+ if hiatus and not hiatus_ok:
+ return ("hiatus", hiatus)
# group back words
for word in words:
diff --git a/template.py b/template.py
@@ -46,6 +46,7 @@ class Template:
self.template = []
self.pattern_line_no = 0
self.forbidden_ok = False
+ self.hiatus_ok = False
self.normande_ok = True
self.check_end_hemistiche = True
self.check_occurrences = True
@@ -66,6 +67,8 @@ class Template:
self.mergers.append(value)
elif key == "forbidden_ok":
self.forbidden_ok = str2bool(value)
+ elif key == "hiatus_ok":
+ self.hiatus_ok = str2bool(value)
elif key == "normande_ok":
self.normande_ok = str2bool(value)
elif key == "diaeresis":
@@ -136,9 +139,12 @@ class Template:
# compute alignments, check hemistiches, sort by score
possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2,
- self.forbidden_ok, self.diaeresis)
- if not possible:
- errors.append(error.ErrorForbiddenPattern())
+ self.forbidden_ok, self.hiatus_ok, self.diaeresis)
+ if not isinstance(possible, list):
+ if possible[0] == "forbidden":
+ errors.append(error.ErrorForbiddenPattern(possible[1]))
+ elif possible[0] == "hiatus":
+ errors.append(error.ErrorHiatus(possible[1]))
possible = []
return errors, pattern
possible = list(map((lambda p: (p[0], p[1],