plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit f726bfffe714b25118092cb16b9e7360d7c12232
parent f57cc84f75061ee7ad554c555f600faf4b88db51
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 10 Jul 2012 21:58:08 +0200

add hiatus, improve forbidden reporting

Diffstat:
error.py | 15 +++++++++++----
metric.py | 20 +++++++++++++++-----
template.py | 12 +++++++++---
3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/error.py b/error.py @@ -43,12 +43,19 @@ class ErrorBadCharacters(Error): % ', '.join(["'" + a + "'" for a in self.characters]), short) class ErrorForbiddenPattern(Error): - def __init__(self): - # TODO give more info - pass + def __init__(self, pattern): + self.pattern = pattern + + def report(self, short=False): + return Error.report(self, "Illegal ambiguous pattern: %s" % self.pattern, + short) + +class ErrorHiatus(Error): + def __init__(self, hiatus): + self.hiatus = hiatus def report(self, short=False): - return Error.report(self, "Illegal ambiguous pattern", short) + return Error.report(self, "Illegal hiatus: %s" % self.hiatus, short) class ErrorBadRhyme(Error): def __init__(self, expected, inferred): diff --git a/metric.py b/metric.py @@ -7,6 +7,9 @@ import vowels import haspirater +no_hiatus = ["oui"] + + def annotate_aspirated(word): """Annotate aspirated 'h'""" if word[0] != 'h': @@ -90,7 +93,7 @@ def feminine(align, verse, phon): return possible -def parse(text, phon, bound, forbidden_ok, diaeresis): +def parse(text, phon, bound, forbidden_ok, hiatus_ok, diaeresis): """Return possible aligns for text, bound is an upper bound on the align length to limit running time, phon is the pronunciation to help for gender, forbidden_ok is true if we allow classically forbidden patterns""" @@ -145,7 +148,8 @@ def parse(text, phon, bound, forbidden_ok, diaeresis): pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE) - forbidden = False + forbidden = None + hiatus = None # cut each word in chunks of vowels and consonants, with some specific # kludges @@ -175,12 +179,16 @@ def parse(text, phon, bound, forbidden_ok, diaeresis): [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1: words[i-1].pop(-1) words[i-1][-1] = words[i-1][-1]+"`" + if (is_vowels(words[i-1][-1]) and not words[i-1][-1][-1] == 'e' + and not (''.join(words[i]) in no_hiatus + and ''.join(words[i-1]) in no_hiatus)): + hiatus = words[i-1][-1] + ' ' + words[i][0] else: if words[i-1][-1] == 
'ée' or words[i-1][-1] == 'ie': - forbidden = True + forbidden = words[i-1][-1] if words[i-1][-1] == 's' and len(words[i-1]): if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie': - forbidden = True + forbidden = words[i-1][-2] # TODO there are arcane rules for "aient" # case of "soient" # TODO there are a lot of "oient" in boileau and malherme @@ -191,7 +199,9 @@ def parse(text, phon, bound, forbidden_ok, diaeresis): # forbidden = True if forbidden and not forbidden_ok: - return None + return ("forbidden", forbidden) + if hiatus and not hiatus_ok: + return ("hiatus", hiatus) # group back words for word in words: diff --git a/template.py b/template.py @@ -46,6 +46,7 @@ class Template: self.template = [] self.pattern_line_no = 0 self.forbidden_ok = False + self.hiatus_ok = False self.normande_ok = True self.check_end_hemistiche = True self.check_occurrences = True @@ -66,6 +67,8 @@ class Template: self.mergers.append(value) elif key == "forbidden_ok": self.forbidden_ok = str2bool(value) + elif key == "hiatus_ok": + self.hiatus_ok = str2bool(value) elif key == "normande_ok": self.normande_ok = str2bool(value) elif key == "diaeresis": @@ -136,9 +139,12 @@ class Template: # compute alignments, check hemistiches, sort by score possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2, - self.forbidden_ok, self.diaeresis) - if not possible: - errors.append(error.ErrorForbiddenPattern()) + self.forbidden_ok, self.hiatus_ok, self.diaeresis) + if not isinstance(possible, list): + if possible[0] == "forbidden": + errors.append(error.ErrorForbiddenPattern(possible[1])) + elif possible[0] == "hiatus": + errors.append(error.ErrorHiatus(possible[1])) possible = [] return errors, pattern possible = list(map((lambda p: (p[0], p[1],