plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 01d617d42d2d62251e7a92c79edf4083d8b43e96
parent 691e8e776f1992457b2f922e0a8e580406728c79
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 18 May 2012 21:01:35 +0200

Merge branch 'classical'

Conflicts:
	TODO
	metric.py
	template.py

Diffstat:
TODO | 3+++
error.py | 11+++++++++++
metric.py | 70+++++++++++++++++++++++++++++++++++++++++++++++++---------------------
rhyme.py | 97++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
static/tpl/alexandrin.tpl | 3++-
static/tpl/classical.tpl | 8++++----
template.py | 82++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
test/boileau.tpl | 9+++++----
views/about.html | 4++++
9 files changed, 212 insertions(+), 75 deletions(-)

diff --git a/TODO b/TODO @@ -1,3 +1,6 @@ +- options for tolerant diaresis, no diarersis, classical diaresis +- better check of hemistiches with known words + larger label for radios no diérèse on 'uei'? diff --git a/error.py b/error.py @@ -42,6 +42,14 @@ class ErrorBadCharacters(Error): return Error.report(self, "Illegal character: %s" % ', '.join(["'" + a + "'" for a in self.characters])) +class ErrorForbiddenPattern(Error): + def __init__(self): + # TODO give more info + pass + + def report(self): + return Error.report(self, "Illegal ambiguous pattern") + class ErrorBadRhyme(Error): def __init__(self, expected, inferred): Error.__init__(self) @@ -49,6 +57,9 @@ class ErrorBadRhyme(Error): self.inferred = inferred def report(self, short=False): + # TODO indicate eye rhyme since this is also important + # TODO don't indicate more than the minimal required rhyme (in length and + # present of a vowel phoneme) return Error.report(self, "Bad rhyme %s for type %s (expected %s, inferred %s)" % (self.kind, self.get_id(), self.fmt(self.expected), self.fmt(self.inferred)), short) diff --git a/metric.py b/metric.py @@ -2,10 +2,11 @@ #coding: utf-8 import re -from common import normalize, is_vowels, consonants, sure_end_fem +from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants from vowels import possible_weights import haspirater + def annotate_aspirated(word): """Annotate aspirated 'h'""" if word[0] != 'h': @@ -58,7 +59,7 @@ def fit(chunks, pos, left): left - weight)] return result -def feminine(align, verse): +def feminine(align, verse, phon): for a in sure_end_fem: if verse.endswith(a): return ['F'] @@ -69,13 +70,24 @@ def feminine(align, verse): return ['F'] # mute -ent if align[-2][1] > 0 and align[-2][0] == 'e': return ['M'] # non-mute "-ent" by the choice of metric - # what now? "tient" vs. "lient" for instance, - # TODO check pronunciation? :-/ - return ['M', 'F'] + possible = [] + # now, we must check pronunciation? + # "tient" vs. "lient" for instance, "excellent"... + for possible_phon in phon: + if possible_phon.endswith(')') or possible_phon.endswith('#'): + possible.append('M') + else: + possible.append('F') + if possible_phon.endswith('E') and verse.endswith('aient'): + # imparfait and conditionnel are masculine... + possible.append('M') + return possible + -def parse(text, bound): - """Return possible aligns for text, bound is an upper bound on the - align length to limit running time""" +def parse(text, phon, bound, forbidden_ok): + """Return possible aligns for text, bound is an upper bound on the align + length to limit running time, phon is the pronunciation to help for gender, + forbidden_ok is true if we allow classically forbidden patterns""" original_text = normalize(text) @@ -112,18 +124,14 @@ def parse(text, bound): if (words[i] == "onze"): words[i] = "*" + words[i] - all_consonants = True - for x in words[i]: - if not x in consonants: - all_consonants = False - if all_consonants: - new_word = '' + if is_consonants(words[i]): + new_word = [] for x in words[i]: - if (words[i] == 'w'): - new_word += "doublevé-" + if (x == 'w'): + new_word.append("doublevé") else: - new_word += words[i]+'a-' - words[i] = new_word + new_word.append(x + "a") + words[i] = ''.join(new_word) # aspirated @@ -131,6 +139,8 @@ def parse(text, bound): pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE) + forbidden = False + # cut each word in chunks of vowels and consonants, with some specific # kludges for i in range(len(words)): @@ -154,10 +164,28 @@ def parse(text, bound): words[i] = nwords # remove mute 'e' if i > 0: - if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1: - if words[i-1][-1] == 'e' and is_vowels(words[i][0], True): + if is_vowels(words[i][0], True): + if words[i-1][-1] == 'e' and sum( + [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1: words[i-1].pop(-1) words[i-1][-1] = words[i-1][-1]+"`" + else: + if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie': + forbidden = True + if words[i-1][-1] == 's' and len(words[i-1]): + if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie': + forbidden = True + # TODO there are arcane rules for "aient" + # case of "soient" + # TODO there are a lot of "oient" in boileau and malherme + # so apparently there is no simple way to check that + # if words[i-1][-1] == 'nt' and len(words[i-1]): + # if words[i-1][-2] == 'oie': + # if len(words[i-1]) != 3 or words[i-1][-3] != 's': + # forbidden = True + + if forbidden and not forbidden_ok: + return None # group back words for word in words: @@ -167,6 +195,6 @@ def parse(text, bound): # return all possibilities to weigh the vowel clusters, annotated by # the femininity of the align (depending both on the align and # original text) - return list(map((lambda x: (x, feminine(x, original_text))), + return list(map((lambda x: (x, feminine(x, original_text, phon))), fit(chunks, 0, bound))) diff --git a/rhyme.py b/rhyme.py @@ -1,22 +1,37 @@ #!/usr/bin/python3 -u #encoding: utf8 +import copy import re import sys from pprint import pprint import frhyme import functools +from common import consonants # number of possible rhymes to consider NBEST = 5 # phonetic vowels vowel = list("Eeaio592O#@y%u()$") +liaison = { + 'c': 'k', + 'd': 't', + 'g': 'k', + 'k': 'k', + 'p': 'p', + 'r': 'R', + 's': 'z', + 't': 't', + 'x': 'z', + 'z': 'z', + } + + class Constraint: - def __init__(self, phon, eye, aphon): + def __init__(self, classical, phon): self.phon = phon # minimal number of common suffix phones - self.eye = eye # minimal number of common suffix letters - self.aphon = aphon # minimal number of common suffix vowel phones + self.classical = classical # should we impose classical rhyme rules def mmax(self, a, b): """max, with -1 representing infty""" @@ -30,14 +45,27 @@ class Constraint: if not c: return self.phon = self.mmax(self.phon, c.phon) - self.eye = self.mmax(self.eye, c.eye) - self.aphon = self.mmax(self.aphon, c.aphon) + self.eye = self.classical or c.classical class Rhyme: - def __init__(self, line, constraint): + def apply_mergers(self, phon): + return ''.join([(self.mergers[x] if x in self.mergers.keys() + else x) for x in phon]) + + def supposed_liaison(self, x): + if x[-1] in liaison.keys(): + return x + liaison[x[-1]] + return x + + def __init__(self, line, constraint, mergers=[], normande_ok=True): self.constraint = constraint - self.phon = lookup(line) - self.eye = line + self.mergers = {} + self.normande_ok = normande_ok + for phon_set in mergers: + for phon in phon_set[1:]: + self.mergers[phon] = phon_set[0] + self.phon = set([self.apply_mergers(x) for x in self.lookup(line)]) + self.eye = self.supposed_liaison(consonant_suffix(line)) def match(self, phon, eye): """limit our phon and eye to those which match phon and eye and which @@ -49,31 +77,48 @@ class Rhyme: if val >= self.constraint.phon and self.constraint.phon >= 0: new_phon.add(x[-val:]) val = assonance_rhyme(x, y) - if val >= self.constraint.aphon and self.constraint.aphon >= 0: - new_phon.add(x[-val:]) self.phon = new_phon if self.eye: val = eye_rhyme(self.eye, eye) - if val >= self.constraint.eye and self.constraint.eye >= 0: - self.eye = self.eye[-val:] + if val == 0: + self.eye = "" else: - self.eye = None + self.eye = self.eye[-val:] def restrict(self, r): """take the intersection between us and rhyme object r""" self.constraint.restrict(r.constraint) - self.match(r.phon, r.eye) + self.match(set([self.apply_mergers(x) for x in r.phon]), + self.supposed_liaison(consonant_suffix(r.eye))) def feed(self, line, constraint=None): """extend us with a line and a constraint""" - return self.restrict(Rhyme(line, constraint)) + return self.restrict(Rhyme(line, constraint, self.mergers)) def satisfied(self): - return self.eye or len(self.phon) > 0 + return (len(self.eye) >= self.constraint.eye + and len(self.phon) > 0 or not self.constraint.classical) def pprint(self): pprint(self.phon) + def lookup(self, s): + """lookup the pronunciation of s, adding rime normande kludges and liaisons""" + result = raw_lookup(s) + if self.normande_ok and (s.endswith('er') or s.endswith('ers')): + result.add("ER") + # TODO better here + result2 = copy.deepcopy(result) + # the case 'ent' would lead to trouble for gender + if self.constraint.classical: + if s[-1] in liaison.keys() and not s.endswith('ent'): + for r in result2: + result.add(r + liaison[s[-1]]) + if (s[-1] == 's'): + result.add(r + 's') + return result + + def suffix(x, y): """length of the longest common suffix of x and y""" bound = min(len(x), len(y)) @@ -110,11 +155,17 @@ def concat_couples(a, b): s.add(x + y) return s -def lookup(s): - """lookup the pronunciation of s, adding rime normande kludges""" - result = raw_lookup(s) - if s.endswith('er'): - result.add("ER") +def consonant_suffix(s): + for i in range(len(s)): + if not s[-(i+1)] in consonants: + break + result = s[-(i+1):] + if result.endswith('m'): + result = result[:-1] + 'n' + if result.endswith('à'): + result = result[:-1] + 'a' + if result.endswith('û'): + result = result[:-1] + 'u' return result def raw_lookup(s): @@ -137,8 +188,8 @@ if __name__ == '__main__': line = line.lower().strip().split(' ') if len(line) < 1: continue - constraint = Constraint(1, -1, -1) - rhyme = Rhyme(line[0], constraint) + constraint = Constraint(True, 1) + rhyme = Rhyme(line[0], constraint, self.mergers, self.normande_ok) for x in line[1:]: rhyme.feed(x) rhyme.pprint() diff --git a/static/tpl/alexandrin.tpl b/static/tpl/alexandrin.tpl @@ -1 +1,2 @@ -12 +! forbidden_ok:yes +12 A diff --git a/static/tpl/classical.tpl b/static/tpl/classical.tpl @@ -1,4 +1,4 @@ -6/6 A:1 !X -6/6 A:1 !X -6/6 B:1 !x -6/6 B:1 !x +6/6 A !X +6/6 A !X +6/6 B !x +6/6 B !x diff --git a/template.py b/template.py @@ -30,21 +30,41 @@ class Template: def __init__(self, string): self.template = [] self.pattern_line_no = 0 + self.forbidden_ok = False + self.normande_ok = True + self.mergers = [] self.load(string) self.line_no = 0 self.position = 0 + self.prev = None self.env = {} self.femenv = {} self.occenv = {} self.reject_errors = False + def read_option(self, x): + key, value = x.split(':') + if key == "merge": + self.mergers.append(value) + elif key == "forbidden_ok": + self.forbidden_ok = str2bool(value) + elif key == "normande_ok": + self.normande_ok = str2bool(value) + else: + raise ValueError + def load(self, s): """Load from a string""" for line in s.split('\n'): line = line.strip() self.pattern_line_no += 1 if line != '' and line[0] != '#': - self.template.append(self.parse_line(line.strip())) + if line[0] == '!': + # don't count the '!' in the options, that's why we use [1:] + for option in line.split()[1:]: + self.read_option(option) + else: + self.template.append(self.parse_line(line.strip())) def count(self, align): """total weight of an align""" @@ -69,15 +89,37 @@ class Template: line_with_case = normalize(line, downcase=False) line = normalize(line) pattern = self.get() + + errors = [] + + # rhymes + if pattern.myid not in self.env.keys(): + # initialize the rhyme + self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint, + self.mergers, self.normande_ok) + else: + # update the rhyme + old_p = self.env[pattern.myid].phon + old_e = self.env[pattern.myid].eye + self.env[pattern.myid].feed(line, pattern.constraint) + # no more possible rhymes, something went wrong + if not self.env[pattern.myid].satisfied(): + self.env[pattern.myid].phon = old_p + self.env[pattern.myid].eye = old_e + errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None)) + # compute alignments, check hemistiches, sort by score - possible = parse(line, pattern.length + 2) + possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2, + self.forbidden_ok) + if not possible: + errors.append(error.ErrorForbiddenPattern()) + possible = [] + return errors, pattern possible = list(map((lambda p: (p[0], p[1], check_hemistiches(p[0], pattern.hemistiches))), possible)) possible = map((lambda x: (self.rate(pattern, x), x)), possible) possible = sorted(possible, key=(lambda x: x[0])) - errors = [] - # check characters illegal = set() for x in line: @@ -95,21 +137,6 @@ class Template: possible = [(score, align) for (score, align) in possible if score == possible[0][0]] - # rhymes - if pattern.myid not in self.env.keys(): - # initialize the rhyme - self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint) - else: - # update the rhyme - old_p = self.env[pattern.myid].phon - old_e = self.env[pattern.myid].eye - self.env[pattern.myid].feed(line, pattern.constraint) - # no more possible rhymes, something went wrong - if not self.env[pattern.myid].satisfied(): - self.env[pattern.myid].phon = old_p - self.env[pattern.myid].eye = old_e - errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None)) - # occurrences if pattern.myid not in self.occenv.keys(): self.occenv[pattern.myid] = {} @@ -159,13 +186,17 @@ class Template: femid = str(self.pattern_line_no) # unique idsplit = myid.split(':') if len(idsplit) >= 2: - constraint = [int(x) for x in idsplit[-1].split('|')] + constraint = idsplit[-1].split('|') + if len(constraint) > 0: + constraint[0] = False if constraint[0] == "no" else constraint[0] + if len(constraint) > 1: + constraint[1] = int(constraint[1]) else: constraint = [] if len(constraint) == 0: constraint.append(1) - while len(constraint) < 3: - constraint.append(-1) + if len(constraint) < 2: + constraint.append(True) return Pattern(metric, myid, femid, rhyme.Constraint(*constraint)) def reset_conditional(self, d): @@ -212,3 +243,10 @@ class Template: self.line_no -= 1 return errors +def str2bool(x): + if x == "yes": + return True + if x == "no": + return False + raise ValueError + diff --git a/test/boileau.tpl b/test/boileau.tpl @@ -1,4 +1,5 @@ -6/6 A:1|2 !X -6/6 A:1|2 !X -6/6 B:1|2 !x -6/6 B:1|2 !x +! merge:oO +6/6 A !X +6/6 A !X +6/6 B !x +6/6 B !x diff --git a/views/about.html b/views/about.html @@ -25,6 +25,7 @@ aucun des modèles ne vous convient, vous pouvez <a href="#template">écrire le vôtre</a>.</p> <h2>Qu'est-ce qui est vérifié par plint&nbsp;?</h2> +<p>TODO outdated.</p> <p>Ces explications simplifiées ne sont pas exhaustives. Pour une description exacte, se reporter au code source.</p> <dl> @@ -77,6 +78,7 @@ alexandrin classique parfaitement valide.</p> <h2 id="template">Comment faire pour définir ses propres modèles&nbsp;?</h2> +<p>TODO outdated.</p> <p> Chaque ligne du format correspond à un vers (ie. une ligne non-vide). Une ligne peut indiquer trois éléments séparés par une espace : la métrique, l'identifiant @@ -181,6 +183,7 @@ predefined templates suit you, you can <a href="#template">write your own</a>.</p> <h2>What does plint check?</h2> +<p>TODO outdated.</p> <p>Here are some simplified explanations. To know all the details, go read the source code.</p> <dl> @@ -230,6 +233,7 @@ classical alexandrine.</p> <h2 id="template">How can I define my own templates?</h2> +<p>TODO outdated.</p> <p>Each template line will be checked against a non-blank poem line. When the template is finished, it starts over from the beginning, and the rhyme and rhyme genre identifiers (see below) are reinitialized unless they start with a '!'.