commit 01d617d42d2d62251e7a92c79edf4083d8b43e96
parent 691e8e776f1992457b2f922e0a8e580406728c79
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Fri, 18 May 2012 21:01:35 +0200
Merge branch 'classical'
Conflicts:
TODO
metric.py
template.py
Diffstat:
9 files changed, 212 insertions(+), 75 deletions(-)
diff --git a/TODO b/TODO
@@ -1,3 +1,6 @@
+- options for tolerant diaeresis, no diaeresis, classical diaeresis
+- better check of hemistiches with known words
+
larger label for radios
no diérèse on 'uei'?
diff --git a/error.py b/error.py
@@ -42,6 +42,14 @@ class ErrorBadCharacters(Error):
return Error.report(self, "Illegal character: %s"
% ', '.join(["'" + a + "'" for a in self.characters]))
+class ErrorForbiddenPattern(Error):
+ def __init__(self):
+ # TODO give more info
+ pass
+
+ def report(self):
+ return Error.report(self, "Illegal ambiguous pattern")
+
class ErrorBadRhyme(Error):
def __init__(self, expected, inferred):
Error.__init__(self)
@@ -49,6 +57,9 @@ class ErrorBadRhyme(Error):
self.inferred = inferred
def report(self, short=False):
+ # TODO indicate eye rhyme since this is also important
+ # TODO don't indicate more than the minimal required rhyme (in length and
+ # presence of a vowel phoneme)
return Error.report(self, "Bad rhyme %s for type %s (expected %s, inferred %s)"
% (self.kind, self.get_id(), self.fmt(self.expected),
self.fmt(self.inferred)), short)
diff --git a/metric.py b/metric.py
@@ -2,10 +2,11 @@
#coding: utf-8
import re
-from common import normalize, is_vowels, consonants, sure_end_fem
+from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants
from vowels import possible_weights
import haspirater
+
def annotate_aspirated(word):
"""Annotate aspirated 'h'"""
if word[0] != 'h':
@@ -58,7 +59,7 @@ def fit(chunks, pos, left):
left - weight)]
return result
-def feminine(align, verse):
+def feminine(align, verse, phon):
for a in sure_end_fem:
if verse.endswith(a):
return ['F']
@@ -69,13 +70,24 @@ def feminine(align, verse):
return ['F'] # mute -ent
if align[-2][1] > 0 and align[-2][0] == 'e':
return ['M'] # non-mute "-ent" by the choice of metric
- # what now? "tient" vs. "lient" for instance,
- # TODO check pronunciation? :-/
- return ['M', 'F']
+ possible = []
+ # now, we must check pronunciation?
+ # "tient" vs. "lient" for instance, "excellent"...
+ for possible_phon in phon:
+ if possible_phon.endswith(')') or possible_phon.endswith('#'):
+ possible.append('M')
+ else:
+ possible.append('F')
+ if possible_phon.endswith('E') and verse.endswith('aient'):
+ # imparfait and conditionnel are masculine...
+ possible.append('M')
+ return possible
+
-def parse(text, bound):
- """Return possible aligns for text, bound is an upper bound on the
- align length to limit running time"""
+def parse(text, phon, bound, forbidden_ok):
+ """Return possible aligns for text, bound is an upper bound on the align
+ length to limit running time, phon is the pronunciation to help for gender,
+ forbidden_ok is true if we allow classically forbidden patterns"""
original_text = normalize(text)
@@ -112,18 +124,14 @@ def parse(text, bound):
if (words[i] == "onze"):
words[i] = "*" + words[i]
- all_consonants = True
- for x in words[i]:
- if not x in consonants:
- all_consonants = False
- if all_consonants:
- new_word = ''
+ if is_consonants(words[i]):
+ new_word = []
for x in words[i]:
- if (words[i] == 'w'):
- new_word += "doublevé-"
+ if (x == 'w'):
+ new_word.append("doublevé")
else:
- new_word += words[i]+'a-'
- words[i] = new_word
+ new_word.append(x + "a")
+ words[i] = ''.join(new_word)
# aspirated
@@ -131,6 +139,8 @@ def parse(text, bound):
pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)
+ forbidden = False
+
# cut each word in chunks of vowels and consonants, with some specific
# kludges
for i in range(len(words)):
@@ -154,10 +164,28 @@ def parse(text, bound):
words[i] = nwords
# remove mute 'e'
if i > 0:
- if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
- if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
+ if is_vowels(words[i][0], True):
+ if words[i-1][-1] == 'e' and sum(
+ [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
words[i-1].pop(-1)
words[i-1][-1] = words[i-1][-1]+"`"
+ else:
+ if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie':
+ forbidden = True
+ if words[i-1][-1] == 's' and len(words[i-1]):
+ if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie':
+ forbidden = True
+ # TODO there are arcane rules for "aient"
+ # case of "soient"
+ # TODO there are a lot of "oient" in Boileau and Malherbe
+ # so apparently there is no simple way to check that
+ # if words[i-1][-1] == 'nt' and len(words[i-1]):
+ # if words[i-1][-2] == 'oie':
+ # if len(words[i-1]) != 3 or words[i-1][-3] != 's':
+ # forbidden = True
+
+ if forbidden and not forbidden_ok:
+ return None
# group back words
for word in words:
@@ -167,6 +195,6 @@ def parse(text, bound):
# return all possibilities to weigh the vowel clusters, annotated by
# the femininity of the align (depending both on the align and
# original text)
- return list(map((lambda x: (x, feminine(x, original_text))),
+ return list(map((lambda x: (x, feminine(x, original_text, phon))),
fit(chunks, 0, bound)))
diff --git a/rhyme.py b/rhyme.py
@@ -1,22 +1,37 @@
#!/usr/bin/python3 -u
#encoding: utf8
+import copy
import re
import sys
from pprint import pprint
import frhyme
import functools
+from common import consonants
# number of possible rhymes to consider
NBEST = 5
# phonetic vowels
vowel = list("Eeaio592O#@y%u()$")
+liaison = {
+ 'c': 'k',
+ 'd': 't',
+ 'g': 'k',
+ 'k': 'k',
+ 'p': 'p',
+ 'r': 'R',
+ 's': 'z',
+ 't': 't',
+ 'x': 'z',
+ 'z': 'z',
+ }
+
+
class Constraint:
- def __init__(self, phon, eye, aphon):
+ def __init__(self, classical, phon):
self.phon = phon # minimal number of common suffix phones
- self.eye = eye # minimal number of common suffix letters
- self.aphon = aphon # minimal number of common suffix vowel phones
+ self.classical = classical # should we impose classical rhyme rules
def mmax(self, a, b):
"""max, with -1 representing infty"""
@@ -30,14 +45,27 @@ class Constraint:
if not c:
return
self.phon = self.mmax(self.phon, c.phon)
- self.eye = self.mmax(self.eye, c.eye)
- self.aphon = self.mmax(self.aphon, c.aphon)
+ self.eye = self.classical or c.classical
class Rhyme:
- def __init__(self, line, constraint):
+ def apply_mergers(self, phon):
+ return ''.join([(self.mergers[x] if x in self.mergers.keys()
+ else x) for x in phon])
+
+ def supposed_liaison(self, x):
+ if x[-1] in liaison.keys():
+ return x + liaison[x[-1]]
+ return x
+
+ def __init__(self, line, constraint, mergers=[], normande_ok=True):
self.constraint = constraint
- self.phon = lookup(line)
- self.eye = line
+ self.mergers = {}
+ self.normande_ok = normande_ok
+ for phon_set in mergers:
+ for phon in phon_set[1:]:
+ self.mergers[phon] = phon_set[0]
+ self.phon = set([self.apply_mergers(x) for x in self.lookup(line)])
+ self.eye = self.supposed_liaison(consonant_suffix(line))
def match(self, phon, eye):
"""limit our phon and eye to those which match phon and eye and which
@@ -49,31 +77,48 @@ class Rhyme:
if val >= self.constraint.phon and self.constraint.phon >= 0:
new_phon.add(x[-val:])
val = assonance_rhyme(x, y)
- if val >= self.constraint.aphon and self.constraint.aphon >= 0:
- new_phon.add(x[-val:])
self.phon = new_phon
if self.eye:
val = eye_rhyme(self.eye, eye)
- if val >= self.constraint.eye and self.constraint.eye >= 0:
- self.eye = self.eye[-val:]
+ if val == 0:
+ self.eye = ""
else:
- self.eye = None
+ self.eye = self.eye[-val:]
def restrict(self, r):
"""take the intersection between us and rhyme object r"""
self.constraint.restrict(r.constraint)
- self.match(r.phon, r.eye)
+ self.match(set([self.apply_mergers(x) for x in r.phon]),
+ self.supposed_liaison(consonant_suffix(r.eye)))
def feed(self, line, constraint=None):
"""extend us with a line and a constraint"""
- return self.restrict(Rhyme(line, constraint))
+ return self.restrict(Rhyme(line, constraint, self.mergers))
def satisfied(self):
- return self.eye or len(self.phon) > 0
+ return (len(self.eye) >= self.constraint.eye
+ and len(self.phon) > 0 or not self.constraint.classical)
def pprint(self):
pprint(self.phon)
+ def lookup(self, s):
+ """lookup the pronunciation of s, adding rime normande kludges and liaisons"""
+ result = raw_lookup(s)
+ if self.normande_ok and (s.endswith('er') or s.endswith('ers')):
+ result.add("ER")
+ # TODO better here
+ result2 = copy.deepcopy(result)
+ # the case 'ent' would lead to trouble for gender
+ if self.constraint.classical:
+ if s[-1] in liaison.keys() and not s.endswith('ent'):
+ for r in result2:
+ result.add(r + liaison[s[-1]])
+ if (s[-1] == 's'):
+ result.add(r + 's')
+ return result
+
+
def suffix(x, y):
"""length of the longest common suffix of x and y"""
bound = min(len(x), len(y))
@@ -110,11 +155,17 @@ def concat_couples(a, b):
s.add(x + y)
return s
-def lookup(s):
- """lookup the pronunciation of s, adding rime normande kludges"""
- result = raw_lookup(s)
- if s.endswith('er'):
- result.add("ER")
+def consonant_suffix(s):
+ for i in range(len(s)):
+ if not s[-(i+1)] in consonants:
+ break
+ result = s[-(i+1):]
+ if result.endswith('m'):
+ result = result[:-1] + 'n'
+ if result.endswith('à'):
+ result = result[:-1] + 'a'
+ if result.endswith('û'):
+ result = result[:-1] + 'u'
return result
def raw_lookup(s):
@@ -137,8 +188,8 @@ if __name__ == '__main__':
line = line.lower().strip().split(' ')
if len(line) < 1:
continue
- constraint = Constraint(1, -1, -1)
- rhyme = Rhyme(line[0], constraint)
+ constraint = Constraint(True, 1)
+ rhyme = Rhyme(line[0], constraint, self.mergers, self.normande_ok)
for x in line[1:]:
rhyme.feed(x)
rhyme.pprint()
diff --git a/static/tpl/alexandrin.tpl b/static/tpl/alexandrin.tpl
@@ -1 +1,2 @@
-12
+! forbidden_ok:yes
+12 A
diff --git a/static/tpl/classical.tpl b/static/tpl/classical.tpl
@@ -1,4 +1,4 @@
-6/6 A:1 !X
-6/6 A:1 !X
-6/6 B:1 !x
-6/6 B:1 !x
+6/6 A !X
+6/6 A !X
+6/6 B !x
+6/6 B !x
diff --git a/template.py b/template.py
@@ -30,21 +30,41 @@ class Template:
def __init__(self, string):
self.template = []
self.pattern_line_no = 0
+ self.forbidden_ok = False
+ self.normande_ok = True
+ self.mergers = []
self.load(string)
self.line_no = 0
self.position = 0
+ self.prev = None
self.env = {}
self.femenv = {}
self.occenv = {}
self.reject_errors = False
+ def read_option(self, x):
+ key, value = x.split(':')
+ if key == "merge":
+ self.mergers.append(value)
+ elif key == "forbidden_ok":
+ self.forbidden_ok = str2bool(value)
+ elif key == "normande_ok":
+ self.normande_ok = str2bool(value)
+ else:
+ raise ValueError
+
def load(self, s):
"""Load from a string"""
for line in s.split('\n'):
line = line.strip()
self.pattern_line_no += 1
if line != '' and line[0] != '#':
- self.template.append(self.parse_line(line.strip()))
+ if line[0] == '!':
+ # don't count the '!' in the options, that's why we use [1:]
+ for option in line.split()[1:]:
+ self.read_option(option)
+ else:
+ self.template.append(self.parse_line(line.strip()))
def count(self, align):
"""total weight of an align"""
@@ -69,15 +89,37 @@ class Template:
line_with_case = normalize(line, downcase=False)
line = normalize(line)
pattern = self.get()
+
+ errors = []
+
+ # rhymes
+ if pattern.myid not in self.env.keys():
+ # initialize the rhyme
+ self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint,
+ self.mergers, self.normande_ok)
+ else:
+ # update the rhyme
+ old_p = self.env[pattern.myid].phon
+ old_e = self.env[pattern.myid].eye
+ self.env[pattern.myid].feed(line, pattern.constraint)
+ # no more possible rhymes, something went wrong
+ if not self.env[pattern.myid].satisfied():
+ self.env[pattern.myid].phon = old_p
+ self.env[pattern.myid].eye = old_e
+ errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
+
# compute alignments, check hemistiches, sort by score
- possible = parse(line, pattern.length + 2)
+ possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2,
+ self.forbidden_ok)
+ if not possible:
+ errors.append(error.ErrorForbiddenPattern())
+ possible = []
+ return errors, pattern
possible = list(map((lambda p: (p[0], p[1],
check_hemistiches(p[0], pattern.hemistiches))), possible))
possible = map((lambda x: (self.rate(pattern, x), x)), possible)
possible = sorted(possible, key=(lambda x: x[0]))
- errors = []
-
# check characters
illegal = set()
for x in line:
@@ -95,21 +137,6 @@ class Template:
possible = [(score, align) for (score, align) in possible
if score == possible[0][0]]
- # rhymes
- if pattern.myid not in self.env.keys():
- # initialize the rhyme
- self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint)
- else:
- # update the rhyme
- old_p = self.env[pattern.myid].phon
- old_e = self.env[pattern.myid].eye
- self.env[pattern.myid].feed(line, pattern.constraint)
- # no more possible rhymes, something went wrong
- if not self.env[pattern.myid].satisfied():
- self.env[pattern.myid].phon = old_p
- self.env[pattern.myid].eye = old_e
- errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
-
# occurrences
if pattern.myid not in self.occenv.keys():
self.occenv[pattern.myid] = {}
@@ -159,13 +186,17 @@ class Template:
femid = str(self.pattern_line_no) # unique
idsplit = myid.split(':')
if len(idsplit) >= 2:
- constraint = [int(x) for x in idsplit[-1].split('|')]
+ constraint = idsplit[-1].split('|')
+ if len(constraint) > 0:
+ constraint[0] = False if constraint[0] == "no" else constraint[0]
+ if len(constraint) > 1:
+ constraint[1] = int(constraint[1])
else:
constraint = []
if len(constraint) == 0:
constraint.append(1)
- while len(constraint) < 3:
- constraint.append(-1)
+ if len(constraint) < 2:
+ constraint.append(True)
return Pattern(metric, myid, femid, rhyme.Constraint(*constraint))
def reset_conditional(self, d):
@@ -212,3 +243,10 @@ class Template:
self.line_no -= 1
return errors
+def str2bool(x):
+ if x == "yes":
+ return True
+ if x == "no":
+ return False
+ raise ValueError
+
diff --git a/test/boileau.tpl b/test/boileau.tpl
@@ -1,4 +1,5 @@
-6/6 A:1|2 !X
-6/6 A:1|2 !X
-6/6 B:1|2 !x
-6/6 B:1|2 !x
+! merge:oO
+6/6 A !X
+6/6 A !X
+6/6 B !x
+6/6 B !x
diff --git a/views/about.html b/views/about.html
@@ -25,6 +25,7 @@ aucun des modèles ne vous convient, vous pouvez <a href="#template">écrire le
vôtre</a>.</p>
<h2>Qu'est-ce qui est vérifié par plint ?</h2>
+<p>TODO outdated.</p>
<p>Ces explications simplifiées ne sont pas exhaustives. Pour une description
exacte, se reporter au code source.</p>
<dl>
@@ -77,6 +78,7 @@ alexandrin classique parfaitement valide.</p>
<h2 id="template">Comment faire pour définir ses propres modèles ?</h2>
+<p>TODO outdated.</p>
<p>
Chaque ligne du format correspond à un vers (ie. une ligne non-vide). Une ligne
peut indiquer trois éléments séparés par une espace : la métrique, l'identifiant
@@ -181,6 +183,7 @@ predefined templates suit you, you can <a href="#template">write your
own</a>.</p>
<h2>What does plint check?</h2>
+<p>TODO outdated.</p>
<p>Here are some simplified explanations. To know all the details, go read the
source code.</p>
<dl>
@@ -230,6 +233,7 @@ classical alexandrine.</p>
<h2 id="template">How can I define my own templates?</h2>
+<p>TODO outdated.</p>
<p>Each template line will be checked against a non-blank poem line. When the
template is finished, it starts over from the beginning, and the rhyme and rhyme
genre identifiers (see below) are reinitialized unless they start with a '!'.