Merge branch 'classical' - plint - French poetry validator (local mirror of https://gitlab.com/a3nm/plint)

commit 01d617d42d2d62251e7a92c79edf4083d8b43e96
parent 691e8e776f1992457b2f922e0a8e580406728c79
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 18 May 2012 21:01:35 +0200

Merge branch 'classical'

Conflicts:
	TODO
	metric.py
	template.py

Diffstat:
TODO  | 3 +++
error.py  | 11 +++++++++++
metric.py  | 70 +++++++++++++++++++++++++++++++++++++++++++++++++---------------------
rhyme.py  | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
static/tpl/alexandrin.tpl  | 3 ++-
static/tpl/classical.tpl  | 8 ++++----
template.py  | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
test/boileau.tpl  | 9 +++++----
views/about.html  | 4 ++++

9 files changed, 212 insertions(+), 75 deletions(-)
diff --git a/TODO b/TODO
@@ -1,3 +1,6 @@
+- options for tolerant diaeresis, no diaeresis, classical diaeresis
+- better check of hemistiches with known words
+
 larger label for radios
 
 no diérèse on 'uei'?
diff --git a/error.py b/error.py
@@ -42,6 +42,14 @@ class ErrorBadCharacters(Error):
     return Error.report(self, "Illegal character: %s"
         % ', '.join(["'" + a + "'" for a in self.characters]))
 
+class ErrorForbiddenPattern(Error):
+  def __init__(self):
+    # TODO give more info
+    pass
+
+  def report(self):
+    return Error.report(self, "Illegal ambiguous pattern")
+
 class ErrorBadRhyme(Error):
   def __init__(self, expected, inferred):
     Error.__init__(self)
@@ -49,6 +57,9 @@ class ErrorBadRhyme(Error):
     self.inferred = inferred
 
   def report(self, short=False):
+    # TODO indicate eye rhyme since this is also important
+    # TODO don't indicate more than the minimal required rhyme (in length and
+    # presence of a vowel phoneme)
     return Error.report(self, "Bad rhyme %s for type %s (expected %s, inferred %s)"
         % (self.kind, self.get_id(), self.fmt(self.expected),
           self.fmt(self.inferred)), short)
diff --git a/metric.py b/metric.py
@@ -2,10 +2,11 @@
 #coding: utf-8
 
 import re
-from common import normalize, is_vowels, consonants, sure_end_fem
+from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants
 from vowels import possible_weights
 import haspirater
 
+
 def annotate_aspirated(word):
   """Annotate aspirated 'h'"""
   if word[0] != 'h':
@@ -58,7 +59,7 @@ def fit(chunks, pos, left):
         left - weight)]
     return result
 
-def feminine(align, verse):
+def feminine(align, verse, phon):
   for a in sure_end_fem:
     if verse.endswith(a):
       return ['F']
@@ -69,13 +70,24 @@ def feminine(align, verse):
     return ['F'] # mute -ent
   if align[-2][1] > 0 and align[-2][0] == 'e':
     return ['M'] # non-mute "-ent" by the choice of metric
-  # what now? "tient" vs. "lient" for instance, 
-  # TODO check pronunciation? :-/
-  return ['M', 'F']
+  possible = []
+  # now, we must check pronunciation?
+  # "tient" vs. "lient" for instance, "excellent"...
+  for possible_phon in phon:
+    if possible_phon.endswith(')') or possible_phon.endswith('#'):
+      possible.append('M')
+    else:
+      possible.append('F')
+      if possible_phon.endswith('E') and verse.endswith('aient'):
+        # imparfait and conditionnel are masculine...
+        possible.append('M')
+  return possible
+
 
-def parse(text, bound):
-  """Return possible aligns for text, bound is an upper bound on the
-  align length to limit running time"""
+def parse(text, phon, bound, forbidden_ok):
+  """Return possible aligns for text, bound is an upper bound on the align
+  length to limit running time, phon is the pronunciation to help for gender,
+  forbidden_ok is true if we allow classically forbidden patterns"""
 
   original_text = normalize(text)
 
@@ -112,18 +124,14 @@ def parse(text, bound):
     if (words[i] == "onze"):
       words[i] = "*" + words[i]
 
-    all_consonants = True
-    for x in words[i]:
-      if not x in consonants:
-        all_consonants = False
-    if all_consonants:
-      new_word = ''
+    if is_consonants(words[i]):
+      new_word = []
       for x in words[i]:
-        if (words[i] == 'w'):
-          new_word += "doublevé-"
+        if (x == 'w'):
+          new_word.append("doublevé")
         else:
-          new_word += words[i]+'a-'
-      words[i] = new_word
+          new_word.append(x + "a")
+      words[i] = ''.join(new_word)
 
 
   # aspirated
@@ -131,6 +139,8 @@ def parse(text, bound):
 
   pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)
 
+  forbidden = False
+
   # cut each word in chunks of vowels and consonants, with some specific
   # kludges
   for i in range(len(words)):
@@ -154,10 +164,28 @@ def parse(text, bound):
     words[i] = nwords
     # remove mute 'e'
     if i > 0:
-      if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
-        if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
+      if is_vowels(words[i][0], True):
+        if words[i-1][-1] == 'e' and sum(
+            [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
           words[i-1].pop(-1)
           words[i-1][-1] = words[i-1][-1]+"`"
+      else:
+        if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie':
+          forbidden = True
+      if words[i-1][-1] == 's' and len(words[i-1]):
+        if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie':
+          forbidden = True
+        # TODO there are arcane rules for "aient"
+      # case of "soient"
+      # TODO there are a lot of "oient" in Boileau and Malherbe
+      # so apparently there is no simple way to check that
+      # if words[i-1][-1] == 'nt' and len(words[i-1]):
+      #   if words[i-1][-2] == 'oie':
+      #     if len(words[i-1]) != 3 or words[i-1][-3] != 's':
+      #       forbidden = True
+
+  if forbidden and not forbidden_ok:
+    return None
 
   # group back words
   for word in words:
@@ -167,6 +195,6 @@ def parse(text, bound):
   # return all possibilities to weigh the vowel clusters, annotated by
   # the femininity of the align (depending both on the align and
   # original text)
-  return list(map((lambda x: (x, feminine(x, original_text))),
+  return list(map((lambda x: (x, feminine(x, original_text, phon))),
     fit(chunks, 0, bound)))
 
diff --git a/rhyme.py b/rhyme.py
@@ -1,22 +1,37 @@
 #!/usr/bin/python3 -u
 #encoding: utf8
 
+import copy
 import re
 import sys
 from pprint import pprint
 import frhyme
 import functools
+from common import consonants
 
 # number of possible rhymes to consider
 NBEST = 5
 # phonetic vowels
 vowel = list("Eeaio592O#@y%u()$")
 
+liaison = {
+    'c': 'k',
+    'd': 't',
+    'g': 'k',
+    'k': 'k',
+    'p': 'p',
+    'r': 'R',
+    's': 'z',
+    't': 't',
+    'x': 'z',
+    'z': 'z',
+    }
+
+
 class Constraint:
-  def __init__(self, phon, eye, aphon):
+  def __init__(self, classical, phon):
     self.phon = phon # minimal number of common suffix phones
-    self.eye = eye # minimal number of common suffix letters
-    self.aphon = aphon # minimal number of common suffix vowel phones
+    self.classical = classical # should we impose classical rhyme rules
 
   def mmax(self, a, b):
     """max, with -1 representing infty"""
@@ -30,14 +45,27 @@ class Constraint:
     if not c:
       return
     self.phon = self.mmax(self.phon, c.phon)
-    self.eye = self.mmax(self.eye, c.eye)
-    self.aphon = self.mmax(self.aphon, c.aphon)
+    self.eye = self.classical or c.classical
 
 class Rhyme:
-  def __init__(self, line, constraint):
+  def apply_mergers(self, phon):
+    return ''.join([(self.mergers[x] if x in self.mergers.keys()
+        else x) for x in phon])
+
+  def supposed_liaison(self, x):
+    if x[-1] in liaison.keys():
+      return x + liaison[x[-1]]
+    return x
+
+  def __init__(self, line, constraint, mergers=[], normande_ok=True):
     self.constraint = constraint
-    self.phon = lookup(line)
-    self.eye = line
+    self.mergers = {}
+    self.normande_ok = normande_ok
+    for phon_set in mergers:
+      for phon in phon_set[1:]:
+        self.mergers[phon] = phon_set[0]
+    self.phon = set([self.apply_mergers(x) for x in self.lookup(line)])
+    self.eye = self.supposed_liaison(consonant_suffix(line))
 
   def match(self, phon, eye):
     """limit our phon and eye to those which match phon and eye and which
@@ -49,31 +77,48 @@ class Rhyme:
         if val >= self.constraint.phon and self.constraint.phon >= 0:
           new_phon.add(x[-val:])
         val = assonance_rhyme(x, y)
-        if val >= self.constraint.aphon and self.constraint.aphon >= 0:
-          new_phon.add(x[-val:])
     self.phon = new_phon
     if self.eye:
       val = eye_rhyme(self.eye, eye)
-      if val >= self.constraint.eye and self.constraint.eye >= 0:
-        self.eye = self.eye[-val:]
+      if val == 0:
+        self.eye = ""
       else:
-        self.eye = None
+        self.eye = self.eye[-val:]
 
   def restrict(self, r):
     """take the intersection between us and rhyme object r"""
     self.constraint.restrict(r.constraint)
-    self.match(r.phon, r.eye)
+    self.match(set([self.apply_mergers(x) for x in r.phon]),
+        self.supposed_liaison(consonant_suffix(r.eye)))
 
   def feed(self, line, constraint=None):
     """extend us with a line and a constraint"""
-    return self.restrict(Rhyme(line, constraint))
+    return self.restrict(Rhyme(line, constraint, self.mergers))
 
   def satisfied(self):
-    return self.eye or len(self.phon) > 0
+    return (len(self.eye) >= self.constraint.eye
+        and len(self.phon) > 0 or not self.constraint.classical)
 
   def pprint(self):
     pprint(self.phon)
 
+  def lookup(self, s):
+    """lookup the pronunciation of s, adding rime normande kludges and liaisons"""
+    result = raw_lookup(s)
+    if self.normande_ok and (s.endswith('er') or s.endswith('ers')):
+      result.add("ER")
+    # TODO better here
+    result2 = copy.deepcopy(result)
+    # the case 'ent' would lead to trouble for gender
+    if self.constraint.classical:
+      if s[-1] in liaison.keys() and not s.endswith('ent'):
+        for r in result2:
+          result.add(r + liaison[s[-1]])
+          if (s[-1] == 's'):
+            result.add(r + 's')
+    return result
+
+
 def suffix(x, y):
   """length of the longest common suffix of x and y"""
   bound = min(len(x), len(y))
@@ -110,11 +155,17 @@ def concat_couples(a, b):
       s.add(x + y)
   return s
 
-def lookup(s):
-  """lookup the pronunciation of s, adding rime normande kludges"""
-  result = raw_lookup(s)
-  if s.endswith('er'):
-    result.add("ER")
+def consonant_suffix(s):
+  for i in range(len(s)):
+    if not s[-(i+1)] in consonants:
+      break
+  result = s[-(i+1):]
+  if result.endswith('m'):
+    result = result[:-1] + 'n'
+  if result.endswith('à'):
+    result = result[:-1] + 'a'
+  if result.endswith('û'):
+    result = result[:-1] + 'u'
   return result
 
 def raw_lookup(s):
@@ -137,8 +188,8 @@ if __name__ == '__main__':
     line = line.lower().strip().split(' ')
     if len(line) < 1:
       continue
-    constraint = Constraint(1, -1, -1)
-    rhyme = Rhyme(line[0], constraint)
+    constraint = Constraint(True, 1)
+    rhyme = Rhyme(line[0], constraint, self.mergers, self.normande_ok)
     for x in line[1:]:
       rhyme.feed(x)
       rhyme.pprint()
diff --git a/static/tpl/alexandrin.tpl b/static/tpl/alexandrin.tpl
@@ -1 +1,2 @@
-12
+! forbidden_ok:yes
+12 A
diff --git a/static/tpl/classical.tpl b/static/tpl/classical.tpl
@@ -1,4 +1,4 @@
-6/6 A:1 !X
-6/6 A:1 !X
-6/6 B:1 !x
-6/6 B:1 !x
+6/6 A !X
+6/6 A !X
+6/6 B !x
+6/6 B !x
diff --git a/template.py b/template.py
@@ -30,21 +30,41 @@ class Template:
   def __init__(self, string):
     self.template = []
     self.pattern_line_no = 0
+    self.forbidden_ok = False
+    self.normande_ok = True
+    self.mergers = []
     self.load(string)
     self.line_no = 0
     self.position = 0
+    self.prev = None
     self.env = {}
     self.femenv = {}
     self.occenv = {}
     self.reject_errors = False
 
+  def read_option(self, x):
+    key, value = x.split(':')
+    if key == "merge":
+      self.mergers.append(value)
+    elif key == "forbidden_ok":
+      self.forbidden_ok = str2bool(value)
+    elif key == "normande_ok":
+      self.normande_ok = str2bool(value)
+    else:
+      raise ValueError
+
   def load(self, s):
     """Load from a string"""
     for line in s.split('\n'):
       line = line.strip()
       self.pattern_line_no += 1
       if line != '' and line[0] != '#':
-        self.template.append(self.parse_line(line.strip()))
+        if line[0] == '!':
+          # don't count the '!' in the options, that's why we use [1:]
+          for option in line.split()[1:]:
+            self.read_option(option)
+        else:
+          self.template.append(self.parse_line(line.strip()))
 
   def count(self, align):
     """total weight of an align"""
@@ -69,15 +89,37 @@ class Template:
     line_with_case = normalize(line, downcase=False)
     line = normalize(line)
     pattern = self.get()
+
+    errors = []
+
+    # rhymes
+    if pattern.myid not in self.env.keys():
+      # initialize the rhyme
+      self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint,
+          self.mergers, self.normande_ok)
+    else:
+      # update the rhyme
+      old_p = self.env[pattern.myid].phon
+      old_e = self.env[pattern.myid].eye
+      self.env[pattern.myid].feed(line, pattern.constraint)
+      # no more possible rhymes, something went wrong
+      if not self.env[pattern.myid].satisfied():
+        self.env[pattern.myid].phon = old_p
+        self.env[pattern.myid].eye = old_e
+        errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
+
     # compute alignments, check hemistiches, sort by score
-    possible = parse(line, pattern.length + 2)
+    possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2,
+        self.forbidden_ok)
+    if not possible:
+      errors.append(error.ErrorForbiddenPattern())
+      possible = []
+      return errors, pattern
     possible = list(map((lambda p: (p[0], p[1],
       check_hemistiches(p[0], pattern.hemistiches))), possible))
     possible = map((lambda x: (self.rate(pattern, x), x)), possible)
     possible = sorted(possible, key=(lambda x: x[0]))
 
-    errors = []
-
     # check characters
     illegal = set()
     for x in line:
@@ -95,21 +137,6 @@ class Template:
     possible = [(score, align) for (score, align) in possible
         if score == possible[0][0]]
 
-    # rhymes
-    if pattern.myid not in self.env.keys():
-      # initialize the rhyme
-      self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint)
-    else:
-      # update the rhyme
-      old_p = self.env[pattern.myid].phon
-      old_e = self.env[pattern.myid].eye
-      self.env[pattern.myid].feed(line, pattern.constraint)
-      # no more possible rhymes, something went wrong
-      if not self.env[pattern.myid].satisfied():
-        self.env[pattern.myid].phon = old_p
-        self.env[pattern.myid].eye = old_e
-        errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
-
     # occurrences
     if pattern.myid not in self.occenv.keys():
       self.occenv[pattern.myid] = {}
@@ -159,13 +186,17 @@ class Template:
       femid = str(self.pattern_line_no) # unique
     idsplit = myid.split(':')
     if len(idsplit) >= 2:
-      constraint = [int(x) for x in idsplit[-1].split('|')]
+      constraint = idsplit[-1].split('|')
+      if len(constraint) > 0:
+        constraint[0] = False if constraint[0] == "no" else constraint[0]
+      if len(constraint) > 1:
+        constraint[1] = int(constraint[1])
     else:
       constraint = []
     if len(constraint) == 0:
       constraint.append(1)
-    while len(constraint) < 3:
-      constraint.append(-1)
+    if len(constraint) < 2:
+      constraint.append(True)
     return Pattern(metric, myid, femid, rhyme.Constraint(*constraint))
 
   def reset_conditional(self, d):
@@ -212,3 +243,10 @@ class Template:
       self.line_no -= 1
     return errors
 
+def str2bool(x):
+  if x == "yes":
+    return True
+  if x == "no":
+    return False
+  raise ValueError
+
diff --git a/test/boileau.tpl b/test/boileau.tpl
@@ -1,4 +1,5 @@
-6/6 A:1|2 !X
-6/6 A:1|2 !X
-6/6 B:1|2 !x
-6/6 B:1|2 !x
+! merge:oO
+6/6 A !X
+6/6 A !X
+6/6 B !x
+6/6 B !x
diff --git a/views/about.html b/views/about.html
@@ -25,6 +25,7 @@ aucun des modèles ne vous convient, vous pouvez <a href="#template">écrire le
   vôtre</a>.</p>
 
 <h2>Qu'est-ce qui est vérifié par plint&nbsp;?</h2>
+<p>TODO outdated.</p>
 <p>Ces explications simplifiées ne sont pas exhaustives. Pour une description
 exacte, se reporter au code source.</p>
 <dl>
@@ -77,6 +78,7 @@ alexandrin classique parfaitement valide.</p>
 
 <h2 id="template">Comment faire pour définir ses propres modèles&nbsp;?</h2>
 
+<p>TODO outdated.</p>
 <p>
 Chaque ligne du format correspond à un vers (ie. une ligne non-vide). Une ligne
 peut indiquer trois éléments séparés par une espace : la métrique, l'identifiant
@@ -181,6 +183,7 @@ predefined templates suit you, you can <a href="#template">write your
   own</a>.</p>
 
 <h2>What does plint check?</h2>
+<p>TODO outdated.</p>
 <p>Here are some simplified explanations. To know all the details, go read the
 source code.</p>
 <dl>
@@ -230,6 +233,7 @@ classical alexandrine.</p>
 
 <h2 id="template">How can I define my own templates?</h2>
 
+<p>TODO outdated.</p>
 <p>Each template line will be checked against a non-blank poem line. When the
 template is finished, it starts over from the beginning, and the rhyme and rhyme
 genre identifiers (see below) are reinitialized unless they start with a '!'.

	plint French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
	git clone https://a3nm.net/git/plint/
	Log \| Files \| Refs \| README

TODO	\|	3	+++
error.py	\|	11	+++++++++++
metric.py	\|	70	+++++++++++++++++++++++++++++++++++++++++++++++++---------------------
rhyme.py	\|	97	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
static/tpl/alexandrin.tpl	\|	3	++-
static/tpl/classical.tpl	\|	8	++++----
template.py	\|	82	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
test/boileau.tpl	\|	9	+++++----
views/about.html	\|	4	++++