plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/

commit bdf7000c91d00282d7bc5595459458abbbbbe4df
parent b308017c164fe7bd94a6be8a2c3993a35db1cee0
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Thu, 15 Aug 2019 15:26:47 +0200

Merge gitlab.com:a3nm/plint

Merge my own commits with Julien's

Diffstat:
.gitignore | 3+++
compare_test_output.py | 12++++++++++++
lexique_comparison/count_syllables_plint.py | 4+++-
plint.py | 110++++++++++++++++++++++++++++++++++++++++----------------------------------------
plint/chunk.py | 595+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
plint/chunks.py | 554+++++++++++--------------------------------------------------------------------
plint/common.py | 7+++++++
plint/error.py | 6+++---
plint/pattern.py | 32++++++++++++++++++++++++++++++++
plint/plint_irc.py | 2+-
plint/plint_web.py | 329++++++++++++++++++++++++++++++++++++++++++-------------------------------------
plint/template.py | 547+++++++++++++++++++++++++++++++++++++------------------------------------------
plint/tests/test_bad_chars.py | 5+++--
plint/tests/test_counts.py | 3++-
plint/tests/test_eliminate.py | 5+++--
plint/tests/test_gender.py | 13+++++++------
plint/tests/test_hiatus.py | 13+++++++------
plint/tests/test_sanity_check.py | 11++++++-----
plint/tests/test_sanity_check2.py | 3++-
plint/verse.py | 32++++++++++++++------------------
plint/vowels.py | 128-------------------------------------------------------------------------------
test.sh | 24++++++++++++++++++++++--
22 files changed, 1280 insertions(+), 1158 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,6 @@
 __pycache__/*
+.idea
+Lexique382.tsv
 frhyme
 frhyme/*
 haspirater
@@ -35,3 +37,4 @@
 final_syneresis2.ctx
 coverage
 .coverage
 ouliplint/stanford-postagger-full-2013-11-12/
+test_temp.txt
diff --git a/compare_test_output.py b/compare_test_output.py
@@ -0,0 +1,12 @@
+import sys
+
+file0 = sys.argv[1]
+file1 = sys.argv[2]
+
+with open(file0) as f:
+    content0 = f.read()
+
+with open(file1) as f:
+    content1 = f.read()
+
+print(int(sorted(content0) == sorted(content1)))
diff --git a/lexique_comparison/count_syllables_plint.py b/lexique_comparison/count_syllables_plint.py
@@ -4,12 +4,14 @@ import os
 import sys
 
 # modules are in the parent folder
+import plint.pattern
+
 sys.path.insert(1, os.path.join(sys.path[0], '..'))
 
 from plint import template, verse, rhyme
 
 templateobj = template.Template()
-patternobj = template.Pattern("12")
+patternobj = plint.pattern.Pattern("12")
 
 for l in sys.stdin.readlines():
     w = (l.strip().split("\t"))[0]
diff --git a/plint.py b/plint.py
@@ -5,64 +5,64 @@ import sys
 
 
 def run():
-  ok = True
-  f2 = None
-  nsyl = None
-  offset = 0
-  if len(sys.argv) >= 4:
-    f2 = open(sys.argv[3], 'w')
-  if len(sys.argv) >= 5:
-    nsyl = int(sys.argv[4])
-  if len(sys.argv) == 6:
-    offset = int(sys.argv[5])
-  should_end = False
-  while True:
-    line = sys.stdin.readline()
-    if not line:
-      should_end = True
-      line = ""
-    errors = template.check(line, f2, last=should_end, nsyl=nsyl, offset=offset)
-    if errors:
-      print(errors.report(), file=sys.stderr)
-      ok = False
-    if should_end:
-      break
-  return ok
+    ok = True
+    f2 = None
+    nsyl = None
+    offset = 0
+    if len(sys.argv) >= 4:
+        f2 = open(sys.argv[3], 'w')
+    if len(sys.argv) >= 5:
+        nsyl = int(sys.argv[4])
+    if len(sys.argv) == 6:
+        offset = int(sys.argv[5])
+    should_end = False
+    while True:
+        line = sys.stdin.readline()
+        if not line:
+            should_end = True
+            line = ""
+        errors = template.check(line, f2, last=should_end, n_syllables=nsyl, offset=offset)
+        if errors:
+            print(errors.report(), file=sys.stderr)
+            ok = False
+        if should_end:
+            break
+    return ok
 
 
-if __name__ == '__main__':
-  localization.init_locale()
-  if len(sys.argv) < 2 or len(sys.argv) > 6:
-    print(_("Usage: %s TEMPLATE [DFILE [OCONTEXT [NSYL [OFFSET]]]]") % sys.argv[0],
-          file=sys.stderr)
-    print(_("Check stdin according to TEMPLATE, report errors on stdout"),
-          file=sys.stderr)
-    print(_("For internal use:"),
-          file=sys.stderr)
-    print(_("DFILE is the diaeresis file, OCONTEXT is the context output file"),
-          file=sys.stderr)
-    print(_("NSYL is the assigned weight to the last chunk (diaeresis training)"),
-          file=sys.stderr)
-    print(_("OFFSET is to add after the last chunk (diaeresis training)"),
-          file=sys.stderr)
-    sys.exit(2)
-  template_name = sys.argv[1]
-  if len(sys.argv) > 2:
-    diaeresis_name = sys.argv[2]
-  else:
-    diaeresis_name = "../data/diaeresis.json"
-  diaeresis.set_diaeresis(diaeresis_name)
+if __name__ == '__main__':
+    localization.init_locale()
+    if len(sys.argv) < 2 or len(sys.argv) > 6:
+        print(_("Usage: %s TEMPLATE [DFILE [OCONTEXT [NSYL [OFFSET]]]]") % sys.argv[0],
+              file=sys.stderr)
+        print(_("Check stdin according to TEMPLATE, report errors on stdout"),
+              file=sys.stderr)
+        print(_("For internal use:"),
+              file=sys.stderr)
+        print(_("DFILE is the diaeresis file, OCONTEXT is the context output file"),
+              file=sys.stderr)
+        print(_("NSYL is the assigned weight to the last chunk (diaeresis training)"),
+              file=sys.stderr)
+        print(_("OFFSET is to add after the last chunk (diaeresis training)"),
+              file=sys.stderr)
+        sys.exit(2)
-  f = open(template_name)
-  x = f.read()
-  f.close()
+    template_name = sys.argv[1]
+    if len(sys.argv) > 2:
+        diaeresis_name = sys.argv[2]
+    else:
+        diaeresis_name = "../data/diaeresis.json"
+    diaeresis.set_diaeresis(diaeresis_name)
-  try:
-    template = template.Template(x)
-  except error.TemplateLoadError as e:
-    print("Could not load template %s: %s" % (template_name, e.msg), file=sys.stderr)
-    sys.exit(2)
+    f = open(template_name)
+    x = f.read()
+    f.close()
-  ok = run()
-  sys.exit(0 if ok else 1)
+    try:
+        template = template.Template(x)
+    except error.TemplateLoadError as e:
+        print("Could not load template %s: %s" % (template_name, e.msg), file=sys.stderr)
+        sys.exit(2)
+    ok = run()
+    sys.exit(0 if ok else 1)
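The compare_test_output.py script added above reports whether two files contain
the same multiset of characters, regardless of line order; this is how the
refactored code is validated against reference output (see the test.sh changes
in the diffstat). A minimal usage sketch, assuming both output files exist; the
file names here are made up for illustration:

    import subprocess

    # prints "1" if the two files contain the same characters up to
    # reordering, "0" otherwise
    result = subprocess.run(
        ["python3", "compare_test_output.py", "expected_output.txt", "test_temp.txt"],
        capture_output=True, text=True)
    print("outputs match" if result.stdout.strip() == "1" else "outputs differ")
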
diff --git a/plint/chunk.py b/plint/chunk.py
@@ -0,0 +1,595 @@
+import re
+
+from haspirater import haspirater
+from plint import common, diaeresis, error
+from plint.common import normalize, strip_accents_one, is_consonants, APOSTROPHES, is_vowels, get_consonants_regex, \
+    strip_accents, SURE_END_FEM
+from plint.vowels import contains_trema, intersperse
+
+
+DEFAULT_THRESHOLD = 3
+
+
+class Chunk:
+
+    def __init__(self, word, verse):
+        self.original = word
+        self.text = normalize(word, rm_apostrophe=True)
+        self.hemistiche = None
+        self.error = None
+        self.illegal_str = None
+        self.weights = None
+        self.had_hyphen = None
+        self.text_pron = None
+        self.elision = None
+        self.no_hiatus = None
+        self.elidable = None
+        self.word_end = False
+        # TODO What is a weight without s?
+        self.weight = None
+        self.verse = verse
+
+    def __repr__(self):
+        return "Chunk(" \
+               + "original:" + self.original \
+               + ", text:" + self.text \
+               + ", weights:" + str(self.weights or []) \
+               + ", weight:" + str(self.weight or "") \
+               + ", elidable:" + str(self.elidable or False) \
+               + ", elision:" + str(self.elision or False) \
+               + ", hemistiche:" + str(self.hemistiche) \
+               + ", error:" + str(self.error) \
+               + ", illegal_str:" + str(self.illegal_str) \
+               + ", had_hypher:" + str(self.had_hyphen) \
+               + ", text_pron:" + str(self.text_pron) \
+               + ", no_hiatus:" + str(self.no_hiatus) \
+               + ", word_end:" + str(self.word_end) \
+               + ")" + "\n"
+
+    def copy(self):
+        new_chunk = Chunk(self.original, self.verse)
+        new_chunk.original = self.original
+        new_chunk.text = self.text
+        new_chunk.hemistiche = self.hemistiche
+        new_chunk.error = self.error
+        new_chunk.illegal_str = self.illegal_str
+        new_chunk.weights = self.weights
+        new_chunk.had_hyphen = self.had_hyphen
+        new_chunk.text_pron = self.text_pron
+        new_chunk.elision = self.elision
+        new_chunk.no_hiatus = self.no_hiatus
+        new_chunk.elidable = self.elidable
+        new_chunk.word_end = self.word_end
+        new_chunk.weight = self.weight
+        return new_chunk
+
+    def set_hemistiche(self, hemis):
+        self.hemistiche = hemis
+
+    def check_forbidden_characters(self):
+        es = ""
+        for x in self.text:
+            if not common.remove_punctuation(strip_accents_one(x)[0].lower()) in common.LEGAL:
+                es += 'I'
+                self.error = "illegal"
+            else:
+                es += ' '
+        if self.error is not None and self.error == "illegal":
+            self.illegal_str = es
+
+    def simplify_gu_qu(self, next_chunk):
+        if next_chunk.text.startswith('u'):
+            if self.text.endswith('q'):
+                next_chunk.text = next_chunk.text[1:]
+                if next_chunk.text == '':
+                    self.original += next_chunk.original
+                    next_chunk.original = ''
+            if self.text.endswith('g') and len(next_chunk.text) >= 2:
+                if next_chunk.text[1] in "eéèa":
+                    next_chunk.text = next_chunk.text[1:]
+
+    def elide_inside_words(self, all_next_chunks):
+        if self.text == "e-":
+            self.weights = [0]  # force elision
+        next_chunk = all_next_chunks[0]
+        if self.text == "e" and next_chunk.text.startswith("-h"):
+            # collect what follows until the next hyphen or end
+            flw = next_chunk.original.split('-')[1]
+            for future_chunk in all_next_chunks[1:]:
+                flw += future_chunk.original.split('-')[0]
+                if '-' in future_chunk.original:
+                    break
+            # TODO: not sure if this reconstruction of the original word is bulletproof...
+            if haspirater.lookup(normalize(flw)):
+                self.weights = [0]
+            else:
+                self.weights = [1]
+
+    def remove_leading_and_trailing_crap(self):
+        seen_space = False
+        seen_hyphen = False
+        while len(self.text) > 0 and self.text[0] in ' -':
+            if self.text[0] == ' ':
+                seen_space = True
+            else:
+                seen_hyphen = True
+            self.text = self.text[1:]
+        while len(self.text) > 0 and self.text[-1] in ' -':
+            if self.text[-1] == ' ':
+                seen_space = True
+            else:
+                seen_hyphen = True
+            self.text = self.text[:-1]
+        if seen_hyphen and not seen_space:
+            self.had_hyphen = True
+
+    def is_empty(self):
+        return len(self.text) == 0
+
+    def add_original(self, other_chunk):
+        self.original += other_chunk.original
+
+    def create_acronym(self):
+        new_chunks = []
+        for j, character in enumerate(self.text):
+            try:
+                new_chunk_content = LETTERS[character]
+                # hack: the final 'e's in letters are just to help pronunciation
+                # inference and are only needed at end of word, otherwise they will
+                # mess syllable count up
+                if j < len(self.text) - 1 and new_chunk_content[-1] == 'e':
+                    new_chunk_content = new_chunk_content[:-1]
+            except KeyError:
+                new_chunk_content = character + 'é'
+            new_chunks += [(j, x) for x in re.split(get_consonants_regex(), new_chunk_content)]
+        new_chunks = [x for x in new_chunks if len(x[1]) > 0]
+        new_word = []
+        last_opos = -1
+        for j, (original_position, character) in enumerate(new_chunks):
+            part = ""
+            if j == len(new_chunks) - 1:
+                # don't miss final spaces
+                part = self.original[last_opos + 1:]
+            elif last_opos < original_position:
+                part = self.original[last_opos + 1:original_position + 1]
+                last_opos = original_position
+            # allow or forbid elision because of possible ending '-e' before
+            # forbid hiatus both for this and for preceding
+            # instruct that we must use text for the pronunciation
+            new_chunk = Chunk(part, self.verse)
+            new_chunk.original = part
+            new_chunk.text = character
+            new_chunk.text_pron = True
+            new_chunk.elision = [False, True]
+            new_chunk.no_hiatus = True
+            new_word.append(new_chunk)
+        # propagate information from splithyph
+        new_word[-1].hemistiche = self.hemistiche
+        return new_word
+
+    def check_elidable(self):
+        if self.text == 'e':
+            self.elidable = [True]
+
+    def is_consonants(self):
+        return is_consonants(self.text)
+
+    def ends_with_apostrophe(self):
+        return re.search("[" + APOSTROPHES + "]$", self.original) is not None
+
+    def elide_vowel_problems(self, chunk_group):
+        if self.elision is None:
+            self.elision = elision_wrap(chunk_group)
+
+    def process_y_cases(self, previous_chunk, next_chunk):
+        new_word_from_chunk = []
+        if 'y' not in self.text or len(self.text) == 1 or self.text.startswith("y"):
+            new_word_from_chunk.append(self)
+        else:
+            if previous_chunk is not None and next_chunk is not None:
+                # special cases of "pays", "alcoyle", "abbayes"
+                c_text = self.text
+                p_text = previous_chunk.text
+                n_text = next_chunk.text
+                # TODO Should you force if this condition does not apply?
+                if ((c_text == "ay" and p_text.endswith("p") and n_text.startswith("s"))
+                        or
+                        (c_text == "oy" and p_text.endswith("lc")
+                         and n_text.startswith("l"))
+                        or
+                        (c_text == "aye" and p_text.endswith("bb")
+                         and n_text.startswith("s"))):
+                    # force weight
+                    self.weights = [2]
+                    new_word_from_chunk.append(self)
+                    return new_word_from_chunk
+            must_force = next_chunk is None and previous_chunk is not None and \
+                (self.text == "aye" and previous_chunk.text.endswith("bb"))
+            if must_force:
+                # force weight
+                self.weights = [2]
+                new_word_from_chunk.append(self)
+            else:
+                sub_chunks = re.split(re.compile("(y+)"), self.text)
+                sub_chunks = [x for x in sub_chunks if len(x) > 0]
+                for j, sub_chunk in enumerate(sub_chunks):
+                    lindex = int(j * len(self.original) / len(sub_chunks))
+                    rindex = int((j + 1) * len(self.original) / len(sub_chunks))
+                    part = self.original[lindex:rindex]
+                    new_subchunk_text = 'Y' if 'y' in sub_chunk else sub_chunk
+                    new_subchunk = self.copy()
+                    new_subchunk.original = part
+                    new_subchunk.text = new_subchunk_text
+                    new_word_from_chunk.append(new_subchunk)
+        return new_word_from_chunk
+
+    def is_vowels(self):
+        return is_vowels(self.text)
+
+    def is_dash_elidable(self):
+        # "fais-le" not elidable, but "suis-je" and "est-ce" is
+        return not ('-' in self.text and not self.text.endswith('-j') and not self.text.endswith('-c'))
+
+    def check_elidable_with_next(self, next_chunk):
+        if self.elidable is None:
+            self.elidable = next_chunk.elision
+
+    def is_potentially_ambiguous_hiatus(self):
+        return self.text in ["ie", "ée", "ue"]
+
+    def ends_with_potentially_ambiguous_hiatus(self):
+        return len(self.text) >= 2 and self.text[-2:] in ["ie", "ée", "ue"]
+
+    def check_potentially_ambiguous_plural(self, previous_chunk):
+        if self.text == "s":
+            if previous_chunk.is_potentially_ambiguous_hiatus():
+                previous_chunk.error = "ambiguous"
+                self.error = "ambiguous"
+
+    def check_potentially_ambiguous_with_elision(self, next_chunk):
+        if self.ends_with_potentially_ambiguous_hiatus():
+            if next_chunk.elision is not None or True not in next_chunk.elision:
+                self.error = "ambiguous"
+                next_chunk.error = "ambiguous"
+
+    def check_hiatus(self, previous_chunk, next_chunk, only_two_parts):
+        if previous_chunk is not None:
+            self.check_potentially_ambiguous_plural(previous_chunk)
+        if self.ends_with_potentially_ambiguous_hiatus():
+            if not any(next_chunk.elision or [False]):
+                self.error = "ambiguous"
+                next_chunk.error = "ambiguous"
+
+        # elision concerns words ending with a vowel without a mute 'e'
+        # that have not been marked "no_hiatus"
+        # it also concerns specifically "et"
+        elif (not self.text.endswith('e') and self.no_hiatus is None
+              and (self.is_vowels() or self.text == 'Y')
+              or (only_two_parts and previous_chunk.text == 'e' and self.text == 't')):
+            # it happens if the next word is not marked no_hiatus
+            # and starts with something that causes elision
+            if all(next_chunk.elision) and next_chunk.no_hiatus is None:
+                self.error = "hiatus"
+                next_chunk.error = "hiatus"
+
+    def make_word_end(self):
+        self.word_end = True
+
+    def contains_break(self):
+        return '-' in self.text \
+               or self.word_end or False \
+               or self.had_hyphen or False
+
+    def is_e(self):
+        return self.text == "e"
+
+    def possible_weights_approx(self):
+        """Return the possible number of syllabes taken by a vowel chunk (permissive approximation)"""
+        chunk_text = self.text
+        if len(chunk_text) == 1:
+            return [1]
+        # old spelling and weird exceptions
+        if chunk_text in ['ouï']:
+            return [1, 2]  # TODO unsure about that
+        if chunk_text in ['eüi', 'aoû', 'uë']:
+            return [1]
+        if chunk_text in ['aïe', 'oë', 'ouü']:
+            return [1, 2]
+        if contains_trema(chunk_text):
+            return [2]
+        chunk_text = strip_accents(chunk_text, True)
+        if chunk_text in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi',
+                          'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo',
+                          'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii',
+                          'yeu', 'ye', 'you']:
+            return [1]
+        if chunk_text == "oua":
+            return [1, 2]  # "pouah"
+        if chunk_text == "ao":
+            return [1, 2]  # "paon"
+        for x in ['oa', 'ea', 'eua', 'euo', 'ua', 'uo', 'yau']:
+            if x in chunk_text:
+                return [2]
+        # beware of "déesse"
+        if chunk_text == 'ée':
+            return [1, 2]
+        if chunk_text[0] == 'i':
+            return [1, 2]
+        if chunk_text[0] == 'u' and (strip_accents(chunk_text[1]) in ['i', 'e']):
+            return [1, 2]
+        if chunk_text[0] == 'o' and chunk_text[1] == 'u' and len(chunk_text) >= 3 and \
+                strip_accents(chunk_text[2]) in ['i', 'e']:
+            return [1, 2]
+        if 'é' in chunk_text or 'è' in chunk_text:
+            return [2]
+        # we can't tell
+        return [1, 2]
+
+    def clear(self):
+        if self.word_end is None or not self.word_end:
+            return self.text
+        return self.text + ' '
+
+    def set_possible_weights_from_context(self, chunks_before, chunks_after, template, threshold):
+        if self.weights is not None:
+            return
+        if len(chunks_after) > 0:
+            next_chunk = chunks_after[0]
+        else:
+            next_chunk = None
+
+        if len(chunks_before) > 0:
+            previous_chunk = chunks_before[-1]
+        else:
+            previous_chunk = None
+
+        if len(chunks_before) > 1:
+            previous_previous_chunk = chunks_before[-2]
+        else:
+            previous_previous_chunk = None
+
+        if ((len(chunks_after) <= 1 and self.is_e())
+                and not (next_chunk is not None and next_chunk.is_vowels())
+                and not (previous_chunk is None or previous_chunk.contains_break())
+                and not (previous_previous_chunk is None or previous_previous_chunk.contains_break())):
+            # special case for verse endings, which can get elided (or not)
+            # but we don't elide lone syllables ("prends-le", etc.)
+
+            if next_chunk is None:
+                self.weights = [0]  # ending 'e' is elided
+            elif next_chunk.text == 's':
+                self.weights = [0]  # ending 'es' is elided
+            elif next_chunk.text == 'nt':
+                # ending 'ent' is sometimes elided, try to use pronunciation
+                # actually, this will have an influence on the rhyme's gender
+                # see feminine
+                possible = []
+                if not self.verse.phon or len(self.verse.phon) == 0:
+                    self.weights = [0, 1]  # do something reasonable without pron
+                else:
+                    for possible_phon in self.verse.phon:
+                        if possible_phon.endswith(')') or possible_phon.endswith('#'):
+                            possible.append(1)
+                        else:
+                            possible.append(0)
+                    self.weights = possible
+            else:
+                self.weights = self.possible_weights(chunks_before, chunks_after, template, threshold)
+        elif (next_chunk is None and self.text == 'e' and
+              previous_chunk is not None and (previous_chunk.text.endswith('-c')
+                                              or previous_chunk.text.endswith('-j')
+                                              or (previous_chunk.text == 'c'
+                                                  and previous_chunk.had_hyphen is not None)
+                                              or (previous_chunk.text == 'j'
+                                                  and previous_chunk.had_hyphen is not None))):
+            self.weights = [0]  # -ce and -je are elided
+        elif next_chunk is None and self.text in ['ie', 'ée']:
+            self.weights = [1]
+        # elide "-ée" and "-ées", but be specific (beware of e.g. "réel")
+        elif (len(chunks_after) <= 1
+              and self.text == 'ée'
+              and (next_chunk is None or chunks_after[-1].text == 's')):
+            self.weights = [1]
+        elif self.elidable is not None:
+            self.weights = [int(not x) for x in self.elidable]
+        else:
+            self.weights = self.possible_weights(chunks_before, chunks_after, template, threshold)
+
+    def possible_weights(self, chunks_before, chunks_after, template, threshold):
+        if template.options['diaeresis'] == "classical":
+            return self.possible_weights_ctx(chunks_before, chunks_after, threshold=threshold)
+        elif template.options['diaeresis'] == "permissive":
+            return self.possible_weights_approx()
+
+    def possible_weights_ctx(self, chunks_before, chunks_after, threshold=None):
+        if not threshold:
+            threshold = DEFAULT_THRESHOLD
+        q = self.make_query(chunks_before, chunks_after)
+        v = diaeresis.diaeresis_finder.lookup(q)
+        if len(v.keys()) == 1 and v[list(v.keys())[0]] > threshold:
+            return [int(list(v.keys())[0])]
+        else:
+            return self.possible_weights_seed()
+
+    def make_query(self, chunks_before, chunks_after):
+        cleaned_before = [chunk.clear() for chunk in chunks_before]
+        cleaned_after = [chunk.clear() for chunk in chunks_after]
+        current_clear = self.clear()
+        if current_clear.endswith(' '):
+            current_clear = current_clear.rstrip()
+        if len(cleaned_after) > 0:
+            cleaned_after[0] = " " + cleaned_after[0]
+        else:
+            cleaned_after.append(' ')
+        ret2 = intersperse(
+            ''.join(cleaned_after),
+            ''.join([x[::-1] for x in cleaned_before[::-1]]))
+        ret = [current_clear] + ret2
+        return ret
+
+    def possible_weights_seed(self):
+        """Return the possible number of syllabes taken by a vowel chunk"""
+        if len(self.text) == 1:
+            return [1]
+        # dioïde, maoïste, taoïste
+        if (self.text[-1] == 'ï' and len(self.text) >= 3 and not
+                self.text[-3:-1] == 'ou'):
+            return [3]
+        # ostéoarthrite
+        if "éoa" in self.text:
+            return [3]
+        # antiaérien; but let's play it safe
+        if "iaé" in self.text:
+            return [2, 3]
+        # giaour, miaou, niaouli
+        if "iaou" in self.text:
+            return [2, 3]
+        # bioélectrique
+        if "ioé" in self.text:
+            return [2, 3]
+        # méiose, nucléion, etc.
+        if "éio" in self.text:
+            return [2, 3]
+        # radioactif, radioamateur, etc.
+        if "ioa" in self.text:
+            return [2, 3]
+        # pléiade
+        if "éio" in self.text:
+            return [2, 3]
+        # pompéien, tarpéien...
+        # in theory the "-ie" should give a diaeresis, so 3 syllabes
+        # let's keep the benefit of the doubt...
+        # => this also gives 3 as a possibility for "obéie"...
+        if "éie" in self.text:
+            return [2, 3]
+        # tolstoïen
+        # same remark
+        if "oïe" in self.text:
+            return [2, 3]
+        # shanghaïen (diaeresis?), but also "aië"
+        if "aïe" in self.text:
+            return [1, 2, 3]
+        if self.text in ['ai', 'ou', 'eu', 'ei', 'eau', 'au', 'oi']:
+            return [1]
+        # we can't tell
+        return [1, 2]
+
+    def set_hemistiche_from_context(self, previous_previous_chunk, previous_chunk, next_chunk):
+        if self.hemistiche is not None:
+            return
+        ending = self.text
+        if not (self.word_end or False) and next_chunk is not None:
+            if not (next_chunk.word_end or False):
+                self.hemistiche = "cut"
+                return
+            ending += next_chunk.text
+        if ending in SURE_END_FEM and previous_previous_chunk is not None and previous_chunk is not None:
+            # check that this isn't a one-syllabe wourd (which is allowed)
+            ok = False
+            try:
+                if '-' in previous_chunk.original or (previous_chunk.word_end or False):
+                    ok = True
+                if '-' in previous_previous_chunk.original or (previous_previous_chunk.word_end or False):
+                    ok = True
+            except IndexError:
+                pass
+            if not ok:
+                # hemistiche ends in feminine
+                if any(self.elidable or [False]):
+                    self.hemistiche = "elid"  # elidable final -e, but only OK if actually elided
+                    return
+                else:
+                    self.hemistiche = "fem"
+                    return
+        self.hemistiche = "ok"
+
+    def normalize(self):
+        if self.text_pron is None:
+            return normalize(self.original, strip=False, rm_apostrophe_end=False)
+        else:
+            return self.text
+
+    def get_original_text(self):
+        return self.original
+
+    def get_errors_set(self, forbidden_ok, hiatus_ok):
+        errors_chunk = set()
+        if self.error is not None:
+            if self.error == "ambiguous" and not forbidden_ok:
+                errors_chunk.add(error.ErrorForbiddenPattern)
+            if self.error == "hiatus" and not hiatus_ok:
+                errors_chunk.add(error.ErrorHiatus)
+            if self.error == "illegal":
+                errors_chunk.add(error.ErrorBadCharacters)
+        return errors_chunk
+
+
+LETTERS = {
+    'f': 'effe',
+    'h': 'ache',
+    'j': 'gi',
+    'k': 'ka',
+    'l': 'elle',
+    'm': 'aime',
+    'n': 'aine',
+    'q': 'cu',
+    'r': 'ère',
+    's': 'esse',
+    'w': 'doublevé',
+    'x': 'ixe',
+    'z': 'zaide'
+}
+
+
+def elision_wrap(chunk_group):
+    first_letter = common.remove_punctuation(chunk_group[0].original.strip())
+    temp = elision(''.join(chunk.text for chunk in chunk_group),
+                   ''.join(chunk.original for chunk in chunk_group),
+                   first_letter == first_letter.upper())
+    return temp
+
+
+def elision(word, original_word, was_cap):
+    if word.startswith('y'):
+        if word == 'y':
+            return [True]
+        if was_cap:
+            if word == 'york':
+                return [False]
+            # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
+            # depends on whether it's French or foreign...
+            return [True, False]
+        else:
+            exc = ["york", "yeux", "yeuse", "ypérite"]
+            for w in exc:
+                if word.startswith(w):
+                    return [True]
+            # otherwise, no elision
+            return [False]
+    if word in ["oui", "ouis"]:
+        # elision for those words, but beware, no elision for "ouighour"
+        # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
+        # so elision sometimes
+        return [True, False]
+    if word.startswith("ouistiti") or word.startswith("ouagadougou"):
+        return [False]
+    # "un", "une" are non-elided as nouns ("cette une")
+    if word in ["un", "une"]:
+        return [True, False]
+    # "onze" is not elided
+    if word == "onze":
+        return [False]
+    if word.startswith('ulul'):
+        return [False]  # ululement, ululer, etc.
+    if word.startswith('uhlan'):
+        return [False]  # uhlan
+    if word[0] == 'h':
+        if word == "huis":
+            # special case, "huis" is elided but "huis clos" isn't
+            return [True, False]
+        # look up in haspirater using the original (but normalized) word
+        return list(map((lambda s: not s),
+                        haspirater.lookup(normalize(original_word))))
+    if is_vowels(word[0]):
+        return [True]
+    return [False]
diff --git a/plint/chunks.py b/plint/chunks.py
@@ -2,298 +2,18 @@ import re
 import sys
 from pprint import pprint
 
-from haspirater import haspirater
-from plint import common, vowels
-from plint.common import is_vowels, APOSTROPHES, is_consonants, normalize, strip_accents_one, CONSONANTS, SURE_END_FEM
+from plint.chunk import Chunk
+from plint.common import normalize, get_consonants_regex
 from plint.hyphen_splitter import HyphenSplitter
 
 
-class Chunk:
-
-    def __init__(self, word):
-        self.original = word
-        self.text = normalize(word, rm_apostrophe=True)
-        self.hemistiche = None
-        self.error = None
-        self.illegal_str = None
-        self.weights = None
-        self.had_hyphen = None
-        self.text_pron = None
-        self.elision = None
-        self.no_hiatus = None
-        self.elidable = None
-        self.word_end = False
-        # TODO What is a weight without s?
-        self.weight = None
-
-    def __repr__(self):
-        return "Chunk("\
-               + "original:" + self.original\
-               + ", text:" + self.text\
-               + ", weights:" + str(self.weights or [])\
-               + ", weight:" + str(self.weight or "")\
-               + ", elidable:" + str(self.elidable or False)\
-               + ", elision:" + str(self.elision or False)\
-               + ", hemistiche:" + str(self.hemistiche)\
-               + ", error:" + str(self.error)\
-               + ", illegal_str:" + str(self.illegal_str)\
-               + ", had_hypher:" + str(self.had_hyphen)\
-               + ", text_pron:" + str(self.text_pron)\
-               + ", no_hiatus:" + str(self.no_hiatus)\
-               + ", word_end:" + str(self.word_end)\
-               + ")" + "\n"
-
-    def copy(self):
-        new_chunk = Chunk(self.original)
-        new_chunk.original = self.original
-        new_chunk.text = self.text
-        new_chunk.hemistiche = self.hemistiche
-        new_chunk.error = self.error
-        new_chunk.illegal_str = self.illegal_str
-        new_chunk.weights = self.weights
-        new_chunk.had_hyphen = self.had_hyphen
-        new_chunk.text_pron = self.text_pron
-        new_chunk.elision = self.elision
-        new_chunk.no_hiatus = self.no_hiatus
-        new_chunk.elidable = self.elidable
-        new_chunk.word_end = self.word_end
-        new_chunk.weight = self.weight
-        return new_chunk
-
-    def set_hemistiche(self, hemis):
-        self.hemistiche = hemis
-
-    def check_forbidden_characters(self):
-        es = ""
-        for x in self.text:
-            if not common.remove_punctuation(strip_accents_one(x)[0].lower()) in common.LEGAL:
-                es += 'I'
-                self.error = "illegal"
-            else:
-                es += ' '
-        if self.error is not None and self.error == "illegal":
-            self.illegal_str = es
-
-    def simplify_gu_qu(self, next_chunk):
-        if next_chunk.text.startswith('u'):
-            if self.text.endswith('q'):
-                next_chunk.text = next_chunk.text[1:]
-                if next_chunk.text == '':
-                    self.original += next_chunk.original
-                    next_chunk.original = ''
-            if self.text.endswith('g') and len(next_chunk.text) >= 2:
-                if next_chunk.text[1] in "eéèa":
-                    next_chunk.text = next_chunk.text[1:]
-
-    def elide_inside_words(self, all_next_chunks):
-        if self.text == "e-":
-            self.weights = [0]  # force elision
-        next_chunk = all_next_chunks[0]
-        if self.text == "e" and next_chunk.text.startswith("-h"):
-            # collect what follows until the next hyphen or end
-            flw = next_chunk.original.split('-')[1]
-            for future_chunk in all_next_chunks[1:]:
-                flw += future_chunk.original.split('-')[0]
-                if '-' in future_chunk.original:
-                    break
-            # TODO: not sure if this reconstruction of the original word is bulletproof...
-            if haspirater.lookup(normalize(flw)):
-                self.weights = [0]
-            else:
-                self.weights = [1]
-
-    def remove_leading_and_trailing_crap(self):
-        seen_space = False
-        seen_hyphen = False
-        while len(self.text) > 0 and self.text[0] in ' -':
-            if self.text[0] == ' ':
-                seen_space = True
-            else:
-                seen_hyphen = True
-            self.text = self.text[1:]
-        while len(self.text) > 0 and self.text[-1] in ' -':
-            if self.text[-1] == ' ':
-                seen_space = True
-            else:
-                seen_hyphen = True
-            self.text = self.text[:-1]
-        if seen_hyphen and not seen_space:
-            self.had_hyphen = True
-
-    def is_empty(self):
-        return len(self.text) == 0
-
-    def add_original(self, other_chunk):
-        self.original += other_chunk.original
-
-    def create_sigles(self):
-        new_chunks = []
-        for j, character in enumerate(self.text):
-            try:
-                new_chunk_content = LETTERS[character]
-                # hack: the final 'e's in letters are just to help pronunciation
-                # inference and are only needed at end of word, otherwise they will
-                # mess syllable count up
-                if j < len(self.text) - 1 and new_chunk_content[-1] == 'e':
-                    new_chunk_content = new_chunk_content[:-1]
-            except KeyError:
-                new_chunk_content = character + 'é'
-            new_chunks += [(j, x) for x in re.split(get_consonants_regex(), new_chunk_content)]
-        new_chunks = [x for x in new_chunks if len(x[1]) > 0]
-        new_word = []
-        last_opos = -1
-        for j, (original_position, character) in enumerate(new_chunks):
-            part = ""
-            if j == len(new_chunks) - 1:
-                # don't miss final spaces
-                part = self.original[last_opos + 1:]
-            elif last_opos < original_position:
-                part = self.original[last_opos + 1:original_position + 1]
-                last_opos = original_position
-            # allow or forbid elision because of possible ending '-e' before
-            # forbid hiatus both for this and for preceding
-            # instruct that we must use text for the pronunciation
-            new_chunk = Chunk(part)
-            new_chunk.original = part
-            new_chunk.text = character
-            new_chunk.text_pron = True
-            new_chunk.elision = [False, True]
-            new_chunk.no_hiatus = True
-            new_word.append(new_chunk)
-        # propagate information from splithyph
-        new_word[-1].hemistiche = self.hemistiche
-        return new_word
-
-    def check_elidable(self):
-        if self.text == 'e':
-            self.elidable = [True]
-
-    def is_consonants(self):
-        return is_consonants(self.text)
-
-    def ends_with_apostrophe(self):
-        return re.search("[" + APOSTROPHES + "]$", self.original) is not None
-
-    def elide_vowel_problems(self, chunk_group):
-        if self.elision is None:
-            self.elision = elision_wrap(chunk_group)
-
-    def process_y_cases(self, previous_chunk, next_chunk):
-        new_word_from_chunk = []
-        if 'y' not in self.text or len(self.text) == 1 or self.text.startswith("y"):
-            new_word_from_chunk.append(self)
-        else:
-            if previous_chunk is not None and next_chunk is not None:
-                # special cases of "pays", "alcoyle", "abbayes"
-                c_text = self.text
-                p_text = previous_chunk.text
-                n_text = next_chunk.text
-                # TODO Should you force if this condition does not apply?
-                if ((c_text == "ay" and p_text.endswith("p") and n_text.startswith("s"))
-                        or
-                        (c_text == "oy" and p_text.endswith("lc")
-                         and n_text.startswith("l"))
-                        or
-                        (c_text == "aye" and p_text.endswith("bb")
-                         and n_text.startswith("s"))):
-                    # force weight
-                    self.weights = [2]
-                    new_word_from_chunk.append(self)
-                    return new_word_from_chunk
-            must_force = next_chunk is None and previous_chunk is not None and \
-                (self.text == "aye" and previous_chunk.text.endswith("bb"))
-            if must_force:
-                # force weight
-                self.weights = [2]
-                new_word_from_chunk.append(self)
-            else:
-                sub_chunks = re.split(re.compile("(y+)"), self.text)
-                sub_chunks = [x for x in sub_chunks if len(x) > 0]
-                for j, sub_chunk in enumerate(sub_chunks):
-                    lindex = int(j * len(self.original) / len(sub_chunks))
-                    rindex = int((j + 1) * len(self.original) / len(sub_chunks))
-                    part = self.original[lindex:rindex]
-                    new_subchunk_text = 'Y' if 'y' in sub_chunk else sub_chunk
-                    new_subchunk = self.copy()
-                    new_subchunk.original = part
-                    new_subchunk.text = new_subchunk_text
-                    new_word_from_chunk.append(new_subchunk)
-        return new_word_from_chunk
-
-    def is_vowels(self):
-        return is_vowels(self.text)
-
-    def is_dash_elidable(self):
-        # "fais-le" not elidable, but "suis-je" and "est-ce" is
-        return not ('-' in self.text and not self.text.endswith('-j') and not self.text.endswith('-c'))
-
-    def check_elidable_with_next(self, next_chunk):
-        if self.elidable is None:
-            self.elidable = next_chunk.elision
-
-    def is_potentially_ambiguous_hiatus(self):
-        return self.text in ["ie", "ée", "ue"]
-
-    def ends_with_potentially_ambiguous_hiatus(self):
-        return len(self.text) >= 2 and self.text[-2:] in ["ie", "ée", "ue"]
-
-    def check_potentially_ambiguous_plural(self, previous_chunk):
-        if self.text == "s":
-            if previous_chunk.is_potentially_ambiguous_hiatus():
-                previous_chunk.error = "ambiguous"
-                self.error = "ambiguous"
-
-    def check_potentially_ambiguous_with_elision(self, next_chunk):
-        if self.ends_with_potentially_ambiguous_hiatus():
-            if next_chunk.elision is not None or True not in next_chunk.elision:
-                self.error = "ambiguous"
-                next_chunk.error = "ambiguous"
-
-    def check_hiatus(self, previous_chunk, next_chunk, only_two_parts):
-        if previous_chunk is not None:
-            self.check_potentially_ambiguous_plural(previous_chunk)
-        if self.ends_with_potentially_ambiguous_hiatus():
-            if not any(next_chunk.elision or [False]):
-                self.error = "ambiguous"
-                next_chunk.error = "ambiguous"
-
-        # elision concerns words ending with a vowel without a mute 'e'
-        # that have not been marked "no_hiatus"
-        # it also concerns specifically "et"
-        elif (not self.text.endswith('e') and self.no_hiatus is None
-              and (self.is_vowels() or self.text == 'Y')
-              or (only_two_parts and previous_chunk.text == 'e' and self.text == 't')):
-            # it happens if the next word is not marked no_hiatus
-            # and starts with something that causes elision
-            if all(next_chunk.elision) and next_chunk.no_hiatus is None:
-                self.error = "hiatus"
-                next_chunk.error = "hiatus"
-
-    def make_word_end(self):
-        self.word_end = True
-
-    def contains_break(self):
-        return '-' in self.text \
-               or self.word_end or False \
-               or self.had_hyphen or False
-
-    def is_e(self):
-        return self.text == "e"
-
-
-def get_consonants_regex():
-    all_consonants = CONSONANTS + CONSONANTS.upper()
-    consonants_regexp = re.compile('([^' + all_consonants + '*-]+)', re.UNICODE)
-    return consonants_regexp
-
-
 class Chunks:
 
-    def __init__(self, line):
-        self._line = line
+    def __init__(self, verse):
+        # TODO Find a way to remove this dependency
+        self.verse = verse
         self.chunks = []
         self.create_chunks()
-        self.phon = None
         self.separated_chunks = []
 
     def create_chunks(self):
@@ -304,7 +24,7 @@ class Chunks:
         self.elide_inside_words()
         self.remove_leading_and_trailing_crap()
         self.collapse_empty_chunks_from_simplifications()
-        self.create_sigles()
+        self.create_acronym()
         self.elide_vowel_problems()
         self.process_y_cases()
         self.annotate_final_mute_e()
@@ -315,8 +35,8 @@ class Chunks:
 
     def print_new_line_if_changed(self):
         now_line = ''.join(chunk.original for chunk in self.chunks)
-        if now_line != self._line:
-            print("%s became %s" % (self._line, now_line), file=sys.stderr)
+        if now_line != self.verse.input_line:
+            print("%s became %s" % (self.verse.input_line, now_line), file=sys.stderr)
             pprint(self.chunks, stream=sys.stderr)
 
     def merge_chunks_words(self):
@@ -384,12 +104,12 @@ class Chunks:
             future_chunks.append(acc)
         self.separated_chunks = future_chunks
 
-    def create_sigles(self):
+    def create_acronym(self):
        for i, chunk_group in enumerate(self.separated_chunks):
            if len(chunk_group) == 1:
                first_chunk = chunk_group[0]
                if first_chunk.is_consonants():
-                    new_word = first_chunk.create_sigles()
+                    new_word = first_chunk.create_acronym()
                    self.separated_chunks[i] = new_word
                    self.separated_chunks[i][-1].check_elidable()
 
@@ -430,37 +150,25 @@ class Chunks:
 
     def initialize_chunks(self):
         word_bi_tokens = self.get_word_tokens()
-        pre_chunks = self.preprocess_bi_tokens(word_bi_tokens)
+        pre_chunks = pre_process_bi_tokens(word_bi_tokens)
         self.separated_chunks = []
         for (is_end_word, pre_chunk) in pre_chunks:
             if len(pre_chunk) != 0:
-                self.separated_chunks.append([Chunk(word) for word in pre_chunk])
+                self.separated_chunks.append([Chunk(word, self.verse) for word in pre_chunk])
                 if not is_end_word:
                     # word end is a fake word end
                     for chunk in self.separated_chunks[-1]:
                         chunk.set_hemistiche('cut')
 
-    def preprocess_bi_tokens(self, word_bi_tokens):
-        consonants_regexp = get_consonants_regex()
-        pre_chunks = [(b, re.split(consonants_regexp, word)) for (b, word) in word_bi_tokens]
-        pre_chunks = [(b, remove_trivial(x, self.is_empty_word)) for (b, x) in pre_chunks]
-        return pre_chunks
-
     def get_word_tokens(self):
         words = self.split_input_line_by_whitespace()
-        words = remove_trivial(words, self.is_empty_word)
-        word_tokens = self.split_all_hyph(words)
+        words = remove_trivial(words, is_empty_word)
+        word_tokens = split_all_hyphen(words)
         return word_tokens
 
-    def split_all_hyph(self, words):
-        return sum([HyphenSplitter().split(w) for w in words], [])
-
-    def is_empty_word(self, word):
-        return re.match(r"^\s*$", word) or len(normalize(word, rm_all=True)) == 0
-
     def split_input_line_by_whitespace(self):
         whitespace_regexp = re.compile(r"(\s+)")
-        words = re.split(whitespace_regexp, self._line)
+        words = re.split(whitespace_regexp, self.verse.input_line)
         return words
 
     def annotate(self, template, threshold):
@@ -468,187 +176,75 @@ class Chunks:
         for i, chunk in enumerate(self.chunks):
             if not chunk.is_vowels():
                 continue
+
+            chunks_before = self.chunks[:i]
+            chunks_after = self.chunks[i + 1:]
             # for the case of "pays" and related words
-            if chunk.weights is None:
-                chunk.weights = self.possible_weights_context(i, template, threshold)
-            if chunk.hemistiche is None:
-                chunk.hemistiche = self.hemistiche(i)
-        return self.align2str()
+            chunk.set_possible_weights_from_context(chunks_before, chunks_after, template, threshold)
 
-    def possible_weights_context(self, pos, template, threshold):
-        chunk = self.chunks[pos]
-        if pos != len(self.chunks) - 1:
-            next_chunk = self.chunks[pos + 1]
-        else:
-            next_chunk = None
-        if pos > 0:
-            previous_chunk = self.chunks[pos - 1]
-        else:
-            previous_chunk = None
-        if pos > 1:
-            previous_previous_chunk = self.chunks[pos - 2]
-        else:
-            previous_previous_chunk = None
-
-        if ((pos >= len(self.chunks) - 2 and chunk.is_e())
-                and not (next_chunk is not None and next_chunk.is_vowels())
-                and not (previous_chunk is None or previous_chunk.contains_break())
-                and not (previous_previous_chunk is None or previous_previous_chunk.contains_break())):
-            # special case for verse endings, which can get elided (or not)
-            # but we don't elide lone syllables ("prends-le", etc.)
-
-            if next_chunk is None:
-                return [0]  # ending 'e' is elided
-            if next_chunk.text == 's':
-                return [0]  # ending 'es' is elided
-            if next_chunk.text == 'nt':
-                # ending 'ent' is sometimes elided, try to use pronunciation
-                # actually, this will have an influence on the rhyme's gender
-                # see feminine
-                possible = []
-                if not self.phon or len(self.phon) == 0:
-                    return [0, 1]  # do something reasonable without pron
-                for possible_phon in self.phon:
-                    if possible_phon.endswith(')') or possible_phon.endswith('#'):
-                        possible.append(1)
-                    else:
-                        possible.append(0)
-                return possible
-            return self.possible_weights(pos, template, threshold)
-        if (next_chunk is None and chunk.text == 'e' and
-                previous_chunk is not None and (previous_chunk.text.endswith('-c')
-                                                or previous_chunk.text.endswith('-j')
-                                                or (previous_chunk.text == 'c'
-                                                    and previous_chunk.had_hyphen is not None)
-                                                or (previous_chunk.text == 'j'
-                                                    and previous_chunk.had_hyphen is not None))):
-            return [0]  # -ce and -je are elided
-        if next_chunk is None and chunk.text in ['ie', 'ée']:
-            return [1]
-        # elide "-ée" and "-ées", but be specific (beware of e.g. "réel")
-        if (pos >= len(self.chunks) - 2
-                and chunk.text == 'ée'
-                and (next_chunk is None or self.chunks[-1].text == 's')):
-            return [1]
-        if chunk.elidable is not None:
-            return [int(not x) for x in chunk.elidable]
-        return self.possible_weights(pos, template, threshold)
-
-    def possible_weights(self, pos, template, threshold):
-        if template.options['diaeresis'] == "classical":
-            return vowels.possible_weights_ctx(self.chunks, pos, threshold=threshold)
-        elif template.options['diaeresis'] == "permissive":
-            return vowels.possible_weights_approx(self.chunks[pos].text)
-
-    def hemistiche(self, pos):
-        current_chunk = self.chunks[pos]
-        ending = current_chunk.text
-        if not (current_chunk.word_end or False) and pos < len(self.chunks) - 1:
-            if not (self.chunks[pos + 1].word_end or False):
-                return "cut"
-            ending += self.chunks[pos + 1].text
-        if ending in SURE_END_FEM:
-            # check that this isn't a one-syllabe wourd (which is allowed)
-            ok = False
-            try:
-                for i in range(2):
-                    if '-' in self.chunks[pos - i - 1].original or (self.chunks[pos - i - 1].word_end or False):
-                        ok = True
-            except IndexError:
-                pass
-            if not ok:
-                # hemistiche ends in feminine
-                if any(current_chunk.elidable or [False]):
-                    return "elid"  # elidable final -e, but only OK if actually elided
-                else:
-                    return "fem"
-        return "ok"
+            next_chunk = self.chunks[i + 1] if i < len(self.chunks) - 1 else None
+            previous_chunk = self.chunks[i - 1] if i > 0 else None
+            previous_previous_chunk = self.chunks[i - 2] if i > 1 else None
+            chunk.set_hemistiche_from_context(previous_previous_chunk, previous_chunk, next_chunk)
+        return self.align2str()
 
     def align2str(self):
         return ''.join([x.text for x in self.chunks])
 
+    def print_n_syllables(self, n_syllables, offset, output_file):
+        count = 0
+        for i, chunk in enumerate(self.chunks[::-1]):
+            if chunk.weights is not None:
+                if count < offset:
+                    count += 1
+                    continue
+                pos = len(self.chunks) - i - 1
+                considered_chunk = self.chunks[pos]
+                chunks_before = self.chunks[:pos]
+                chunks_after = self.chunks[pos + 1:]
+                print(str(n_syllables) + ' ' + ' '.join(considered_chunk.make_query(chunks_before, chunks_after)),
+                      file=output_file)
+                break
 
-LETTERS = {
-    'f': 'effe',
-    'h': 'ache',
-    'j': 'gi',
-    'k': 'ka',
-    'l': 'elle',
-    'm': 'aime',
-    'n': 'aine',
-    'q': 'cu',
-    'r': 'ère',
-    's': 'esse',
-    'w': 'doublevé',
-    'x': 'ixe',
-    'z': 'zaide'
-}
-
-
-def elision_wrap(chunk_group):
-    first_letter = common.remove_punctuation(chunk_group[0].original.strip())
-    temp = elision(''.join(chunk.text for chunk in chunk_group),
-                   ''.join(chunk.original for chunk in chunk_group),
-                   first_letter == first_letter.upper())
-    return temp
-
-
-def elision(word, original_word, was_cap):
-    if word.startswith('y'):
-        if word == 'y':
-            return [True]
-        if was_cap:
-            if word == 'york':
-                return [False]
-            # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
-            # depends on whether it's French or foreign...
-            return [True, False]
-        else:
-            exc = ["york", "yeux", "yeuse", "ypérite"]
-            for w in exc:
-                if word.startswith(w):
-                    return [True]
-            # otherwise, no elision
-            return [False]
-    if word in ["oui", "ouis"]:
-        # elision for those words, but beware, no elision for "ouighour"
-        # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
-        # so elision sometimes
-        return [True, False]
-    if word.startswith("ouistiti") or word.startswith("ouagadougou"):
-        return [False]
-    # "un", "une" are non-elided as nouns ("cette une")
-    if word in ["un", "une"]:
-        return [True, False]
-    # "onze" is not elided
-    if word == "onze":
-        return [False]
-    if word.startswith('ulul'):
-        return [False]  # ululement, ululer, etc.
-    if word.startswith('uhlan'):
-        return [False]  # uhlan
-    if word[0] == 'h':
-        if word == "huis":
-            # special case, "huis" is elided but "huis clos" isn't
-            return [True, False]
-        # look up in haspirater using the original (but normalized) word
-        return list(map((lambda s: not s),
-                        haspirater.lookup(normalize(original_word))))
-    if is_vowels(word[0]):
-        return [True]
-    return [False]
-
-
-def remove_trivial(chunks, predicate):
+    def normalized(self):
+        return ''.join(chunk.normalize() for chunk in self.chunks).lstrip().rstrip()
+
+    def get_line(self):
+        return ''.join(chunk.get_original_text() for chunk in self.chunks)
+
+    def get_errors_set(self, forbidden_ok, hiatus_ok):
+        errors = set()
+        for chunk in self.chunks:
+            errors_chunk = chunk.get_errors_set(forbidden_ok, hiatus_ok)
+            errors = errors.union(errors_chunk)
+        return errors
+
+
+def remove_trivial(words, predicate):
     new_chunks = []
-    accu = ""
-    for i, w in enumerate(chunks):
-        if predicate(w):
+    words_accumulation = ""
+    for i, chunk in enumerate(words):
+        if predicate(chunk):
            if len(new_chunks) == 0:
-                accu = accu + w
+                words_accumulation = words_accumulation + chunk
            else:
-                new_chunks[-1] = new_chunks[-1] + w
+                new_chunks[-1] = new_chunks[-1] + chunk
        else:
-            new_chunks.append(accu + w)
-            accu = ""
+            new_chunks.append(words_accumulation + chunk)
+            words_accumulation = ""
    return new_chunks
+
+
+def split_all_hyphen(words):
+    return sum([HyphenSplitter().split(w) for w in words], [])
+
+
+def is_empty_word(word):
+    return re.match(r"^\s*$", word) or len(normalize(word, rm_all=True)) == 0
+
+
+def pre_process_bi_tokens(word_bi_tokens):
+    consonants_regexp = get_consonants_regex()
+    pre_chunks = [(b, re.split(consonants_regexp, word)) for (b, word) in word_bi_tokens]
+    pre_chunks = [(b, remove_trivial(x, is_empty_word)) for (b, x) in pre_chunks]
+    return pre_chunks
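In the new plint/chunks.py, remove_trivial() and is_empty_word() are plain
module-level helpers: whitespace-only tokens are folded into the preceding
chunk instead of producing empty chunks. A quick sketch, assuming plint is on
the Python path:

    from plint.chunks import remove_trivial, is_empty_word

    # the whitespace token is merged into its left neighbour
    print(remove_trivial(["Le", "  ", "chat"], is_empty_word))
    # ['Le  ', 'chat']
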
diff --git a/plint/common.py b/plint/common.py
@@ -122,3 +122,9 @@ def to_xsampa(s):
 def from_xsampa(s):
     """convert x-sampa to our modified format"""
     return subst(s, [(x[1], x[0]) for x in SUBSTS])
+
+
+def get_consonants_regex():
+    all_consonants = CONSONANTS + CONSONANTS.upper()
+    consonants_regexp = re.compile('([^' + all_consonants + '*-]+)', re.UNICODE)
+    return consonants_regexp
\ No newline at end of file
diff --git a/plint/error.py b/plint/error.py
@@ -118,11 +118,11 @@ class ErrorBadRhymeGenre(ErrorBadRhyme):
         return "\"" + result + "\""
 
     def get_id(self, pattern):
-        return pattern.femid
+        return pattern.feminine_id
 
 
 class ErrorBadRhymeObject(ErrorBadRhyme):
     def get_id(self, pattern):
-        return pattern.myid
+        return pattern.my_id
 
 
 class ErrorBadRhymeSound(ErrorBadRhymeObject):
     @property
@@ -157,7 +157,7 @@ class ErrorMultipleWordOccurrence:
     def report(self, pattern):
         return (_("Too many occurrences of word \"%s\" for rhyme %s")
-                % (self.word, pattern.myid))
+                % (self.word, pattern.my_id))
 
 
 class ErrorIncompleteTemplate:
     def report(self, pattern):
diff --git a/plint/pattern.py b/plint/pattern.py
@@ -0,0 +1,31 @@
+from plint import error
+
+
+class Pattern:
+    def __init__(self, metric, my_id="", feminine_id="", constraint=None, hemistiches=None):
+        self.metric = metric
+        self.length = None
+        self.parse_metric()
+        self.my_id = my_id
+        self.feminine_id = feminine_id
+        self.constraint = constraint
+        if hemistiches:
+            self.hemistiches = hemistiches
+
+    def parse_metric(self):
+        """Parse from a metric description"""
+        try:
+            verse = [int(x) for x in self.metric.split('/')]
+            for i in verse:
+                if i < 1:
+                    raise ValueError
+        except ValueError:
+            raise error.TemplateLoadError("Metric description should only contain positive integers")
+        if sum(verse) > 16:
+            raise error.TemplateLoadError("Metric length limit exceeded")
+        self.hemistiches = []
+        self.length = 0
+        for v in verse:
+            self.length += v
+            self.hemistiches.append(self.length)
+        self.length = self.hemistiches.pop()
\ No newline at end of file
diff --git a/plint/plint_irc.py b/plint/plint_irc.py
@@ -75,7 +75,7 @@ def manage(line, descriptor=sys.stdout):
     else:
       lbuf = [l]
     return True
-  errors = template.check(text, quiet=False)
+  errors = template.check(text)
   quiet = False
   if errors:
     print(errors.report())
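The new plint/pattern.py extracts the Pattern class from template.py:
parse_metric() turns a metric description such as "6/6" into hemistiche
positions plus a total length. A usage sketch:

    from plint.pattern import Pattern

    # a 12-syllable alexandrine with the hemistiche after the sixth syllable
    p = Pattern("6/6")
    print(p.length)       # 12
    print(p.hemistiches)  # [6]
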
"plint -- French poetry checker" + if lang == 'fr': + return "plint -- vérification formelle de poèmes" + else: + return "plint -- French poetry checker" + @app.route('/static/tpl/<filename>') def server_static(filename): - return static_file(filename, root="./static/tpl", mimetype="text/plain") + return static_file(filename, root="./static/tpl", mimetype="text/plain") + @app.route('/<lang>/static/img/<filename>') def server_static(filename, lang=None): - return static_file(filename, root="./static/img") + return static_file(filename, root="./static/img") + @app.route('/<lang>/static/tpl/<filename>') def server_static(filename, lang=None): - return static_file(filename, root="./static/tpl", mimetype="text/plain") + return static_file(filename, root="./static/tpl", mimetype="text/plain") + @app.route('/static/<filename>') def server_static(filename): - return static_file(filename, root="./static") + return static_file(filename, root="./static") + @app.route('/<lang>/static/<filename>') def server_static(filename, lang=None): - return static_file(filename, root="./static") + return static_file(filename, root="./static") + @app.route('/') def root(): - redirect('/' + get_locale() + '/') + redirect('/' + get_locale() + '/') + @app.route('/<page>') def paged(page): - redirect('/' + get_locale() + '/' + page) + redirect('/' + get_locale() + '/' + page) + @app.route('/<lang>/') def root(lang): - if lang not in ['fr', 'en']: - return paged(lang) - return env.get_template('index.html').render(title=get_title(lang), - lang=lang, path="") + if lang not in ['fr', 'en']: + return paged(lang) + return env.get_template('index.html').render(title=get_title(lang), + lang=lang, path="") + @app.route('/<lang>/about') def about(lang): - return env.get_template('about.html').render(title=get_title(lang), - lang=lang, path="about") + return env.get_template('about.html').render(title=get_title(lang), + lang=lang, path="about") + MAX_POEM_LEN = 8192 MAX_LINE_LEN = 512 + class TooBigException(Exception): pass + class TooLongLinesException(Exception): pass + def check(poem): - if len(poem) > MAX_POEM_LEN: - raise TooBigException - s = poem.split("\n") - for x in range(len(s)): - if len(s[x]) > MAX_LINE_LEN: - raise TooLongLinesException - s[x] = s[x].strip() - return s + if len(poem) > MAX_POEM_LEN: + raise TooBigException + s = poem.split("\n") + for x in range(len(s)): + if len(s[x]) > MAX_LINE_LEN: + raise TooLongLinesException + s[x] = s[x].strip() + return s + @app.route('/<lang>/checkjs', method='POST') def q(lang): - global throttle - # necessary when serving with lighttpd proxy-core - ip = request.environ.get('HTTP_X_FORWARDED_FOR') - if not ip: - # fallback; this is 127.0.0.1 with proxy-core - ip = request.environ.get('REMOTE_ADDR') - t = time.time() - print("== %s %s ==" % (ip, t)) - response.content_type = 'application/json' - localization.init_locale(lang) - throttle = set(x for x in throttle if t - x[1] < THROTTLE_DELAY) - if ip in (x[0] for x in throttle): + global throttle + # necessary when serving with lighttpd proxy-core + ip = request.environ.get('HTTP_X_FORWARDED_FOR') + if not ip: + # fallback; this is 127.0.0.1 with proxy-core + ip = request.environ.get('REMOTE_ADDR') + t = time.time() + print("== %s %s ==" % (ip, t)) + response.content_type = 'application/json' + localization.init_locale(lang) + throttle = set(x for x in throttle if t - x[1] < THROTTLE_DELAY) + if ip in (x[0] for x in throttle): + if lang == 'fr': + msg = (("Trop de requêtes pour vérifier le poème," + + " veuillez réessayer 
dans %d secondes") % + THROTTLE_DELAY) + else: + msg = (("Too many requests to check poem," + + " please try again in %d seconds") % + THROTTLE_DELAY) + return dumps({'error': msg}) + throttle.add((ip, t)) + poem = re.sub(r'<>&', '', request.forms.get('poem')) + print(poem) + + # default message if lang == 'fr': - msg = (("Trop de requêtes pour vérifier le poème," - + " veuillez réessayer dans %d secondes") % - THROTTLE_DELAY) + msg = "Le poème est vide" else: - msg = (("Too many requests to check poem," - + " please try again in %d seconds") % - THROTTLE_DELAY) - return dumps({'error': msg}) - throttle.add((ip, t)) - poem = re.sub(r'<>&', '', request.forms.get('poem')) - print(poem) - - # default message - if lang == 'fr': - msg = "Le poème est vide" - else: - msg = "Poem is empty" - - try: - poem = check(poem) - except TooBigException: - poem = None - if lang == 'fr': - msg = "Le poème est trop long (maximum %d caractères)" % MAX_POEM_LEN - else: - msg = "Poem is too long (maximum %d characters)" % MAX_POEM_LEN - except TooLongLinesException: - poem = None - if lang == 'fr': - msg = "Certaines lignes du poème sont trop longues (maximum %d caractères)" % MAX_LINE_LEN - else: - msg = "Some lines of the poem are too long (maximum %d characters)" % MAX_LINE_LEN - if not poem or len(poem) == 0 or (len(poem) == 1 and len(poem[0]) == 0): - return dumps({'error': msg}) - templateName = re.sub(r'[^a-z_]', '', request.forms.get('template')) - print(templateName) - if templateName == 'custom': - x = request.forms.get('custom_template') - else: + msg = "Poem is empty" + try: - f = open("static/tpl/" + templateName + ".tpl") - x = f.read() - f.close() - except IOError: - if lang == 'fr': - msg = "Modèle inexistant" - else: - msg = "No such template" - return dumps({'error': msg}) - print(x) - try: - templ = template.Template(x) - except error.TemplateLoadError as e: - if lang == 'fr': - msg = "Erreur à la lecture du modèle : " + e.msg + poem = check(poem) + except TooBigException: + poem = None + if lang == 'fr': + msg = "Le poème est trop long (maximum %d caractères)" % MAX_POEM_LEN + else: + msg = "Poem is too long (maximum %d characters)" % MAX_POEM_LEN + except TooLongLinesException: + poem = None + if lang == 'fr': + msg = "Certaines lignes du poème sont trop longues (maximum %d caractères)" % MAX_LINE_LEN + else: + msg = "Some lines of the poem are too long (maximum %d characters)" % MAX_LINE_LEN + if not poem or len(poem) == 0 or (len(poem) == 1 and len(poem[0]) == 0): + return dumps({'error': msg}) + templateName = re.sub(r'[^a-z_]', '', request.forms.get('template')) + print(templateName) + if templateName == 'custom': + x = request.forms.get('custom_template') else: - msg = "Error when reading template: " + e.msg - return dumps({'error': msg}) - poem.append(None) - r = [] - i = 0 - d = {} - for line in poem: - i += 1 - last = False - if line == None: - line = "" - last = True - errors = templ.check(line, last=last) - if errors: - r.append({ - 'line': line, - 'num': i, - 'errors': sum(errors.lines(short=True), []) - }) - d['result'] = r - return dumps(d) + try: + f = open("static/tpl/" + templateName + ".tpl") + x = f.read() + f.close() + except IOError: + if lang == 'fr': + msg = "Modèle inexistant" + else: + msg = "No such template" + return dumps({'error': msg}) + print(x) + try: + templ = template.Template(x) + except error.TemplateLoadError as e: + if lang == 'fr': + msg = "Erreur à la lecture du modèle : " + e.msg + else: + msg = "Error when reading template: " + e.msg + return 
dumps({'error': msg}) + poem.append(None) + r = [] + i = 0 + d = {} + for line in poem: + i += 1 + last = False + if line == None: + line = "" + last = True + errors = templ.check(line, last=last) + if errors: + r.append({ + 'line': line, + 'num': i, + 'errors': sum(errors.lines(short=True), []) + }) + d['result'] = r + return dumps(d) -if __name__ == '__main__': - run(app, port='5000', server="cherrypy", host="::") +if __name__ == '__main__': + run(app, port='5000', server="cherrypy", host="::") diff --git a/plint/template.py b/plint/template.py @@ -5,305 +5,264 @@ from plint import error, rhyme from plint.common import normalize from plint.nature import nature_count from plint.options import default_options +from plint.pattern import Pattern from plint.verse import Verse -from plint.vowels import make_query - - -class Pattern: - def __init__(self, metric, myid="", femid="", constraint=None, hemistiches=None): - self.metric = metric - self.parse_metric() - self.myid = myid - self.femid = femid - self.constraint = constraint - if hemistiches: - self.hemistiches = hemistiches - - def parse_metric(self): - """Parse from a metric description""" - try: - verse = [int(x) for x in self.metric.split('/')] - for i in verse: - if i < 1: - raise ValueError - except ValueError: - raise error.TemplateLoadError(("Metric description should only contain positive integers")) - if sum(verse) > 16: - raise error.TemplateLoadError(("Metric length limit exceeded")) - self.hemistiches = [] - self.length = 0 - for v in verse: - self.length += v - self.hemistiches.append(self.length) - self.length = self.hemistiches.pop() -class Template: - option_aliases = { - 'fusionner': 'merge', - 'ambiguous_ok': 'forbidden_ok', - 'ambigu_ok': 'forbidden_ok', - 'dierese': 'diaeresis', - 'verifie_occurrences': 'check_occurrences', - 'repetition_ok': 'repeat_ok', - 'incomplet_ok': 'incomplete_ok', - 'phon_supposee_ok': 'phon_supposed_ok', - 'oeil_supposee_ok': 'eye_supposed_ok', - 'oeil_tolerance_ok': 'eye_tolerance_ok', - 'pauvre_oeil_requise': 'poor_eye_required', - 'pauvre_oeil_supposee_ok': 'poor_eye_supposed_ok', - 'pauvre_oeil_vocalique_ok': 'poor_eye_vocalic_ok', + +OPTION_ALIASES = { + 'fusionner': 'merge', + 'ambiguous_ok': 'forbidden_ok', + 'ambigu_ok': 'forbidden_ok', + 'dierese': 'diaeresis', + 'verifie_occurrences': 'check_occurrences', + 'repetition_ok': 'repeat_ok', + 'incomplet_ok': 'incomplete_ok', + 'phon_supposee_ok': 'phon_supposed_ok', + 'oeil_supposee_ok': 'eye_supposed_ok', + 'oeil_tolerance_ok': 'eye_tolerance_ok', + 'pauvre_oeil_requise': 'poor_eye_required', + 'pauvre_oeil_supposee_ok': 'poor_eye_supposed_ok', + 'pauvre_oeil_vocalique_ok': 'poor_eye_vocalic_ok', } - def __init__(self, string=None): - self.template = [] - self.pattern_line_no = 0 - self.options = dict(default_options) - self.mergers = [] - self.overflowed = False - if string != None: - self.load(string) - self.line_no = 0 - self.position = 0 - self.prev = None - self.env = {} - self.femenv = {} - self.occenv = {} - self.reject_errors = False - - def read_option(self, x): - try: - key, value = x.split(':') - except ValueError: - raise error.TemplateLoadError(("Global options must be provided as key-value pairs")) - if key in self.option_aliases.keys(): - key = self.option_aliases[key] - if key == 'merge': - self.mergers.append(value) - elif key == 'diaeresis': - if value == "classique": - value = "classical" - if value not in ["permissive", "classical"]: - raise error.TemplateLoadError(("Bad value for global option %s") % key) - 
self.options['diaeresis'] = value - elif key in self.options.keys(): - self.options[key] = str2bool(value) - else: - raise error.TemplateLoadError(("Unknown global option")) - - def load(self, s): - """Load from a string""" - for line in s.split('\n'): - line = line.strip() - self.pattern_line_no += 1 - if line != '' and line[0] != '#': - if line[0] == '!': - # don't count the '!' in the options, that's why we use [1:] - for option in line.split()[1:]: - self.read_option(option) +class Template: + + def __init__(self, template_string=None): + self.template = [] + self.pattern_line_no = 0 + self.options = dict(default_options) + self.mergers = [] + self.overflowed = False + if template_string is not None: + self.load(template_string) + self.line_no = 0 + self.position = 0 + self.prev = None + self.env = {} + self.feminine_environment = {} + self.occurrence_environment = {} + self.reject_errors = False + + def load(self, template_string): + """Load from a string""" + for line in template_string.split('\n'): + line = line.strip() + self.pattern_line_no += 1 + if len(line) != 0 and line[0] != '#': + if line[0] == '!': + # don't count the '!' in the options, that's why we use [1:] + for option_string in line.split()[1:]: + self.read_option(option_string) + else: + self.template.append(self.parse_line(line.strip())) + if len(self.template) == 0: + raise error.TemplateLoadError("Template is empty") + + def read_option(self, option_string): + try: + key, value = option_string.split(':') + except ValueError: + raise error.TemplateLoadError("Global options must be provided as key-value pairs") + if key in OPTION_ALIASES: + key = OPTION_ALIASES[key] + if key == 'merge': + self.mergers.append(value) + elif key == 'diaeresis': + if value == "classique": + value = "classical" + if value not in ["permissive", "classical"]: + raise error.TemplateLoadError("Bad value for global option %s" % key) + self.options['diaeresis'] = value + elif key in self.options: + self.options[key] = str2bool(value) + else: + raise error.TemplateLoadError("Unknown global option") + + def parse_line(self, line): + """Parse template line from a line""" + split = line.split(' ') + metric = split[0] + if len(split) >= 2: + my_id = split[1] else: - self.template.append(self.parse_line(line.strip())) - if len(self.template) == 0: - raise error.TemplateLoadError(("Template is empty")) - - def match(self, line, ofile=None, quiet=False, last=False, nsyl=None, - offset=0): - """Check a line against current pattern, return errors""" - - was_incomplete = last and not self.beyond - - errors = [] - pattern = self.get() - - line_with_case = normalize(line, downcase=False) - - v = Verse(line, self, pattern) - - if nsyl: - v.annotate() - count = 0 - # only generate a context with the prescribed final weight - # where "final" is the offset-th chunk with a weight from the end - for i, p in enumerate(v.chunks.chunks[::-1]): - if (p.weights is not None): - if count < offset: - count += 1 - continue - print(str(nsyl) + ' ' - + ' '.join(make_query(v.chunks.chunks, len(v.chunks.chunks)-i-1)), file=ofile) - break - return errors, pattern, v - - if last: - if was_incomplete and not self.options['incomplete_ok'] and not self.overflowed: - return [error.ErrorIncompleteTemplate()], pattern, v - return [], pattern, v - - if self.overflowed: - return [error.ErrorOverflowedTemplate()], pattern, v - - rhyme_failed = False - # rhymes - if pattern.myid not in self.env.keys(): - # initialize the rhyme - # last_count is passed later - self.env[pattern.myid] = 
rhyme.Rhyme(v.normalized, - pattern.constraint, self.mergers, self.options) - else: - # update the rhyme - self.env[pattern.myid].feed(v.normalized, pattern.constraint) - if not self.env[pattern.myid].satisfied_phon(): - # no more possible rhymes, something went wrong, check phon - self.env[pattern.myid].rollback() - rhyme_failed = True - errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], - self.env[pattern.myid].new_rhyme)) - - # occurrences - if self.options['check_occurrences']: - if pattern.myid not in self.occenv.keys(): - self.occenv[pattern.myid] = {} - last_word = re.split(r'[- ]', line_with_case)[-1] - if last_word not in self.occenv[pattern.myid].keys(): - self.occenv[pattern.myid][last_word] = 0 - self.occenv[pattern.myid][last_word] += 1 - if self.occenv[pattern.myid][last_word] > nature_count(last_word): - errors.insert(0, error.ErrorMultipleWordOccurrence(last_word, - self.occenv[pattern.myid][last_word])) - - v.phon = self.env[pattern.myid].phon - v.parse() - - # now that we have parsed, adjust rhyme to reflect last word length - # and check eye - if not rhyme_failed: - self.env[pattern.myid].adjustLastCount(v.last_count()) - if not self.env[pattern.myid].satisfied_eye(): - old_phon = len(self.env[pattern.myid].phon) - self.env[pattern.myid].rollback() - errors.append(error.ErrorBadRhymeEye(self.env[pattern.myid], - self.env[pattern.myid].new_rhyme, old_phon)) - - rhyme_failed = False - - errors = v.problems() + errors - - if ofile: - possible = v.possible - if len(possible) == 1: - for i, p in enumerate(possible[0]): - if (p.weights is not None and len(p.weights) > 1 - and p.weight is not None and p.weight > 0): - print(str(p.weight) + ' ' - + ' '.join(make_query(possible[0], i)), file=ofile) - - # rhyme genres - # inequality constraint - # TODO this is simplistic and order-dependent - if pattern.femid.swapcase() in self.femenv.keys(): - new = set(['M', 'F']) - self.femenv[pattern.femid.swapcase()] - if len(new) > 0: - self.femenv[pattern.femid] = new - if pattern.femid not in self.femenv.keys(): - if pattern.femid == 'M': - x = set(['M']) - elif pattern.femid == 'F': - x = set(['F']) - else: - x = set(['M', 'F']) - self.femenv[pattern.femid] = x - old = list(self.femenv[pattern.femid]) - new = v.genders() - self.femenv[pattern.femid] &= set(new) - if len(self.femenv[pattern.femid]) == 0: - errors.append(error.ErrorBadRhymeGenre(old, new)) - - return errors, pattern, v - - def parse_line(self, line): - """Parse template line from a line""" - split = line.split(' ') - metric = split[0] - if len(split) >= 2: - myid = split[1] - else: - myid = str(self.pattern_line_no) # unique - if len(split) >= 3: - femid = split[2] - else: - femid = str(self.pattern_line_no) # unique - idsplit = myid.split(':') - if len(idsplit) >= 2: - constraint = idsplit[-1].split('|') - if len(constraint) > 0: - constraint[0] = False if constraint[0] in ["no", "non"] else constraint[0] - if len(constraint) > 1: - constraint[1] = int(constraint[1]) - else: - constraint = [] - if len(constraint) == 0: - constraint.append(1) - if len(constraint) < 2: - constraint.append(True) - return Pattern(metric, myid, femid, rhyme.Constraint(*constraint)) - - def reset_conditional(self, d): - return dict((k, v) for k, v in d.items() if len(k) > 0 and k[0] == '!') - - def reset_state(self, with_femenv=False): - """Reset our state, except ids starting with '!'""" - self.position = 0 - self.env = self.reset_conditional(self.env) - self.femenv = self.reset_conditional(self.femenv) - self.occenv = {} # always 
reset - - @property - def beyond(self): - return self.position >= len(self.template) - - def get(self): - """Get next state, resetting if needed""" - self.old_position = self.position - self.old_env = copy.deepcopy(self.env) - self.old_femenv = copy.deepcopy(self.femenv) - self.old_occenv = copy.deepcopy(self.occenv) - if self.beyond: - if not self.options['repeat_ok']: - self.overflowed = True - self.reset_state() - result = self.template[self.position] - self.position += 1 - return result - - def back(self): - """Revert to previous state""" - self.position = self.old_position - self.env = copy.deepcopy(self.old_env) - self.femenv = copy.deepcopy(self.old_femenv) - self.occenv = copy.deepcopy(self.old_occenv) - - def check(self, line, ofile=None, quiet=False, last=False, nsyl=None, - offset=0): - """Check line (wrapper)""" - self.line_no += 1 - line = line.rstrip() - if normalize(line) == '' and not last: - return None - #possible = [compute(p) for p in possible] - #possible = sorted(possible, key=rate) - errors, pattern, verse = self.match(line, ofile, quiet=quiet, last=last, - nsyl=nsyl, offset=offset) - if len(errors) > 0: - if self.reject_errors: - self.back() - self.line_no -= 1 - return error.ErrorCollection(self.line_no, line, pattern, verse, errors) - return None + my_id = str(self.pattern_line_no) # unique + if len(split) >= 3: + feminine_id = split[2] + else: + feminine_id = str(self.pattern_line_no) # unique + id_split = my_id.split(':') + classical = True + n_common_suffix_phones = 1 + if len(id_split) >= 2: + constraint = id_split[-1].split('|') + if len(constraint) > 0: + classical = False if constraint[0] in ["no", "non"] else constraint[0] + if len(constraint) > 1: + n_common_suffix_phones = int(constraint[1]) + else: + constraint = [] + if len(constraint) == 0: + n_common_suffix_phones = 1 + if len(constraint) < 2: + classical = True + return Pattern(metric, my_id, feminine_id, rhyme.Constraint(classical, n_common_suffix_phones)) -def str2bool(x): - if x.lower() in ["yes", "oui", "y", "o", "true", "t", "vrai", "v"]: - return True - if x.lower() in ["no", "non", "n", "false", "faux", "f"]: - return False - raise error.TemplateLoadError(("Bad value in global option")) + def match(self, line, output_file=None, last=False, n_syllables=None, offset=0): + """Check a line against current pattern, return errors""" + + was_incomplete = last and not self.beyond + + errors = [] + pattern = self.get() + line_with_case = normalize(line, downcase=False) + + verse = Verse(line, self, pattern) + + if n_syllables: + verse.print_n_syllables(n_syllables, offset, output_file) + return errors, pattern, verse + + if last: + if was_incomplete and not self.options['incomplete_ok'] and not self.overflowed: + return [error.ErrorIncompleteTemplate()], pattern, verse + return [], pattern, verse + + if self.overflowed: + return [error.ErrorOverflowedTemplate()], pattern, verse + + rhyme_failed = False + # rhymes + if pattern.my_id not in self.env: + # initialize the rhyme + # last_count is passed later + self.env[pattern.my_id] = rhyme.Rhyme(verse.normalized, pattern.constraint, self.mergers, self.options) + else: + # update the rhyme + self.env[pattern.my_id].feed(verse.normalized, pattern.constraint) + if not self.env[pattern.my_id].satisfied_phon(): + # no more possible rhymes, something went wrong, check phon + self.env[pattern.my_id].rollback() + rhyme_failed = True + errors.append(error.ErrorBadRhymeSound(self.env[pattern.my_id], + self.env[pattern.my_id].new_rhyme)) + + # occurrences + if 
self.options['check_occurrences']: + if pattern.my_id not in self.occurrence_environment.keys(): + self.occurrence_environment[pattern.my_id] = {} + last_word = re.split(r'[- ]', line_with_case)[-1] + if last_word not in self.occurrence_environment[pattern.my_id].keys(): + self.occurrence_environment[pattern.my_id][last_word] = 0 + self.occurrence_environment[pattern.my_id][last_word] += 1 + if self.occurrence_environment[pattern.my_id][last_word] > nature_count(last_word): + errors.insert(0, error.ErrorMultipleWordOccurrence(last_word, + self.occurrence_environment[pattern.my_id][last_word])) + + verse.phon = self.env[pattern.my_id].phon + verse.parse() + + # now that we have parsed, adjust rhyme to reflect last word length + # and check eye + if not rhyme_failed: + self.env[pattern.my_id].adjustLastCount(verse.last_count()) + if not self.env[pattern.my_id].satisfied_eye(): + old_phon = len(self.env[pattern.my_id].phon) + self.env[pattern.my_id].rollback() + errors.append(error.ErrorBadRhymeEye(self.env[pattern.my_id], + self.env[pattern.my_id].new_rhyme, old_phon)) + + errors = verse.problems() + errors + + if output_file: + possible = verse.possible + if len(possible) == 1: + for i, chunk in enumerate(possible[0]): + if (chunk.weights is not None and len(chunk.weights) > 1 + and chunk.weight is not None and chunk.weight > 0): + chunks_before = possible[0][:i] + chunks_after = possible[0][i + 1:] + print(str(chunk.weight) + ' ' + + ' '.join(chunk.make_query(chunks_before, chunks_after)), file=output_file) + + # rhyme genres + # inequality constraint + # TODO this is simplistic and order-dependent + if pattern.feminine_id.swapcase() in self.feminine_environment.keys(): + new = {'M', 'F'} - self.feminine_environment[pattern.feminine_id.swapcase()] + if len(new) > 0: + self.feminine_environment[pattern.feminine_id] = new + if pattern.feminine_id not in self.feminine_environment.keys(): + if pattern.feminine_id == 'M': + x = {'M'} + elif pattern.feminine_id == 'F': + x = {'F'} + else: + x = {'M', 'F'} + self.feminine_environment[pattern.feminine_id] = x + old = list(self.feminine_environment[pattern.feminine_id]) + new = verse.genders() + self.feminine_environment[pattern.feminine_id] &= set(new) + if len(self.feminine_environment[pattern.feminine_id]) == 0: + errors.append(error.ErrorBadRhymeGenre(old, new)) + + return errors, pattern, verse + + def reset_conditional(self, d): + return dict((k, v) for k, v in d.items() if len(k) > 0 and k[0] == '!') + + def reset_state(self, with_femenv=False): + """Reset our state, except ids starting with '!'""" + self.position = 0 + self.env = self.reset_conditional(self.env) + self.feminine_environment = self.reset_conditional(self.feminine_environment) + self.occurrence_environment = {} # always reset + + @property + def beyond(self): + return self.position >= len(self.template) + + def get(self): + """Get next state, resetting if needed""" + self.old_position = self.position + self.old_env = copy.deepcopy(self.env) + self.old_femenv = copy.deepcopy(self.feminine_environment) + self.old_occenv = copy.deepcopy(self.occurrence_environment) + if self.beyond: + if not self.options['repeat_ok']: + self.overflowed = True + self.reset_state() + result = self.template[self.position] + self.position += 1 + return result + + def back(self): + """Revert to previous state""" + self.position = self.old_position + self.env = copy.deepcopy(self.old_env) + self.feminine_environment = copy.deepcopy(self.old_femenv) + self.occurrence_environment = 
copy.deepcopy(self.old_occenv) + + def check(self, line, output_file=None, last=False, n_syllables=None, offset=0): + """Check line (wrapper)""" + self.line_no += 1 + line = line.rstrip() + if normalize(line) == '' and not last: + return None + + errors, pattern, verse = self.match(line, output_file, last=last, n_syllables=n_syllables, offset=offset) + if len(errors) > 0: + if self.reject_errors: + self.back() + self.line_no -= 1 + return error.ErrorCollection(self.line_no, line, pattern, verse, errors) + return None + + +def str2bool(x): + if x.lower() in ["yes", "oui", "y", "o", "true", "t", "vrai", "v"]: + return True + if x.lower() in ["no", "non", "n", "false", "faux", "f"]: + return False + raise error.TemplateLoadError(("Bad value in global option")) diff --git a/plint/tests/test_bad_chars.py b/plint/tests/test_bad_chars.py @@ -1,16 +1,17 @@ import unittest +import plint.pattern from plint import verse, template class BadChars(unittest.TestCase): def testBadAlone(self): - v = verse.Verse("42", template.Template(), template.Pattern("12")) + v = verse.Verse("42", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertFalse(v.valid()) def testBadAndGood(self): - v = verse.Verse("bla h42 blah ", template.Template(), template.Pattern("12")) + v = verse.Verse("bla h42 blah ", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertFalse(v.valid()) diff --git a/plint/tests/test_counts.py b/plint/tests/test_counts.py @@ -1,12 +1,13 @@ import unittest +import plint.pattern from plint import verse, template class Counts(unittest.TestCase): def runCount(self, text, limit=12, hemistiches=None): - v = verse.Verse(text, template.Template(), template.Pattern(str(limit), hemistiches=hemistiches)) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern(str(limit), hemistiches=hemistiches)) v.parse() return v.possible diff --git a/plint/tests/test_eliminate.py b/plint/tests/test_eliminate.py @@ -1,19 +1,20 @@ import unittest +import plint.pattern from plint import verse, template class Eliminate(unittest.TestCase): def testEliminateOneGue(self): text = "gue" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() c = ''.join([x.text for x in v.chunks.chunks]) self.assertFalse("gue" in c) def testEliminateGue(self): text = "gue gue GUE ogues longuement la guerre" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() c = ''.join([x.text for x in v.chunks.chunks]) self.assertFalse("gue" in c) diff --git a/plint/tests/test_gender.py b/plint/tests/test_gender.py @@ -1,12 +1,13 @@ import unittest +import plint.pattern from plint import verse, template class Genders(unittest.TestCase): def testSingleSyllJe(self): text = "Patati patata patatatah où suis-je" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) @@ -15,7 +16,7 @@ class Genders(unittest.TestCase): def testSingleSyllJeBis(self): text = "Patati patata patatah la verrai-je" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) @@ -24,7 +25,7 @@ class Genders(unittest.TestCase): def testSingleSyllLe(self): text = "Patati patata 
patatata prends-le" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) @@ -33,7 +34,7 @@ class Genders(unittest.TestCase): def testSingleSyllCe(self): text = "Patati patata patatata mais qu'est-ce" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) @@ -42,7 +43,7 @@ class Genders(unittest.TestCase): def testSingleSyllHyphen(self): text = "Patati patata patata mange-les" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) @@ -51,7 +52,7 @@ class Genders(unittest.TestCase): def testSingleSyllNoHyphen(self): text = "Patati patata patata mange les" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertTrue(v.valid()) diff --git a/plint/tests/test_hiatus.py b/plint/tests/test_hiatus.py @@ -1,36 +1,37 @@ import unittest +import plint.pattern from plint import verse, template class Hiatus(unittest.TestCase): def testBadVowel(self): - v = verse.Verse("patati patata patata arbrisseau", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patata arbrisseau", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertFalse(v.valid()) def testBadUnaspirated(self): - v = verse.Verse("patati patata patata hirondelle", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patata hirondelle", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertFalse(v.valid()) def testGoodAspirated(self): - v = verse.Verse("patati patata patata tata hache", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patata tata hache", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertTrue(v.valid()) def testGoodConsonant(self): - v = verse.Verse("patati patata patatah arbrisseau", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patatah arbrisseau", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertTrue(v.valid()) def testGoodMuteE(self): - v = verse.Verse("patati patata patatue arbrisseau", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patatue arbrisseau", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertTrue(v.valid()) def testBadEt(self): - v = verse.Verse("patati patata patata et avant", template.Template(), template.Pattern("12")) + v = verse.Verse("patati patata patata et avant", template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertFalse(v.valid()) diff --git a/plint/tests/test_sanity_check.py b/plint/tests/test_sanity_check.py @@ -1,5 +1,6 @@ import unittest +import plint.pattern from plint import diaeresis, verse, template, common @@ -7,31 +8,31 @@ class SanityCheck(unittest.TestCase): def testSimple(self): text = "Hello World!! 
This is a test_data" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertEqual(text, v.line) def testComplex(self): text = "Aye AYAYE aye gue que geque AYAYAY a prt sncf bbbéé" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertEqual(text, v.line) def testLeadingSpace(self): text = " a" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertEqual(text, v.line) def testLeadingSpaceHyphenVowel(self): text = " -a" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertEqual(text, v.line) def testLeadingSpaceHyphenConsonant(self): text = " -c" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() self.assertEqual(text, v.line) diff --git a/plint/tests/test_sanity_check2.py b/plint/tests/test_sanity_check2.py @@ -1,12 +1,13 @@ import unittest +import plint.pattern from plint import verse, template class SanityCheck2(unittest.TestCase): def testSimple(self): text = "Patati patata patata tata vies" - v = verse.Verse(text, template.Template(), template.Pattern("12")) + v = verse.Verse(text, template.Template(), plint.pattern.Pattern("12")) v.parse() gend = v.genders() self.assertEqual(1, len(gend)) diff --git a/plint/verse.py b/plint/verse.py @@ -1,7 +1,8 @@ #!/usr/bin/python3 -from plint.chunks import Chunks -from plint.common import normalize, is_vowels, SURE_END_FEM, strip_accents from plint import error, common +from plint.chunks import Chunks +from plint.common import SURE_END_FEM, strip_accents + # the writing is designed to make frhyme succeed # end vowels will be elided @@ -11,13 +12,11 @@ class Verse: @property def line(self): - return ''.join(x.original for x in self.chunks.chunks) + return self.chunks.get_line() @property def normalized(self): - return ''.join(normalize(x.original, strip=False, rm_apostrophe_end=False) - if x.text_pron is None else x.text - for x in self.chunks.chunks).lstrip().rstrip() + return self.chunks.normalized() def __init__(self, input_line, template, pattern, threshold=None): self.template = template @@ -25,8 +24,8 @@ class Verse: self.threshold = threshold self.phon = None self.possible = None - self._line = input_line - self.chunks = Chunks(input_line) + self.input_line = input_line + self.chunks = Chunks(self) self.text = None def annotate(self): @@ -119,7 +118,6 @@ class Verse: def last_count(self): """return min number of syllables for last word""" - tot = 0 for chunk in self.chunks.chunks[::-1]: if chunk.original.endswith(' ') or chunk.original.endswith('-'): @@ -133,18 +131,10 @@ class Verse: return tot def problems(self): + errors = self.chunks.get_errors_set(self.template.options['forbidden_ok'], self.template.options['hiatus_ok']) result = [] - errors = set() if len(self.possible) == 0: result.append(error.ErrorBadMetric()) - for chunk in self.chunks.chunks: - if chunk.error is not None: - if chunk.error == "ambiguous" and not self.template.options['forbidden_ok']: - errors.add(error.ErrorForbiddenPattern) - if chunk.error == "hiatus" and not self.template.options['hiatus_ok']: - errors.add(error.ErrorHiatus) - if 
chunk.error == "illegal": - errors.add(error.ErrorBadCharacters) for k in errors: result.append(k()) return result @@ -160,3 +150,9 @@ class Verse: # try to infer gender even when metric is wrong result.update(set(self.feminine(None))) return result + + def print_n_syllables(self, n_syllables, offset, output_file): + self.annotate() + # only generate a context with the prescribed final weight + # where "final" is the offset-th chunk with a weight from the end + self.chunks.print_n_syllables(n_syllables, offset, output_file) diff --git a/plint/vowels.py b/plint/vowels.py @@ -3,45 +3,6 @@ """Compute the number of syllabes taken by a vowel chunk""" -from plint.common import strip_accents -from plint import diaeresis - -DEFAULT_THRESHOLD = 3 - - -def possible_weights_ctx(chunks, pos, threshold=None): - global DEFAULT_THRESHOLD - if not threshold: - threshold = DEFAULT_THRESHOLD - chunk = chunks[pos] - q = make_query(chunks, pos) - v = diaeresis.diaeresis_finder.lookup(q) - if len(v.keys()) == 1 and v[list(v.keys())[0]] > threshold: - return [int(list(v.keys())[0])] - else: - return possible_weights_seed(chunk) - - -def make_query(chunks, pos): - cleared = [clear(chunk) for chunk in chunks] - if cleared[pos].endswith(' '): - cleared[pos] = cleared[pos].rstrip() - if pos + 1 < len(cleared): - cleared[pos + 1] = " " + cleared[pos + 1] - else: - cleared.append(' ') - ret2 = intersperse( - ''.join(cleared[pos + 1:]), - ''.join([x[::-1] for x in cleared[:pos][::-1]])) - ret = [cleared[pos]] + ret2 - return ret - - -def clear(chunk): - if chunk.word_end == True: - return (chunk.text + ' ') - return chunk.text - def intersperse(left, right): if (len(left) == 0 or left[0] == ' ') and (len(right) == 0 or right[0] == ' '): @@ -53,98 +14,9 @@ def intersperse(left, right): return [left[0], right[0]] + intersperse(left[1:], right[1:]) -def possible_weights_approx(chunk): - """Return the possible number of syllabes taken by a vowel chunk (permissive approximation)""" - if len(chunk) == 1: - return [1] - # old spelling and weird exceptions - if chunk in ['ouï']: - return [1, 2] # TODO unsure about that - if chunk in ['eüi', 'aoû', 'uë']: - return [1] - if chunk in ['aïe', 'oë', 'ouü']: - return [1, 2] - if contains_trema(chunk): - return [2] - chunk = strip_accents(chunk, True) - if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi', - 'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo', - 'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii', - 'yeu', 'ye', 'you']: - return [1] - if chunk == "oua": - return [1, 2] # "pouah" - if chunk == "ao": - return [1, 2] # "paon" - for x in ['oa', 'ea', 'eua', 'euo', 'ua', 'uo', 'yau']: - if x in chunk: - return [2] - # beware of "déesse" - if chunk == 'ée': - return [1, 2] - if chunk[0] == 'i': - return [1, 2] - if chunk[0] == 'u' and (strip_accents(chunk[1]) in ['i', 'e']): - return [1, 2] - if chunk[0] == 'o' and chunk[1] == 'u' and len(chunk) >= 3 and strip_accents(chunk[2]) in ['i', 'e']: - return [1, 2] - if 'é' in chunk or 'è' in chunk: - return [2] - # we can't tell - return [1, 2] - - def contains_trema(chunk): """Test if a string contains a word with a trema""" for x in ['ä', 'ë', 'ï', 'ö', 'ü', 'ÿ']: if x in chunk: return True return False - - -def possible_weights_seed(chunk): - """Return the possible number of syllabes taken by a vowel chunk""" - if len(chunk.text) == 1: - return [1] - # dioïde, maoïste, taoïste - if (chunk.text[-1] == 'ï' and len(chunk.text) >= 3 and not - chunk.text[-3:-1] == 'ou'): - return [3] - # 
ostéoarthrite
-    if "éoa" in chunk.text:
-        return [3]
-    # antiaérien; but let's play it safe
-    if "iaé" in chunk.text:
-        return [2, 3]
-    # giaour, miaou, niaouli
-    if "iaou" in chunk.text:
-        return [2, 3]
-    # bioélectrique
-    if "ioé" in chunk.text:
-        return [2, 3]
-    # méiose, nucléion, etc.
-    if "éio" in chunk.text:
-        return [2, 3]
-    # radioactif, radioamateur, etc.
-    if "ioa" in chunk.text:
-        return [2, 3]
-    # pléiade
-    if "éio" in chunk.text:
-        return [2, 3]
-    # pompéien, tarpéien...
-    # in theory the "-ie" should give a diaeresis, so 3 syllabes
-    # let's keep the benefit of the doubt...
-    # => this also gives 3 as a possibility for "obéie"...
-    if "éie" in chunk.text:
-        return [2, 3]
-    # tolstoïen
-    # same remark
-    if "oïe" in chunk.text:
-        return [2, 3]
-    # shanghaïen (diaeresis?), but also "aië"
-    if "aïe" in chunk.text:
-        return [1, 2, 3]
-    if chunk.text in ['ai', 'ou', 'eu', 'ei', 'eau', 'au', 'oi']:
-        return [1]
-    # we can't tell
-    return [1, 2]
diff --git a/test.sh b/test.sh
@@ -5,12 +5,32 @@
 echo "It is normal that some errors occur when running this script" >/dev/stderr
 echo "See test_expected_output.out for the usual errors that are output" >/dev/stderr
 
+
+rm -f test_temp.txt;
+rm -f test_temp_sorted.txt;
+rm -f test_expected_sorted.txt;
+
 for a in plint/test_data/*.tpl; do
     echo "$a"
+    echo "$a" >> test_temp.txt
     if [[ $a == *cyrano_full* ]]
     then
-        ./plint.py $(pwd)/$a ../data/diaeresis_cyrano.json < $(pwd)/${a%.tpl}
+        ./plint.py $(pwd)/$a ../data/diaeresis_cyrano.json < $(pwd)/${a%.tpl} &>> test_temp.txt
     else
-        ./test_one.sh $(basename "${a%.tpl}")
+        ./test_one.sh $(basename "${a%.tpl}") &>> test_temp.txt
     fi
 done
+
+sort test_temp.txt > test_temp_sorted.txt;
+sort test_expected_output.out > test_expected_sorted.txt;
+
+if [ "$(python3 compare_test_output.py test_temp_sorted.txt test_expected_sorted.txt)" -eq 1 ]; then
+    echo "TEST SUCCEEDED";
+else
+    echo "TEST FAILED";
+    diff test_temp_sorted.txt test_expected_sorted.txt
+fi
+
+rm -f test_temp.txt;
+rm -f test_temp_sorted.txt;
+rm -f test_expected_sorted.txt
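
A few self-contained sketches of the pieces reworked above may help when reading the diff. First, the size guard in plint/plint_web.py: check() enforces MAX_POEM_LEN and MAX_LINE_LEN before any metric analysis runs. This is the function from the diff plus a hypothetical usage; the sample inputs are illustrative.

# The size guard from plint/plint_web.py, plus a hypothetical usage;
# the sample strings are illustrative.
MAX_POEM_LEN = 8192
MAX_LINE_LEN = 512


class TooBigException(Exception): pass


class TooLongLinesException(Exception): pass


def check(poem):
    # reject oversize submissions outright, then strip each line
    if len(poem) > MAX_POEM_LEN:
        raise TooBigException
    s = poem.split("\n")
    for x in range(len(s)):
        if len(s[x]) > MAX_LINE_LEN:
            raise TooLongLinesException
        s[x] = s[x].strip()
    return s


print(check("Maître Corbeau, sur un arbre perché,\nTenait en son bec un fromage."))
try:
    check("a" * (MAX_POEM_LEN + 1))
except TooBigException:
    print("rejected: poem exceeds %d characters" % MAX_POEM_LEN)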
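The checkjs handler throttles clients by keeping a global set of (ip, timestamp) pairs and expiring entries older than THROTTLE_DELAY on every request. A minimal standalone sketch of that sliding-window scheme; the delay value and the is_throttled helper name are illustrative, not names from the diff.

import time

THROTTLE_DELAY = 10  # seconds; illustrative, the real constant lives elsewhere in plint_web.py

throttle = set()  # (ip, timestamp) pairs, as in the q() handler above


def is_throttled(ip):  # hypothetical helper name
    """Expire old entries, then tell whether this IP must wait."""
    global throttle
    now = time.time()
    throttle = set(x for x in throttle if now - x[1] < THROTTLE_DELAY)
    if ip in (x[0] for x in throttle):
        return True
    throttle.add((ip, now))
    return False


print(is_throttled("192.0.2.1"))  # False: first request goes through
print(is_throttled("192.0.2.1"))  # True: second request inside the window is rejected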
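Pattern and its parse_metric move out of template.py into the new plint/pattern.py. parse_metric turns a metric description such as "6/6" into a verse length plus hemistich positions. A function-style rendering of the logic visible in the removed template.py lines, with plain ValueError standing in for error.TemplateLoadError:

def parse_metric(metric):
    # e.g. "6/6": two hemistiches of six syllables each
    verse = [int(x) for x in metric.split('/')]
    if any(i < 1 for i in verse):
        raise ValueError("Metric description should only contain positive integers")
    if sum(verse) > 16:
        raise ValueError("Metric length limit exceeded")
    hemistiches = []
    length = 0
    for v in verse:
        length += v
        hemistiches.append(length)
    length = hemistiches.pop()  # the last boundary is the verse end, not a caesura
    return length, hemistiches


print(parse_metric("6/6"))    # (12, [6]): the classical alexandrine
print(parse_metric("4/4/4"))  # (12, [4, 8]): a ternary division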
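The rewritten Template.parse_line names its constraint fields explicitly: a rhyme id such as "A:no|2" carries an optional suffix selecting sound-only rhyme and the number of common suffix phonemes. A reduced rendering of just that suffix parsing; the function name is illustrative, the real method feeds the two values into rhyme.Constraint on a Pattern.

def parse_constraint(my_id):
    # defaults match the diff: classical rhyme, one common suffix phoneme
    id_split = my_id.split(':')
    classical = True
    n_common_suffix_phones = 1
    if len(id_split) >= 2:
        constraint = id_split[-1].split('|')
        if len(constraint) > 0:
            classical = False if constraint[0] in ["no", "non"] else constraint[0]
        if len(constraint) > 1:
            n_common_suffix_phones = int(constraint[1])
    return classical, n_common_suffix_phones


print(parse_constraint("A"))       # (True, 1): default classical rhyme
print(parse_constraint("A:no|2"))  # (False, 2): sound-only, two common phonemes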
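Template option parsing now goes through the module-level OPTION_ALIASES table, which maps the French spellings accepted in template files onto canonical option names before str2bool is applied. A trimmed, self-contained sketch: the alias table here is a subset of the one in plint/template.py, the 'merge' special case is omitted, and error handling is simplified to ValueError.

OPTION_ALIASES = {  # subset of the table in plint/template.py
    'dierese': 'diaeresis',
    'repetition_ok': 'repeat_ok',
}


def str2bool(x):
    if x.lower() in ["yes", "oui", "y", "o", "true", "t", "vrai", "v"]:
        return True
    if x.lower() in ["no", "non", "n", "false", "faux", "f"]:
        return False
    raise ValueError("Bad value in global option")


def read_option(option_string, options):
    # resolve a French alias, then normalize or booleanize the value
    key, value = option_string.split(':')
    key = OPTION_ALIASES.get(key, key)
    if key == 'diaeresis':
        options[key] = 'classical' if value == 'classique' else value
    else:
        options[key] = str2bool(value)


options = {}
read_option('dierese:classique', options)
read_option('repetition_ok:non', options)
print(options)  # {'diaeresis': 'classical', 'repeat_ok': False}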
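Finally, the rhyme-gender bookkeeping in Template.match: genders are tracked per feminine id, and an id paired with its case-swapped twin (say 'a' and 'A') is read as an inequality constraint, so once one of them settles on a gender the other must take the opposite. A reduced sketch of that logic, omitting the special-cased literal 'M' and 'F' ids handled in the diff; the constrain helper is an illustrative name.

feminine_environment = {}


def constrain(feminine_id, observed_genders):
    # an id whose case-swapped twin is already pinned down starts from the complement
    if feminine_id.swapcase() in feminine_environment:
        new = {'M', 'F'} - feminine_environment[feminine_id.swapcase()]
        if len(new) > 0:
            feminine_environment[feminine_id] = new
    if feminine_id not in feminine_environment:
        feminine_environment[feminine_id] = {'M', 'F'}
    feminine_environment[feminine_id] &= set(observed_genders)
    return len(feminine_environment[feminine_id]) > 0  # False is where ErrorBadRhymeGenre fires


print(constrain('a', {'F'}))  # True: 'a' rhymes are feminine from now on
print(constrain('A', {'F'}))  # False: 'A' was forced masculine, a gender error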