plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 0d655d2f8408074bbfd4b5ed26ca371e14cbc954
parent 25e1f9b06ac0b9262a99f2bf27d61d1ab40a0c0b
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 10 Jun 2012 18:40:50 +0200

Merge branch 'dierese'

Diffstat:
diaeresis.json | 1+
diaeresis.py | 36++++++++++++++++++++++++++++++++++++
metric.py | 22++++++++++++++--------
plint.py | 10+++++++---
static/tpl/alexandrin.tpl | 2+-
template.py | 48++++++++++++++++++++++++++++++++++++++++++++----
vowels.py | 47+++++++++++++++++++++++++++++++++++++++++++++--
7 files changed, 148 insertions(+), 18 deletions(-)

diff --git a/diaeresis.json b/diaeresis.json @@ -0,0 +1 @@ +[{"1": 87707, "2": 2604}, {"ou\u00e9": [{"2": 27}, {}], "ou\u00ef": [{"2": 4}, {}], "\u00e9u": [{"2": 10}, {}], "\u00e9i": [{"2": 37}, {}], "\u00e9o": [{"2": 23}, {}], "\u00e9a": [{"2": 86}, {}], "\u00e9e": [{"1": 606, "2": 9}, {"s": [{"1": 182, "2": 9}, {"c": [{"1": 15}, {}], "d": [{"2": 9}, {}], "f": [{"1": 5}, {}], "h": [{"1": 5}, {}], "m": [{"1": 10}, {}], "l": [{"1": 24}, {}], "n": [{"1": 24}, {}], "s": [{"1": 55}, {}], "r": [{"1": 24}, {}], "t": [{"1": 20}, {}]}], "/": [{"1": 424}, {}]}], "eui": [{"1": 104}, {}], "eue": [{"1": 5}, {}], "ao\u00fb": [{"1": 5}, {}], "ieu": [{"1": 856, "2": 374}, {"x": [{"1": 508, "2": 293}, {"c": [{"1": 40, "2": 86}, {"-": [{"1": 40}, {}], "/": [{"2": 86}, {}]}], "d": [{"1": 64, "2": 56}, {"-": [{"1": 54}, {}], "/": [{"1": 10, "2": 56}, {"a": [{"1": 5, "2": 10}, {"-": [{"1": 5}, {}], "/": [{"2": 10}, {}]}], "-": [{"1": 5}, {}], "u": [{"2": 4}, {}], "o": [{"2": 42}, {}]}]}], "g": [{"2": 9}, {}], "m": [{"1": 149}, {}], "l": [{"1": 179}, {}], "n": [{"2": 17}, {}], "p": [{"2": 21}, {}], "r": [{"2": 61}, {}], "t": [{"2": 28}, {}], "v": [{"1": 76, "2": 15}, {"-": [{"1": 76}, {}], "/": [{"2": 15}, {}]}]}], "s": [{"2": 65}, {}], "r": [{"1": 27, "2": 16}, {"s": [{"1": 27}, {}], "r": [{"2": 16}, {}]}], "/": [{"1": 321}, {}]}], "iei": [{"1": 88}, {}], "a\u00efe": [{"2": 13}, {}], "a\u00efa": [{"2": 2}, {}], "ye": [{"1": 5}, {}], "\u00e2i": [{"1": 15}, {}], "\u0153u": [{"1": 290}, {}], "\u0153i": [{"1": 24}, {}], "o\u00ef": [{"2": 13}, {}], "o\u00ee": [{"1": 68}, {}], "o\u00ea": [{"1": 5}, {}], "o\u00e9": [{"2": 26}, {}], "o\u00e8": [{"2": 135}, {}], "o\u00fb": [{"1": 166}, {}], "o\u00f9": [{"1": 355}, {}], "\u00fb": [{"1": 276}, {}], "oui": [{"1": 232, "2": 87}, {"-": [{"1": 76}, {}], "l": [{"1": 156}, {}], "/": [{"2": 12}, {}], "s": [{"2": 50}, {}], "r": [{"2": 9}, {}], "t": [{"2": 16}, {}]}], "ei": [{"1": 421}, {}], "e\u00fb": [{"1": 112}, {}], "oua": [{"2": 56}, {}], "ea": [{"1": 107}, {}], "oue": [{"1": 99, "2": 122}, {"m": [{"1": 5}, {}], "/": [{"1": 84}, {}], "n": [{"2": 5}, {}], "s": [{"2": 10}, {}], "r": [{"1": 10, "2": 89}, {"r": [{"2": 8}, {}], "v": [{"1": 10, "2": 21}, {"a": [{"1": 10}, {}], "-": [{"2": 4}, {}], "/": [{"2": 17}, {}]}], "l": [{"2": 37}, {}], "j": [{"2": 23}, {}]}], "t": [{"2": 14}, {}], "z": [{"2": 4}, {}]}], "e\u00fci": [{"1": 5}, {}], "oueu": [{"2": 6}, {}], "eu": [{"1": 2157}, {}], "o\u00fcoi": [{"2": 5}, {}], "\u00e9\u00e2": [{"2": 50}, {}], "yeu": [{"1": 382}, {}], "a\u00efeu": [{"2": 47}, {}], "eu\u00e2": [{"2": 4}, {}], "\u00e9au": [{"2": 3}, {}], "euoe": [{"2": 2}, {}], "oeu": [{"1": 265}, {}], "aie": [{"1": 268}, {}], "oei": [{"1": 48}, {}], "\u00e0": [{"1": 1038}, {}], "oi": [{"1": 2053}, {}], "eaie": [{"1": 5}, {}], "\u00e8": [{"1": 467}, {}], "oe": [{"1": 10}, {}], "o": [{"1": 7195}, {}], "\u00f4": [{"1": 405}, {}], "ou": [{"1": 4051}, {}], "u\u00e9e": [{"2": 9}, {}], "ui\u00e8": [{"1": 5}, {}], "eo": [{"1": 40}, {}], "a\u00ef": [{"2": 39}, {}], "a\u00ee": [{"1": 406}, {}], "i\u00e9": [{"1": 41, "2": 122}, {"s": [{"1": 5, "2": 22}, {"r": [{"2": 4}, {}], "v": [{"2": 13}, {}], "l": [{"2": 5}, {}], "t": [{"1": 5}, {}]}], "t": [{"2": 56}, {}], "d": [{"1": 10}, {}], "g": [{"1": 11}, {}], "/": [{"1": 15, "2": 44}, {"f": [{"2": 8}, {}], "l": [{"2": 21}, {}], "p": [{"2": 5}, {}], "s": [{"2": 5}, {}], "r": [{"2": 5}, {}], "t": [{"1": 15}, {}]}]}], "i\u00e8": [{"1": 703, "2": 34}, {"c": [{"1": 111}, {}], "g": [{"1": 24}, {}], "m": [{"1": 34, "2": 4}, {"q": [{"1": 5}, {}], "x": [{"1": 19}, {}], "s": [{"1": 5}, {}], "r": [{"2": 4}, {}], "v": [{"1": 5}, {}]}], "r": [{"1": 511, "2": 21}, {"c": [{"1": 5}, {}], "d": [{"1": 10}, {}], "f": [{"1": 10}, {}], "m": [{"1": 112}, {}], "l": [{"1": 39}, {}], "n": [{"1": 68}, {}], "q": [{"1": 10}, {}], "p": [{"1": 5}, {}], "s": [{"1": 60}, {}], "r": [{"1": 75, "2": 21}, {"`": [{"1": 15}, {}], "e": [{"1": 60, "2": 21}, {"p": [{"2": 17}, {}], "r": [{"1": 60}, {}], "t": [{"2": 4}, {}]}]}], "t": [{"1": 92}, {}], "v": [{"1": 25}, {}]}], "t": [{"2": 9}, {}], "v": [{"1": 23}, {}]}], "uei": [{"1": 72}, {}], "iai": [{"2": 9}, {}], "yau": [{"2": 4}, {}], "iau": [{"2": 3}, {}], "eai": [{"1": 20}, {}], "uau": [{"2": 10}, {}], "eau": [{"1": 555}, {}], "\u00e9": [{"1": 3370}, {}], "uo": [{"2": 10}, {}], "ui": [{"1": 1972, "2": 37}, {"d": [{"1": 72}, {}], "g": [{"1": 5}, {}], "-": [{"1": 108}, {}], "l": [{"1": 40}, {}], "/": [{"1": 447}, {}], "n": [{"1": 5, "2": 32}, {"r": [{"2": 32}, {}], "j": [{"1": 5}, {}]}], "s": [{"1": 330}, {}], "r": [{"1": 108}, {}], "t": [{"1": 694, "2": 5}, {"d": [{"1": 127}, {}], "g": [{"1": 10}, {}], "f": [{"1": 82}, {}], "h": [{"1": 15}, {}], "l": [{"2": 5}, {}], "n": [{"1": 116}, {}], "p": [{"1": 10}, {}], "s": [{"1": 75}, {}], "r": [{"1": 259}, {}]}], "v": [{"1": 163}, {}]}], "ue": [{"1": 301, "2": 74}, {"l": [{"2": 28}, {}], "/": [{"1": 223}, {}], "n": [{"1": 2, "2": 9}, {"p": [{"1": 1}, {}], "m": [{"1": 1}, {}], "l": [{"2": 9}, {}]}], "s": [{"1": 76}, {}], "r": [{"2": 16}, {}], "t": [{"2": 21}, {}]}], "o\u00fc\u00e9": [{"2": 4}, {}], "ua": [{"2": 21}, {}], "uia": [{"2": 4}, {}], "ae": [{"1": 15}, {}], "ai": [{"1": 2811}, {}], "iu": [{"2": 13}, {}], "ao": [{"2": 8}, {}], "ueu": [{"2": 91}, {}], "au": [{"1": 1498}, {}], "oa": [{"2": 9}, {}], "io": [{"1": 45, "2": 463}, {"c": [{"2": 11}, {}], "d": [{"2": 4}, {}], "m": [{"2": 21}, {}], "l": [{"2": 25}, {}], "n": [{"1": 45, "2": 386}, {"d": [{"2": 5}, {}], "g": [{"2": 5}, {}], "h": [{"2": 1}, {}], "l": [{"1": 5, "2": 51}, {"s": [{"1": 5, "2": 15}, {"l": [{"1": 5}, {}], "/": [{"2": 15}, {}]}], "-": [{"2": 12}, {}], "/": [{"2": 20}, {}], "n": [{"2": 4}, {}]}], "n": [{"2": 2}, {}], "p": [{"2": 11}, {}], "s": [{"1": 10, "2": 124}, {"s": [{"1": 10, "2": 41}, {"i": [{"2": 6}, {}], "s": [{"1": 10, "2": 32}, {"/": [{"1": 10, "2": 32}, {"a": [{"2": 18}, {}], "i": [{"1": 10}, {}], "e": [{"2": 14}, {}]}]}], "n": [{"2": 3}, {}]}], "-": [{"2": 5}, {}], "/": [{"2": 78}, {}]}], "r": [{"1": 25}, {}], "t": [{"2": 187}, {}], "v": [{"1": 5}, {}]}], "p": [{"2": 4}, {}], "s": [{"2": 6}, {}], "t": [{"2": 6}, {}]}], "ia": [{"1": 35, "2": 142}, {"b": [{"1": 25, "2": 2}, {"c": [{"2": 2}, {}], "d": [{"1": 25}, {}]}], "d": [{"2": 9}, {}], "g": [{"2": 24}, {}], "m": [{"2": 14}, {}], "l": [{"2": 12}, {}], "/": [{"2": 11}, {}], "n": [{"1": 5, "2": 41}, {"d": [{"2": 1}, {}], "f": [{"2": 5}, {}], "l": [{"2": 9}, {}], "r": [{"2": 22}, {}], "t": [{"2": 4}, {}], "v": [{"1": 5}, {}]}], "s": [{"2": 9}, {}], "r": [{"1": 5, "2": 5}, {"t": [{"2": 5}, {}], "l": [{"1": 5}, {}]}], "t": [{"2": 15}, {}]}], "eoie": [{"1": 5}, {}], "ie": [{"1": 3486, "2": 333}, {"d": [{"1": 143}, {}], "f": [{"1": 5}, {}], "l": [{"1": 260, "2": 4}, {"c": [{"1": 223}, {}], "m": [{"1": 10}, {}], "t": [{"2": 4}, {}], "f": [{"1": 27}, {}]}], "/": [{"1": 446}, {}], "n": [{"1": 1343, "2": 142}, {"c": [{"2": 43}, {}], "b": [{"1": 387}, {}], "d": [{"2": 6}, {}], "g": [{"2": 9}, {}], "h": [{"1": 25}, {}], "m": [{"1": 5}, {}], "l": [{"2": 25}, {}], "s": [{"1": 5, "2": 9}, {"s": [{"1": 5, "2": 9}, {"i": [{"2": 9}, {}], "/": [{"1": 5}, {}]}]}], "r": [{"1": 289, "2": 34}, {"c": [{"2": 14}, {}], "-": [{"1": 284}, {}], "s": [{"1": 5, "2": 8}, {"i": [{"2": 4}, {}], "/": [{"1": 5}, {}], "o": [{"2": 4}, {}]}], "/": [{"2": 8}, {}], "n": [{"2": 4}, {}]}], "t": [{"1": 317, "2": 16}, {"c": [{"2": 4}, {}], "d": [{"1": 15}, {}], "-": [{"1": 5}, {}], "/": [{"1": 79}, {}], "n": [{"1": 32}, {}], "s": [{"1": 48}, {}], "t": [{"1": 138, "2": 12}, {"a": [{"2": 12}, {}], "u": [{"1": 40}, {}], "e": [{"1": 20}, {}], "/": [{"1": 64}, {}], "n": [{"1": 14}, {}]}]}], "v": [{"1": 315}, {}]}], "s": [{"1": 98, "2": 3}, {"c": [{"1": 10}, {}], "d": [{"2": 3}, {}], "h": [{"1": 10}, {}], "m": [{"1": 10}, {}], "s": [{"1": 10}, {}], "r": [{"1": 48}, {}], "t": [{"1": 10}, {}]}], "r": [{"1": 1117, "2": 162}, {"c": [{"1": 47, "2": 5}, {"s": [{"1": 11}, {}], "/": [{"1": 31, "2": 5}, {"i": [{"1": 20}, {}], "s": [{"2": 3}, {}], "r": [{"2": 2}, {}], "e": [{"1": 5}, {}], "n": [{"1": 6}, {}]}], "g": [{"1": 5}, {}]}], "b": [{"1": 21}, {}], "d": [{"2": 4}, {}], "f": [{"1": 111, "2": 28}, {"s": [{"1": 28}, {}], "-": [{"1": 44}, {}], "t": [{"1": 24}, {}], "/": [{"1": 15, "2": 28}, {"i": [{"2": 25}, {}], "\u00e9": [{"2": 3}, {}], "f": [{"1": 15}, {}]}]}], "h": [{"1": 10, "2": 16}, {"r": [{"1": 5}, {}], "-": [{"2": 16}, {}], "/": [{"1": 5}, {}]}], "m": [{"1": 146}, {}], "l": [{"1": 96, "2": 20}, {"`": [{"1": 5}, {}], "s": [{"1": 30}, {}], "-": [{"2": 4}, {}], "/": [{"1": 61, "2": 16}, {"a": [{"1": 11}, {}], "b": [{"2": 16}, {}], "e": [{"1": 10}, {}], "i": [{"1": 15}, {}], "l": [{"1": 5}, {}], "o": [{"1": 15}, {}], "\u00e9": [{"1": 5}, {}]}]}], "n": [{"1": 97, "2": 16}, {"s": [{"1": 20}, {}], "-": [{"2": 11}, {}], "/": [{"1": 77, "2": 5}, {"a": [{"2": 5}, {}], "e": [{"1": 15}, {}], "g": [{"1": 20}, {}], "r": [{"1": 32}, {}], "u": [{"1": 5}, {}], "\u00f4": [{"1": 5}, {}]}]}], "q": [{"1": 28}, {}], "p": [{"1": 91, "2": 4}, {"s": [{"1": 14}, {}], "r": [{"1": 24}, {}], "/": [{"1": 53, "2": 4}, {"a": [{"1": 48}, {}], "r": [{"1": 5}, {}], "o": [{"2": 4}, {}]}]}], "s": [{"1": 84, "2": 4}, {"s": [{"1": 40}, {}], "/": [{"1": 44, "2": 4}, {"a": [{"1": 5, "2": 4}, {"/": [{"1": 5, "2": 4}, {"r": [{"1": 5}, {}], "t": [{"2": 4}, {}]}]}], "s": [{"1": 24}, {}], "r": [{"1": 10}, {}], "u": [{"1": 5}, {}]}]}], "r": [{"1": 140, "2": 61}, {"s": [{"1": 76}, {}], "/": [{"1": 64, "2": 61}, {"c": [{"2": 47}, {}], "o": [{"1": 10, "2": 5}, {"/": [{"1": 10, "2": 5}, {"s": [{"1": 10}, {}], "m": [{"2": 5}, {}]}]}], "p": [{"2": 5}, {}], "r": [{"1": 44}, {}], "u": [{"1": 10}, {}], "v": [{"2": 4}, {}]}]}], "t": [{"1": 212}, {}], "v": [{"1": 21, "2": 4}, {"g": [{"1": 11}, {}], "/": [{"1": 10, "2": 4}, {"a": [{"1": 5}, {}], "n": [{"1": 5, "2": 4}, {"/": [{"1": 5, "2": 4}, {"a": [{"1": 5}, {}], "e": [{"2": 4}, {}]}]}]}]}], "z": [{"1": 13}, {}]}], "t": [{"1": 29, "2": 13}, {"q": [{"2": 13}, {}], "s": [{"1": 24}, {}], "v": [{"1": 5}, {}]}], "z": [{"1": 45, "2": 9}, {"s": [{"1": 15}, {}], "r": [{"1": 25, "2": 4}, {"-": [{"1": 10}, {}], "/": [{"1": 15, "2": 4}, {"a": [{"2": 4}, {}], "r": [{"1": 10}, {}], "u": [{"1": 5}, {}]}]}], "d": [{"2": 5}, {}], "v": [{"1": 5}, {}]}]}], "yo": [{"2": 9}, {}], "ouai": [{"2": 2}, {}], "u\u00eb": [{"1": 20}, {}], "oie": [{"1": 230}, {}], "i\u00e9e": [{"2": 5}, {}], "\u0153": [{"1": 5}, {}], "a": [{"1": 12198}, {}], "\u00e2": [{"1": 496}, {}], "e": [{"1": 23090}, {}], "i": [{"1": 8073}, {}], "eoi": [{"1": 32}, {}], "\u00ea": [{"1": 464}, {}], "\u00ee": [{"1": 106}, {}], "u\u00ef": [{"2": 5}, {}], "u\u00ee": [{"1": 15}, {}], "u": [{"1": 4885}, {}], "ouaie": [{"2": 3}, {}], "u\u00ea": [{"1": 10}, {}], "y": [{"1": 475}, {}], "uie": [{"1": 40}, {}]}] diff --git a/diaeresis.py b/diaeresis.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 + +"""Get the number of syllables of a vowel cluster with context""" + +import os +import json +import sys + +f = open(os.path.join(os.path.dirname( + os.path.realpath(__file__)), 'diaeresis.json')) +trie = json.load(f) +f.close() + +def do_lookup(trie, key): + if len(key) == 0 or (key[0] not in trie[1].keys()): + return trie[0] + return do_lookup(trie[1][key[0]], key[1:]) + +def lookup(key): + return do_lookup(trie, key + ['-', '-']) + +def wrap_lookup(line): + result = lookup(line) + print("%s: %s" % (line, result)) + +if __name__ == '__main__': + if len(sys.argv) > 1: + for arg in sys.argv[1:]: + wrap_lookup(arg) + else: + while True: + line = sys.stdin.readline() + if not line: + break + wrap_lookup(line.lower().lstrip().rstrip().split()) + diff --git a/metric.py b/metric.py @@ -3,7 +3,7 @@ import re from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants -from vowels import possible_weights +import vowels import haspirater @@ -19,7 +19,13 @@ def annotate_aspirated(word): def contains_break(chunk): return ' ' in chunk or '-' in chunk -def fit(chunks, pos, left): +def possible_weights(chunks, pos, diaeresis): + if diaeresis == "classical": + return vowels.possible_weights_ctx(chunks, pos) + elif diaeresis == "permissive": + return vowels.possible_weights_approx(chunks[pos]) + +def fit(chunks, pos, left, diaeresis): """bruteforce exploration of all possible vowel cluster weghting, within a maximum total of left""" if pos >= len(chunks): @@ -28,7 +34,7 @@ def fit(chunks, pos, left): return [] # no possibilities # skip consonants if (not is_vowels(chunks[pos])): - return [[chunks[pos]] + x for x in fit(chunks, pos+1, left)] + return [[chunks[pos]] + x for x in fit(chunks, pos+1, left, diaeresis)] else: if ((pos >= len(chunks) - 2 and chunks[pos] == 'e') and not ( pos <= 0 or contains_break(chunks[pos-1])) and not ( @@ -44,19 +50,19 @@ def fit(chunks, pos, left): # actually, this will have an influence on the rhyme's gender weights = [0, 1] else: - weights = possible_weights(chunks[pos]) + weights = possible_weights(chunks, pos, diaeresis) else: if (pos >= len(chunks) - 1 and chunks[pos] == 'e' and pos > 0 and (chunks[pos-1].endswith('-c') or chunks[pos-1].endswith('-j'))): weights = [0] # -ce and -je are elided else: - weights = possible_weights(chunks[pos]) + weights = possible_weights(chunks, pos, diaeresis) result = [] for weight in weights: # combine all possibilities result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1, - left - weight)] + left - weight, diaeresis)] return result def feminine(align, verse, phon): @@ -84,7 +90,7 @@ def feminine(align, verse, phon): return possible -def parse(text, phon, bound, forbidden_ok): +def parse(text, phon, bound, forbidden_ok, diaeresis): """Return possible aligns for text, bound is an upper bound on the align length to limit running time, phon is the pronunciation to help for gender, forbidden_ok is true if we allow classically forbidden patterns""" @@ -196,5 +202,5 @@ def parse(text, phon, bound, forbidden_ok): # the femininity of the align (depending both on the align and # original text) return list(map((lambda x: (x, feminine(x, original_text, phon))), - fit(chunks, 0, bound))) + fit(chunks, 0, bound, diaeresis))) diff --git a/plint.py b/plint.py @@ -5,19 +5,22 @@ import template def run(): ok = True + f2 = None + if len(sys.argv) == 3: + f2 = open(sys.argv[2], 'w') while True: line = sys.stdin.readline() if not line: break - errors = template.check(line) + errors = template.check(line, f2) for error in errors: print(error.report(), file=sys.stderr) ok = False return ok if __name__ == '__main__': - if len(sys.argv) != 2: - print("Usage: %s TEMPLATE" % sys.argv[0], file=sys.stderr) + if len(sys.argv) < 2 or len(sys.argv) > 3: + print("Usage: %s TEMPLATE [OCONTEXT]" % sys.argv[0], file=sys.stderr) print("Check stdin according to template, report errors on stdout", file=sys.stderr) sys.exit(1) @@ -25,6 +28,7 @@ if __name__ == '__main__': f = open(sys.argv[1]) x = f.read() f.close() + template = template.Template(x) ok = run() diff --git a/static/tpl/alexandrin.tpl b/static/tpl/alexandrin.tpl @@ -1,2 +1,2 @@ -! forbidden_ok:yes +! forbidden_ok:yes diaeresis:permissive 12 A diff --git a/template.py b/template.py @@ -5,6 +5,21 @@ import copy import rhyme from common import normalize, legal, strip_accents_one from nature import nature_count +from vowels import possible_weights_ctx, make_query + + +def handle(poss): + l = [] + #print(poss) + for i in range(len(poss)): + if isinstance(poss[i], tuple): + #print(cleared[:i][::-1]) + #print(cleared[i+1:]) + # print(poss) + # print (make_query(poss, i)) + if len(possible_weights_ctx(poss, i)) > 1: + l.append((poss[i][1], make_query(poss, i))) + return l class Pattern: def __init__(self, metric, myid, femid, constraint): @@ -32,6 +47,7 @@ class Template: self.pattern_line_no = 0 self.forbidden_ok = False self.normande_ok = True + self.diaeresis = "classical" self.mergers = [] self.load(string) self.line_no = 0 @@ -50,6 +66,10 @@ class Template: self.forbidden_ok = str2bool(value) elif key == "normande_ok": self.normande_ok = str2bool(value) + elif key == "diaeresis": + self.diaeresis = value + if value not in ["permissive", "classical"]: + raise ValueError else: raise ValueError @@ -83,7 +103,7 @@ class Template: return ((1+len(hemis.keys()))*abs(pattern.length - c) + sum([1 for x in hemis.values() if x != "ok"])) - def match(self, line): + def match(self, line, ofile=None): """Check a line against current pattern, return errors""" line_with_case = normalize(line, downcase=False) @@ -110,7 +130,7 @@ class Template: # compute alignments, check hemistiches, sort by score possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2, - self.forbidden_ok) + self.forbidden_ok, self.diaeresis) if not possible: errors.append(error.ErrorForbiddenPattern()) possible = [] @@ -136,6 +156,26 @@ class Template: # keep the best alignment as hypotheses possible = [(score, align) for (score, align) in possible if score == possible[0][0]] + if ofile: + if len(possible) == 1 and possible[0][0] == 0: + l = [(x[1][0]) for x in possible] + poss = [] + for p in l: + c = [] + while len(p) > 0: + x = p.pop() + if x == ' ': + poss.append(c[::-1]) + c = [] + else: + c.append(x) + if len(c) > 0: + poss.append(c[::-1]) + for w in poss: + l = handle(w) + for x in l: + # print(x) + print((str(x[0]) + ' ' + ' '.join(x[1])), file=ofile) # occurrences if pattern.myid not in self.occenv.keys(): @@ -226,7 +266,7 @@ class Template: self.env = copy.deepcopy(self.old_env) self.femenv = copy.deepcopy(self.old_femenv) - def check(self, line): + def check(self, line, ofile=None): """Check line (wrapper)""" self.line_no += 1 line = line.rstrip() @@ -234,7 +274,7 @@ class Template: return [] #possible = [compute(p) for p in possible] #possible = sorted(possible, key=rate) - errors, pattern = self.match(line) + errors, pattern = self.match(line, ofile) for error in errors: # update errors with line position and pattern error.pos(line, self.line_no, pattern) diff --git a/vowels.py b/vowels.py @@ -4,6 +4,19 @@ """Compute the number of syllabes taken by a vowel chunk""" from common import strip_accents +from diaeresis import lookup + +def clear(l): + return [x[0] if isinstance(x, tuple) else x for x in l] + +def intersperse(a, b): + if (len(a) == 0 or a[0] == ' ') and (len(b) == 0 or b[0] == ' '): + return [] + if len(a) == 0 or a[0] == ' ': + return ["/", b[0]] + intersperse(a, b[1:]) + if len(b) == 0 or b[0] == ' ': + return [a[0], "/"] + intersperse(a[1:], b) + return [a[0], b[0]] + intersperse(a[1:], b[1:]) def contains_trema(chunk): """Test if a string contains a word with a trema""" @@ -12,8 +25,29 @@ def contains_trema(chunk): return True return False -def possible_weights(chunk): - """Return the possible number of syllabes taken by a vowel chunk""" +threshold = 10 + +def make_query(chunks, pos): + cleared = clear(chunks) + return [cleared[pos]] + intersperse( + ''.join(cleared[pos+1:]), + ''.join([x[::-1] for x in cleared[:pos][::-1]])) + +def possible_weights_ctx(chunks, pos): + chunk = chunks[pos] + q = make_query(chunks, pos) + #print (q) + v = lookup(q) + #print (v) + #print (possible_weights(chunk)) + if len(v.keys()) == 1 and v[list(v.keys())[0]] > threshold: + return [int(list(v.keys())[0])] + else: + return possible_weights_seed(chunk) + +def possible_weights_approx(chunk): + """Return the possible number of syllabes taken by a vowel chunk (permissive + approximation)""" if len(chunk) == 1: return [1] # old spelling and weird exceptions @@ -47,3 +81,12 @@ def possible_weights(chunk): # we can't tell return [1, 2] +def possible_weights_seed(chunk): + """Return the possible number of syllabes taken by a vowel chunk""" + if len(chunk) == 1: + return [1] + #if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'au', 'oi']: + # return [1] + # we can't tell + return [1, 2] +