commit 0d655d2f8408074bbfd4b5ed26ca371e14cbc954
parent 25e1f9b06ac0b9262a99f2bf27d61d1ab40a0c0b
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 10 Jun 2012 18:40:50 +0200
Merge branch 'dierese'
Diffstat:
7 files changed, 148 insertions(+), 18 deletions(-)
diff --git a/diaeresis.json b/diaeresis.json
@@ -0,0 +1 @@
+[{"1": 87707, "2": 2604}, {"ou\u00e9": [{"2": 27}, {}], "ou\u00ef": [{"2": 4}, {}], "\u00e9u": [{"2": 10}, {}], "\u00e9i": [{"2": 37}, {}], "\u00e9o": [{"2": 23}, {}], "\u00e9a": [{"2": 86}, {}], "\u00e9e": [{"1": 606, "2": 9}, {"s": [{"1": 182, "2": 9}, {"c": [{"1": 15}, {}], "d": [{"2": 9}, {}], "f": [{"1": 5}, {}], "h": [{"1": 5}, {}], "m": [{"1": 10}, {}], "l": [{"1": 24}, {}], "n": [{"1": 24}, {}], "s": [{"1": 55}, {}], "r": [{"1": 24}, {}], "t": [{"1": 20}, {}]}], "/": [{"1": 424}, {}]}], "eui": [{"1": 104}, {}], "eue": [{"1": 5}, {}], "ao\u00fb": [{"1": 5}, {}], "ieu": [{"1": 856, "2": 374}, {"x": [{"1": 508, "2": 293}, {"c": [{"1": 40, "2": 86}, {"-": [{"1": 40}, {}], "/": [{"2": 86}, {}]}], "d": [{"1": 64, "2": 56}, {"-": [{"1": 54}, {}], "/": [{"1": 10, "2": 56}, {"a": [{"1": 5, "2": 10}, {"-": [{"1": 5}, {}], "/": [{"2": 10}, {}]}], "-": [{"1": 5}, {}], "u": [{"2": 4}, {}], "o": [{"2": 42}, {}]}]}], "g": [{"2": 9}, {}], "m": [{"1": 149}, {}], "l": [{"1": 179}, {}], "n": [{"2": 17}, {}], "p": [{"2": 21}, {}], "r": [{"2": 61}, {}], "t": [{"2": 28}, {}], "v": [{"1": 76, "2": 15}, {"-": [{"1": 76}, {}], "/": [{"2": 15}, {}]}]}], "s": [{"2": 65}, {}], "r": [{"1": 27, "2": 16}, {"s": [{"1": 27}, {}], "r": [{"2": 16}, {}]}], "/": [{"1": 321}, {}]}], "iei": [{"1": 88}, {}], "a\u00efe": [{"2": 13}, {}], "a\u00efa": [{"2": 2}, {}], "ye": [{"1": 5}, {}], "\u00e2i": [{"1": 15}, {}], "\u0153u": [{"1": 290}, {}], "\u0153i": [{"1": 24}, {}], "o\u00ef": [{"2": 13}, {}], "o\u00ee": [{"1": 68}, {}], "o\u00ea": [{"1": 5}, {}], "o\u00e9": [{"2": 26}, {}], "o\u00e8": [{"2": 135}, {}], "o\u00fb": [{"1": 166}, {}], "o\u00f9": [{"1": 355}, {}], "\u00fb": [{"1": 276}, {}], "oui": [{"1": 232, "2": 87}, {"-": [{"1": 76}, {}], "l": [{"1": 156}, {}], "/": [{"2": 12}, {}], "s": [{"2": 50}, {}], "r": [{"2": 9}, {}], "t": [{"2": 16}, {}]}], "ei": [{"1": 421}, {}], "e\u00fb": [{"1": 112}, {}], "oua": [{"2": 56}, {}], "ea": [{"1": 107}, {}], "oue": [{"1": 99, "2": 122}, {"m": [{"1": 5}, 
{}], "/": [{"1": 84}, {}], "n": [{"2": 5}, {}], "s": [{"2": 10}, {}], "r": [{"1": 10, "2": 89}, {"r": [{"2": 8}, {}], "v": [{"1": 10, "2": 21}, {"a": [{"1": 10}, {}], "-": [{"2": 4}, {}], "/": [{"2": 17}, {}]}], "l": [{"2": 37}, {}], "j": [{"2": 23}, {}]}], "t": [{"2": 14}, {}], "z": [{"2": 4}, {}]}], "e\u00fci": [{"1": 5}, {}], "oueu": [{"2": 6}, {}], "eu": [{"1": 2157}, {}], "o\u00fcoi": [{"2": 5}, {}], "\u00e9\u00e2": [{"2": 50}, {}], "yeu": [{"1": 382}, {}], "a\u00efeu": [{"2": 47}, {}], "eu\u00e2": [{"2": 4}, {}], "\u00e9au": [{"2": 3}, {}], "euoe": [{"2": 2}, {}], "oeu": [{"1": 265}, {}], "aie": [{"1": 268}, {}], "oei": [{"1": 48}, {}], "\u00e0": [{"1": 1038}, {}], "oi": [{"1": 2053}, {}], "eaie": [{"1": 5}, {}], "\u00e8": [{"1": 467}, {}], "oe": [{"1": 10}, {}], "o": [{"1": 7195}, {}], "\u00f4": [{"1": 405}, {}], "ou": [{"1": 4051}, {}], "u\u00e9e": [{"2": 9}, {}], "ui\u00e8": [{"1": 5}, {}], "eo": [{"1": 40}, {}], "a\u00ef": [{"2": 39}, {}], "a\u00ee": [{"1": 406}, {}], "i\u00e9": [{"1": 41, "2": 122}, {"s": [{"1": 5, "2": 22}, {"r": [{"2": 4}, {}], "v": [{"2": 13}, {}], "l": [{"2": 5}, {}], "t": [{"1": 5}, {}]}], "t": [{"2": 56}, {}], "d": [{"1": 10}, {}], "g": [{"1": 11}, {}], "/": [{"1": 15, "2": 44}, {"f": [{"2": 8}, {}], "l": [{"2": 21}, {}], "p": [{"2": 5}, {}], "s": [{"2": 5}, {}], "r": [{"2": 5}, {}], "t": [{"1": 15}, {}]}]}], "i\u00e8": [{"1": 703, "2": 34}, {"c": [{"1": 111}, {}], "g": [{"1": 24}, {}], "m": [{"1": 34, "2": 4}, {"q": [{"1": 5}, {}], "x": [{"1": 19}, {}], "s": [{"1": 5}, {}], "r": [{"2": 4}, {}], "v": [{"1": 5}, {}]}], "r": [{"1": 511, "2": 21}, {"c": [{"1": 5}, {}], "d": [{"1": 10}, {}], "f": [{"1": 10}, {}], "m": [{"1": 112}, {}], "l": [{"1": 39}, {}], "n": [{"1": 68}, {}], "q": [{"1": 10}, {}], "p": [{"1": 5}, {}], "s": [{"1": 60}, {}], "r": [{"1": 75, "2": 21}, {"`": [{"1": 15}, {}], "e": [{"1": 60, "2": 21}, {"p": [{"2": 17}, {}], "r": [{"1": 60}, {}], "t": [{"2": 4}, {}]}]}], "t": [{"1": 92}, {}], "v": [{"1": 25}, {}]}], "t": 
[{"2": 9}, {}], "v": [{"1": 23}, {}]}], "uei": [{"1": 72}, {}], "iai": [{"2": 9}, {}], "yau": [{"2": 4}, {}], "iau": [{"2": 3}, {}], "eai": [{"1": 20}, {}], "uau": [{"2": 10}, {}], "eau": [{"1": 555}, {}], "\u00e9": [{"1": 3370}, {}], "uo": [{"2": 10}, {}], "ui": [{"1": 1972, "2": 37}, {"d": [{"1": 72}, {}], "g": [{"1": 5}, {}], "-": [{"1": 108}, {}], "l": [{"1": 40}, {}], "/": [{"1": 447}, {}], "n": [{"1": 5, "2": 32}, {"r": [{"2": 32}, {}], "j": [{"1": 5}, {}]}], "s": [{"1": 330}, {}], "r": [{"1": 108}, {}], "t": [{"1": 694, "2": 5}, {"d": [{"1": 127}, {}], "g": [{"1": 10}, {}], "f": [{"1": 82}, {}], "h": [{"1": 15}, {}], "l": [{"2": 5}, {}], "n": [{"1": 116}, {}], "p": [{"1": 10}, {}], "s": [{"1": 75}, {}], "r": [{"1": 259}, {}]}], "v": [{"1": 163}, {}]}], "ue": [{"1": 301, "2": 74}, {"l": [{"2": 28}, {}], "/": [{"1": 223}, {}], "n": [{"1": 2, "2": 9}, {"p": [{"1": 1}, {}], "m": [{"1": 1}, {}], "l": [{"2": 9}, {}]}], "s": [{"1": 76}, {}], "r": [{"2": 16}, {}], "t": [{"2": 21}, {}]}], "o\u00fc\u00e9": [{"2": 4}, {}], "ua": [{"2": 21}, {}], "uia": [{"2": 4}, {}], "ae": [{"1": 15}, {}], "ai": [{"1": 2811}, {}], "iu": [{"2": 13}, {}], "ao": [{"2": 8}, {}], "ueu": [{"2": 91}, {}], "au": [{"1": 1498}, {}], "oa": [{"2": 9}, {}], "io": [{"1": 45, "2": 463}, {"c": [{"2": 11}, {}], "d": [{"2": 4}, {}], "m": [{"2": 21}, {}], "l": [{"2": 25}, {}], "n": [{"1": 45, "2": 386}, {"d": [{"2": 5}, {}], "g": [{"2": 5}, {}], "h": [{"2": 1}, {}], "l": [{"1": 5, "2": 51}, {"s": [{"1": 5, "2": 15}, {"l": [{"1": 5}, {}], "/": [{"2": 15}, {}]}], "-": [{"2": 12}, {}], "/": [{"2": 20}, {}], "n": [{"2": 4}, {}]}], "n": [{"2": 2}, {}], "p": [{"2": 11}, {}], "s": [{"1": 10, "2": 124}, {"s": [{"1": 10, "2": 41}, {"i": [{"2": 6}, {}], "s": [{"1": 10, "2": 32}, {"/": [{"1": 10, "2": 32}, {"a": [{"2": 18}, {}], "i": [{"1": 10}, {}], "e": [{"2": 14}, {}]}]}], "n": [{"2": 3}, {}]}], "-": [{"2": 5}, {}], "/": [{"2": 78}, {}]}], "r": [{"1": 25}, {}], "t": [{"2": 187}, {}], "v": [{"1": 5}, {}]}], "p": 
[{"2": 4}, {}], "s": [{"2": 6}, {}], "t": [{"2": 6}, {}]}], "ia": [{"1": 35, "2": 142}, {"b": [{"1": 25, "2": 2}, {"c": [{"2": 2}, {}], "d": [{"1": 25}, {}]}], "d": [{"2": 9}, {}], "g": [{"2": 24}, {}], "m": [{"2": 14}, {}], "l": [{"2": 12}, {}], "/": [{"2": 11}, {}], "n": [{"1": 5, "2": 41}, {"d": [{"2": 1}, {}], "f": [{"2": 5}, {}], "l": [{"2": 9}, {}], "r": [{"2": 22}, {}], "t": [{"2": 4}, {}], "v": [{"1": 5}, {}]}], "s": [{"2": 9}, {}], "r": [{"1": 5, "2": 5}, {"t": [{"2": 5}, {}], "l": [{"1": 5}, {}]}], "t": [{"2": 15}, {}]}], "eoie": [{"1": 5}, {}], "ie": [{"1": 3486, "2": 333}, {"d": [{"1": 143}, {}], "f": [{"1": 5}, {}], "l": [{"1": 260, "2": 4}, {"c": [{"1": 223}, {}], "m": [{"1": 10}, {}], "t": [{"2": 4}, {}], "f": [{"1": 27}, {}]}], "/": [{"1": 446}, {}], "n": [{"1": 1343, "2": 142}, {"c": [{"2": 43}, {}], "b": [{"1": 387}, {}], "d": [{"2": 6}, {}], "g": [{"2": 9}, {}], "h": [{"1": 25}, {}], "m": [{"1": 5}, {}], "l": [{"2": 25}, {}], "s": [{"1": 5, "2": 9}, {"s": [{"1": 5, "2": 9}, {"i": [{"2": 9}, {}], "/": [{"1": 5}, {}]}]}], "r": [{"1": 289, "2": 34}, {"c": [{"2": 14}, {}], "-": [{"1": 284}, {}], "s": [{"1": 5, "2": 8}, {"i": [{"2": 4}, {}], "/": [{"1": 5}, {}], "o": [{"2": 4}, {}]}], "/": [{"2": 8}, {}], "n": [{"2": 4}, {}]}], "t": [{"1": 317, "2": 16}, {"c": [{"2": 4}, {}], "d": [{"1": 15}, {}], "-": [{"1": 5}, {}], "/": [{"1": 79}, {}], "n": [{"1": 32}, {}], "s": [{"1": 48}, {}], "t": [{"1": 138, "2": 12}, {"a": [{"2": 12}, {}], "u": [{"1": 40}, {}], "e": [{"1": 20}, {}], "/": [{"1": 64}, {}], "n": [{"1": 14}, {}]}]}], "v": [{"1": 315}, {}]}], "s": [{"1": 98, "2": 3}, {"c": [{"1": 10}, {}], "d": [{"2": 3}, {}], "h": [{"1": 10}, {}], "m": [{"1": 10}, {}], "s": [{"1": 10}, {}], "r": [{"1": 48}, {}], "t": [{"1": 10}, {}]}], "r": [{"1": 1117, "2": 162}, {"c": [{"1": 47, "2": 5}, {"s": [{"1": 11}, {}], "/": [{"1": 31, "2": 5}, {"i": [{"1": 20}, {}], "s": [{"2": 3}, {}], "r": [{"2": 2}, {}], "e": [{"1": 5}, {}], "n": [{"1": 6}, {}]}], "g": [{"1": 5}, 
{}]}], "b": [{"1": 21}, {}], "d": [{"2": 4}, {}], "f": [{"1": 111, "2": 28}, {"s": [{"1": 28}, {}], "-": [{"1": 44}, {}], "t": [{"1": 24}, {}], "/": [{"1": 15, "2": 28}, {"i": [{"2": 25}, {}], "\u00e9": [{"2": 3}, {}], "f": [{"1": 15}, {}]}]}], "h": [{"1": 10, "2": 16}, {"r": [{"1": 5}, {}], "-": [{"2": 16}, {}], "/": [{"1": 5}, {}]}], "m": [{"1": 146}, {}], "l": [{"1": 96, "2": 20}, {"`": [{"1": 5}, {}], "s": [{"1": 30}, {}], "-": [{"2": 4}, {}], "/": [{"1": 61, "2": 16}, {"a": [{"1": 11}, {}], "b": [{"2": 16}, {}], "e": [{"1": 10}, {}], "i": [{"1": 15}, {}], "l": [{"1": 5}, {}], "o": [{"1": 15}, {}], "\u00e9": [{"1": 5}, {}]}]}], "n": [{"1": 97, "2": 16}, {"s": [{"1": 20}, {}], "-": [{"2": 11}, {}], "/": [{"1": 77, "2": 5}, {"a": [{"2": 5}, {}], "e": [{"1": 15}, {}], "g": [{"1": 20}, {}], "r": [{"1": 32}, {}], "u": [{"1": 5}, {}], "\u00f4": [{"1": 5}, {}]}]}], "q": [{"1": 28}, {}], "p": [{"1": 91, "2": 4}, {"s": [{"1": 14}, {}], "r": [{"1": 24}, {}], "/": [{"1": 53, "2": 4}, {"a": [{"1": 48}, {}], "r": [{"1": 5}, {}], "o": [{"2": 4}, {}]}]}], "s": [{"1": 84, "2": 4}, {"s": [{"1": 40}, {}], "/": [{"1": 44, "2": 4}, {"a": [{"1": 5, "2": 4}, {"/": [{"1": 5, "2": 4}, {"r": [{"1": 5}, {}], "t": [{"2": 4}, {}]}]}], "s": [{"1": 24}, {}], "r": [{"1": 10}, {}], "u": [{"1": 5}, {}]}]}], "r": [{"1": 140, "2": 61}, {"s": [{"1": 76}, {}], "/": [{"1": 64, "2": 61}, {"c": [{"2": 47}, {}], "o": [{"1": 10, "2": 5}, {"/": [{"1": 10, "2": 5}, {"s": [{"1": 10}, {}], "m": [{"2": 5}, {}]}]}], "p": [{"2": 5}, {}], "r": [{"1": 44}, {}], "u": [{"1": 10}, {}], "v": [{"2": 4}, {}]}]}], "t": [{"1": 212}, {}], "v": [{"1": 21, "2": 4}, {"g": [{"1": 11}, {}], "/": [{"1": 10, "2": 4}, {"a": [{"1": 5}, {}], "n": [{"1": 5, "2": 4}, {"/": [{"1": 5, "2": 4}, {"a": [{"1": 5}, {}], "e": [{"2": 4}, {}]}]}]}]}], "z": [{"1": 13}, {}]}], "t": [{"1": 29, "2": 13}, {"q": [{"2": 13}, {}], "s": [{"1": 24}, {}], "v": [{"1": 5}, {}]}], "z": [{"1": 45, "2": 9}, {"s": [{"1": 15}, {}], "r": [{"1": 25, "2": 4}, 
{"-": [{"1": 10}, {}], "/": [{"1": 15, "2": 4}, {"a": [{"2": 4}, {}], "r": [{"1": 10}, {}], "u": [{"1": 5}, {}]}]}], "d": [{"2": 5}, {}], "v": [{"1": 5}, {}]}]}], "yo": [{"2": 9}, {}], "ouai": [{"2": 2}, {}], "u\u00eb": [{"1": 20}, {}], "oie": [{"1": 230}, {}], "i\u00e9e": [{"2": 5}, {}], "\u0153": [{"1": 5}, {}], "a": [{"1": 12198}, {}], "\u00e2": [{"1": 496}, {}], "e": [{"1": 23090}, {}], "i": [{"1": 8073}, {}], "eoi": [{"1": 32}, {}], "\u00ea": [{"1": 464}, {}], "\u00ee": [{"1": 106}, {}], "u\u00ef": [{"2": 5}, {}], "u\u00ee": [{"1": 15}, {}], "u": [{"1": 4885}, {}], "ouaie": [{"2": 3}, {}], "u\u00ea": [{"1": 10}, {}], "y": [{"1": 475}, {}], "uie": [{"1": 40}, {}]}]
diff --git a/diaeresis.py b/diaeresis.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+"""Get the number of syllables of a vowel cluster with context"""
+
+import os
+import json
+import sys
+
+# The trie is loaded once, at import time, from the diaeresis.json file
+# sitting next to this module.  The context manager closes the file even
+# if parsing fails.
+with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                       'diaeresis.json'), encoding='utf-8') as f:
+    trie = json.load(f)
+
+
+def do_lookup(trie, key):
+    """Follow key through trie and return the data of the last node reached.
+
+    A node is a two-element list: node[0] is the data attached to the node
+    (in diaeresis.json, a dict of counts keyed by "1" or "2") and node[1]
+    maps the next key element to a child node.  The walk stops as soon as
+    the key is exhausted or its next element has no matching child.
+    """
+    node = trie
+    for element in key:
+        children = node[1]
+        if element not in children:
+            break
+        node = children[element]
+    return node[0]
+
+
+def lookup(key):
+    """Look up key in the module-level trie.
+
+    key must be a list of strings; two '-' elements are appended to it
+    before the walk.
+    """
+    return do_lookup(trie, key + ['-', '-'])
+
+
+def wrap_lookup(line):
+    """Look up line (a list of key elements) and print it with the result."""
+    result = lookup(line)
+    print("%s: %s" % (line, result))
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        for arg in sys.argv[1:]:
+            # lookup() concatenates its key with a list, so a raw argv
+            # string raised TypeError; tokenize it like a stdin line.
+            wrap_lookup(arg.lower().split())
+    else:
+        for line in sys.stdin:
+            # split() without arguments already drops surrounding
+            # whitespace, including the trailing newline.
+            wrap_lookup(line.lower().split())
+
diff --git a/metric.py b/metric.py
@@ -3,7 +3,7 @@
import re
from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants
-from vowels import possible_weights
+import vowels
import haspirater
@@ -19,7 +19,13 @@ def annotate_aspirated(word):
def contains_break(chunk):
return ' ' in chunk or '-' in chunk
-def fit(chunks, pos, left):
+def possible_weights(chunks, pos, diaeresis):
+    """Return the possible weights of the vowel chunk at chunks[pos].
+
+    diaeresis selects the strategy: "classical" delegates to
+    vowels.possible_weights_ctx with the whole chunk list and the position,
+    "permissive" delegates to vowels.possible_weights_approx with the chunk
+    alone.  Any other value raises ValueError.
+    """
+    if diaeresis == "classical":
+        return vowels.possible_weights_ctx(chunks, pos)
+    if diaeresis == "permissive":
+        return vowels.possible_weights_approx(chunks[pos])
+    # Falling through used to return None, which only failed later when
+    # the caller iterated over the weights.
+    raise ValueError("unknown diaeresis mode: %r" % (diaeresis,))
+
+def fit(chunks, pos, left, diaeresis):
"""bruteforce exploration of all possible vowel cluster weghting,
within a maximum total of left"""
if pos >= len(chunks):
@@ -28,7 +34,7 @@ def fit(chunks, pos, left):
return [] # no possibilities
# skip consonants
if (not is_vowels(chunks[pos])):
- return [[chunks[pos]] + x for x in fit(chunks, pos+1, left)]
+ return [[chunks[pos]] + x for x in fit(chunks, pos+1, left, diaeresis)]
else:
if ((pos >= len(chunks) - 2 and chunks[pos] == 'e') and not (
pos <= 0 or contains_break(chunks[pos-1])) and not (
@@ -44,19 +50,19 @@ def fit(chunks, pos, left):
# actually, this will have an influence on the rhyme's gender
weights = [0, 1]
else:
- weights = possible_weights(chunks[pos])
+ weights = possible_weights(chunks, pos, diaeresis)
else:
if (pos >= len(chunks) - 1 and chunks[pos] == 'e' and
pos > 0 and (chunks[pos-1].endswith('-c') or
chunks[pos-1].endswith('-j'))):
weights = [0] # -ce and -je are elided
else:
- weights = possible_weights(chunks[pos])
+ weights = possible_weights(chunks, pos, diaeresis)
result = []
for weight in weights:
# combine all possibilities
result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1,
- left - weight)]
+ left - weight, diaeresis)]
return result
def feminine(align, verse, phon):
@@ -84,7 +90,7 @@ def feminine(align, verse, phon):
return possible
-def parse(text, phon, bound, forbidden_ok):
+def parse(text, phon, bound, forbidden_ok, diaeresis):
"""Return possible aligns for text, bound is an upper bound on the align
length to limit running time, phon is the pronunciation to help for gender,
forbidden_ok is true if we allow classically forbidden patterns"""
@@ -196,5 +202,5 @@ def parse(text, phon, bound, forbidden_ok):
# the femininity of the align (depending both on the align and
# original text)
return list(map((lambda x: (x, feminine(x, original_text, phon))),
- fit(chunks, 0, bound)))
+ fit(chunks, 0, bound, diaeresis)))
diff --git a/plint.py b/plint.py
@@ -5,19 +5,22 @@ import template
def run():
ok = True
+ f2 = None
+ if len(sys.argv) == 3:
+ f2 = open(sys.argv[2], 'w')
while True:
line = sys.stdin.readline()
if not line:
break
- errors = template.check(line)
+ errors = template.check(line, f2)
for error in errors:
print(error.report(), file=sys.stderr)
ok = False
return ok
if __name__ == '__main__':
- if len(sys.argv) != 2:
- print("Usage: %s TEMPLATE" % sys.argv[0], file=sys.stderr)
+ if len(sys.argv) < 2 or len(sys.argv) > 3:
+ print("Usage: %s TEMPLATE [OCONTEXT]" % sys.argv[0], file=sys.stderr)
print("Check stdin according to template, report errors on stdout",
file=sys.stderr)
sys.exit(1)
@@ -25,6 +28,7 @@ if __name__ == '__main__':
f = open(sys.argv[1])
x = f.read()
f.close()
+
template = template.Template(x)
ok = run()
diff --git a/static/tpl/alexandrin.tpl b/static/tpl/alexandrin.tpl
@@ -1,2 +1,2 @@
-! forbidden_ok:yes
+! forbidden_ok:yes diaeresis:permissive
12 A
diff --git a/template.py b/template.py
@@ -5,6 +5,21 @@ import copy
import rhyme
from common import normalize, legal, strip_accents_one
from nature import nature_count
+from vowels import possible_weights_ctx, make_query
+
+
+def handle(poss):
+    """Collect the vowel chunks of poss whose weight is ambiguous in context.
+
+    Only the tuple items of poss are candidates (presumably they are
+    (vowel chunk, weight) pairs and the other items are consonant chunks --
+    confirm against the caller).  For each tuple at index i for which
+    possible_weights_ctx(poss, i) yields more than one weight, the result
+    holds the pair (second element of the tuple, make_query(poss, i)).
+    """
+    return [(item[1], make_query(poss, i))
+            for i, item in enumerate(poss)
+            if isinstance(item, tuple)
+            and len(possible_weights_ctx(poss, i)) > 1]
class Pattern:
def __init__(self, metric, myid, femid, constraint):
@@ -32,6 +47,7 @@ class Template:
self.pattern_line_no = 0
self.forbidden_ok = False
self.normande_ok = True
+ self.diaeresis = "classical"
self.mergers = []
self.load(string)
self.line_no = 0
@@ -50,6 +66,10 @@ class Template:
self.forbidden_ok = str2bool(value)
elif key == "normande_ok":
self.normande_ok = str2bool(value)
+ elif key == "diaeresis":
+ self.diaeresis = value
+ if value not in ["permissive", "classical"]:
+ raise ValueError
else:
raise ValueError
@@ -83,7 +103,7 @@ class Template:
return ((1+len(hemis.keys()))*abs(pattern.length - c)
+ sum([1 for x in hemis.values() if x != "ok"]))
- def match(self, line):
+ def match(self, line, ofile=None):
"""Check a line against current pattern, return errors"""
line_with_case = normalize(line, downcase=False)
@@ -110,7 +130,7 @@ class Template:
# compute alignments, check hemistiches, sort by score
possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2,
- self.forbidden_ok)
+ self.forbidden_ok, self.diaeresis)
if not possible:
errors.append(error.ErrorForbiddenPattern())
possible = []
@@ -136,6 +156,26 @@ class Template:
# keep the best alignment as hypotheses
possible = [(score, align) for (score, align) in possible
if score == possible[0][0]]
+ if ofile:
+ if len(possible) == 1 and possible[0][0] == 0:
+ l = [(x[1][0]) for x in possible]
+ poss = []
+ for p in l:
+ c = []
+ while len(p) > 0:
+ x = p.pop()
+ if x == ' ':
+ poss.append(c[::-1])
+ c = []
+ else:
+ c.append(x)
+ if len(c) > 0:
+ poss.append(c[::-1])
+ for w in poss:
+ l = handle(w)
+ for x in l:
+ # print(x)
+ print((str(x[0]) + ' ' + ' '.join(x[1])), file=ofile)
# occurrences
if pattern.myid not in self.occenv.keys():
@@ -226,7 +266,7 @@ class Template:
self.env = copy.deepcopy(self.old_env)
self.femenv = copy.deepcopy(self.old_femenv)
- def check(self, line):
+ def check(self, line, ofile=None):
"""Check line (wrapper)"""
self.line_no += 1
line = line.rstrip()
@@ -234,7 +274,7 @@ class Template:
return []
#possible = [compute(p) for p in possible]
#possible = sorted(possible, key=rate)
- errors, pattern = self.match(line)
+ errors, pattern = self.match(line, ofile)
for error in errors:
# update errors with line position and pattern
error.pos(line, self.line_no, pattern)
diff --git a/vowels.py b/vowels.py
@@ -4,6 +4,19 @@
"""Compute the number of syllabes taken by a vowel chunk"""
from common import strip_accents
+from diaeresis import lookup
+
+def clear(l):
+    """Return a copy of l where every tuple is replaced by its first item."""
+    stripped = []
+    for item in l:
+        stripped.append(item[0] if isinstance(item, tuple) else item)
+    return stripped
+
+def intersperse(a, b):
+    """Interleave the leading words of a and b, padding the shorter with "/".
+
+    Only the part of each sequence before its first ' ' is used.  The result
+    alternates one element of a with one element of b; once one side runs
+    out, "/" stands in for it until the other side is exhausted too.
+    Iterating instead of recursing on slices keeps the cost linear and
+    avoids RecursionError on long inputs.
+    """
+    from itertools import takewhile, zip_longest
+
+    def first_word(seq):
+        return takewhile(lambda element: element != ' ', seq)
+
+    result = []
+    for left, right in zip_longest(first_word(a), first_word(b),
+                                   fillvalue="/"):
+        result.append(left)
+        result.append(right)
+    return result
def contains_trema(chunk):
"""Test if a string contains a word with a trema"""
@@ -12,8 +25,29 @@ def contains_trema(chunk):
return True
return False
-def possible_weights(chunk):
- """Return the possible number of syllabes taken by a vowel chunk"""
+# possible_weights_ctx trusts a lookup result only when its single count is
+# strictly greater than this value.
+threshold = 10
+
+def make_query(chunks, pos):
+    """Build the lookup key for the chunk at position pos.
+
+    The key starts with the chunk itself (tuples reduced to their first
+    item by clear), followed by intersperse() applied to the text after the
+    chunk and to the text before it read backwards.
+    """
+    cleared = clear(chunks)
+    head = [cleared[pos]]
+    following = ''.join(cleared[pos+1:])
+    # Reversing the concatenation equals concatenating the reversed pieces
+    # in reverse order.
+    preceding = ''.join(cleared[:pos])[::-1]
+    return head + intersperse(following, preceding)
+
+def possible_weights_ctx(chunks, pos):
+    """Return the possible weights of chunks[pos], using its context.
+
+    The query built by make_query is looked up in the diaeresis trie.  When
+    the lookup yields a single weight whose count is strictly greater than
+    threshold, that weight alone is returned; otherwise the answer falls
+    back to possible_weights_seed.
+    """
+    chunk = chunks[pos]
+    # template.handle passes (chunk, weight) tuples; without unwrapping,
+    # possible_weights_seed would measure the tuple instead of the text.
+    if isinstance(chunk, tuple):
+        chunk = chunk[0]
+    counts = lookup(make_query(chunks, pos))
+    if len(counts) == 1:
+        (weight, count), = counts.items()
+        if count > threshold:
+            return [int(weight)]
+    return possible_weights_seed(chunk)
+
+def possible_weights_approx(chunk):
+ """Return the possible number of syllabes taken by a vowel chunk (permissive
+ approximation)"""
if len(chunk) == 1:
return [1]
# old spelling and weird exceptions
@@ -47,3 +81,12 @@ def possible_weights(chunk):
# we can't tell
return [1, 2]
+def possible_weights_seed(chunk):
+    """Return the possible number of syllables taken by a vowel chunk"""
+    # A shortcut answering [1] for 'ai', 'ou', 'eu', 'ei', 'eau', 'au' and
+    # 'oi' is currently disabled.
+    # Beyond a single character we can't tell.
+    return [1] if len(chunk) == 1 else [1, 2]
+