plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit fd4ae6568bcc1e66a469f4d9503cdfecbb9f16d8
parent 1a8f02cf1e7661b20c2825f9008734db1ce8dad2
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 18 Feb 2013 19:26:32 +0100

Merge branch 'provenance'

Diffstat:
.gitignore | 1+
Makefile | 5++++-
TODO | 9++++++---
common.py | 22+++++++++++++++-------
error.py | 257++++++++++++++++++++++++++++++-------------------------------------------------
hemistiches.py | 80-------------------------------------------------------------------------------
metric.py | 224-------------------------------------------------------------------------------
plint.py | 4++--
plint_irc.py | 47+++++++++++++++++++++++------------------------
plint_web.py | 57++++++++++++++++++++++++++++++++++-----------------------
res/messages_fr.po | 103++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
static/main.css | 9++++++++-
static/tpl/french_abab.tpl | 8++++----
static/tpl/italian_abab.tpl | 8++++----
template.py | 150++++++++++++++++++-------------------------------------------------------------
test/au_lecteur | 2+-
verse.py | 356+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
versetest.py | 155+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
views/about.html | 13++++++-------
views/page.html | 9+++++++--
views/results.html | 6+-----
vowels.py | 14++++++++++----
22 files changed, 826 insertions(+), 713 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -5,6 +5,7 @@ frhyme.json haspirater/* haspirater.py haspirater.json +irc/* occurrences old/* *.pyc diff --git a/Makefile b/Makefile @@ -1,9 +1,12 @@ LANG=res/messages_fr.mo -.PHONY: all +.PHONY: all test %.mo: %.po msgfmt -o $*.mo $*.po all: ${LANG} +test: + python3 versetest.py + diff --git a/TODO b/TODO @@ -1,8 +1,11 @@ -clear textarea "joueront" trois syllabes ? -infer meter -"tu hélas" : assume that hiatus is ok +pas de rappel multiple de ligne dans les erreurs +belle gestion des erreurs en html "paysage", "centurion" +force language: plint.a3nm.net/french/about.html + + +handle "' " and "` " == IRC == diff --git a/common.py b/common.py @@ -42,19 +42,25 @@ def norm_spaces(text): """Remove multiple consecutive whitespace""" return re.sub("\s+-*\s*", ' ', text) -def rm_punct(text): +def rm_punct(text, rm_all=False, rm_apostrophe=False): """Remove punctuation from text""" text = re.sub("’", "'", text) # no weird apostrophes text = re.sub("' ", "'", text) # space after apostrophes + if rm_apostrophe: + text = re.sub("'", "", text) text = re.sub("'*$", "", text) # apostrophes at end of line text = re.sub("[‒–—―⁓⸺⸻]", " ", text) # no weird dashes #TODO rather: keep only good chars - pattern = re.compile("[^'\w -]", re.UNICODE) - text2 = pattern.sub(' ', text) + if not rm_all: + pattern = re.compile("[^'\w -]", re.UNICODE) + text2 = pattern.sub(' ', text) + else: + pattern = re.compile("[^\w]", re.UNICODE) + text2 = pattern.sub('', text) return text2 -def is_vowels(chunk, with_h=False, with_y=True): +def is_vowels(chunk, with_h=False, with_y=True, with_crap=False): """Test if a chunk is vowels with_h counts 'h' as vowel, with_y allows 'y'""" @@ -63,7 +69,8 @@ def is_vowels(chunk, with_h=False, with_y=True): return False for char in strip_accents(chunk): if char not in vowels: - if char != 'h' or not with_h: + if (char != 'h' or not with_h) and (char not in ['*', '?'] or not + with_crap): return False return True @@ -75,9 +82,10 @@ def is_consonants(chunk): return False return True -def normalize(text, downcase=True): +def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False): """Normalize text, ie. lowercase, no useless punctuation or whitespace""" - return norm_spaces(rm_punct(text.lower() if downcase else text)).rstrip().lstrip() + return norm_spaces(rm_punct(text.lower() if downcase else text, + rm_all=rm_all, rm_apostrophe=rm_apostrophe)).rstrip().lstrip() def subst(string, subs): if len(subs) == 0: diff --git a/error.py b/error.py @@ -1,193 +1,134 @@ import common -import hemistiches -class Error: - def __init__(self): - self.line = None - self.line_no = None - self.pattern = None - self.prefix = None - def pos(self, line, line_no, pattern): - self.line = line - self.line_no = line_no - self.pattern = pattern - self.prefix = "stdin:%d: " % self.line_no +class ErrorCollection: + keys = {'hiatus': 'H', 'ambiguous': 'A', 'illegal': 'I'} - def say(self, l): - return self.prefix + l + @property + def prefix(self): + return "stdin:%d: " % self.line_no - def report(self, s, short=False, t = []): + def __init__(self, line_no, line, pattern, verse, errors=[]): + self.line_no = line_no + self.line = line + self.errors = errors + self.pattern = pattern + self.verse = verse + + def say(self, l, short): + return l if short else self.prefix + l + + def align(self): + chunks = self.verse.chunks + keys = ['original', 'error'] + if len(self.verse.possible) == 0: + keys.append('weights') + if len(self.pattern.hemistiches) > 0: + keys.append('hemis') + formatters = {'weights': lambda x, y: '/'.join([str(a) for a in x]), + 'error': lambda x, y: ErrorCollection.keys.get(x, '') * + len(chunk['original'])} + def render(chunk, key): + return (formatters.get(key, lambda x, y: str(x)))(chunk.get(key, ""), chunk) + lines = {} + for key in keys: + lines[key] = "" + for chunk in chunks: + l = max(len(render(chunk, key)) for key in keys) + for key in keys: + lines[key] += ('{:^'+str(l)+'}').format(render(chunk, key)) + return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0] + + def lines(self, short=False): l = [] - if short: - l.append(s) - else: - l.append(self.say(_("error: %s") % (s))) - msg = _("Line is: %s") % (self.line) - if short: - if t != []: - if self.line.strip() != "": - l.append(msg) - for x in t: - l.append(x) - else: - if self.line.strip() != "": - l.append(self.say(msg)) - for x in t: - l.append(self.say(x)) - return '\n'.join(l) - -class ErrorBadCharacters(Error): - def __init__(self, characters): - self.characters = characters + l.append([self.say(x, short) for x in self.align()]) + for e in self.errors: + l.append([self.say(e.report(self.pattern), short)]) + return l def report(self, short=False): - return Error.report(self, _("Illegal character%s: %s") - % ('' if len(self.characters) == 1 else 's', - ', '.join(["'" + a + "'" for a in self.characters])), short) + return '\n'.join(sum(self.lines(short), [])) -class ErrorForbiddenPattern(Error): - def __init__(self, forbidden): - self.forbidden = forbidden +class ErrorBadElement: + def report(self, pattern): + return (self.message + + _(" (see '%s' above)") % ErrorCollection.keys[self.key]) - def report(self, short=False): - return Error.report(self, _("Illegal ambiguous pattern: %s") % self.forbidden, - short) +class ErrorBadCharacters(ErrorBadElement): + @property + def message(self): + return _("Illegal characters") + key = "illegal" -class ErrorHiatus(Error): - def __init__(self, hiatus): - self.hiatus = hiatus +class ErrorForbiddenPattern(ErrorBadElement): + @property + def message(self): + return _("Illegal ambiguous pattern") + key = "ambiguous" - def report(self, short=False): - return Error.report(self, _("Illegal hiatus: %s") % self.hiatus, short) +class ErrorHiatus(ErrorBadElement): + @property + def message(self): + return _("Illegal hiatus") + key = "hiatus" -class ErrorBadRhyme(Error): +class ErrorBadRhyme: def __init__(self, expected, inferred): - Error.__init__(self) self.expected = expected self.inferred = inferred - def report(self, short=False): + def report(self, pattern): # TODO indicate eye rhyme since this is also important # TODO don't indicate more than the minimal required rhyme (in length and # present of a vowel phoneme) - return Error.report(self, - _("%s for type %s (expected %s, inferred \"%s\")") - % (self.kind, self.get_id(), self.fmt(self.expected), - self.fmt(self.inferred)), short) + return (_("%s for type %s (expected \"%s\", inferred \"%s\")") + % (self.kind, self.get_id(pattern), self.fmt(self.expected), + self.fmt(self.inferred))) class ErrorBadRhymeGenre(ErrorBadRhyme): + @property + def kind(self): + return _("Bad rhyme genre") + def fmt(self, l): result = _(' or ').join(list(l)) if result == '': result = "?" return result - def get_id(self): - return self.pattern.femid + def get_id(self, pattern): + return pattern.femid +class ErrorBadRhymeSound(ErrorBadRhyme): @property def kind(self): - return _("Bad rhyme genre") + return _("Bad rhyme") -class ErrorBadRhymeSound(ErrorBadRhyme): def fmt(self, l): pron = l.phon ok = [] if len(pron) > 0: ok.append("") return ("\"" + '/'.join(list(set([common.to_xsampa(x[-4:]) for x in pron]))) - + "\"" + _(" (ending: \"") + l.eye + "\")") - - def get_id(self): - return self.pattern.myid + + "\"" + _(", ending: \"") + l.eye + "\"") - def report(self, short=False): - return Error.report(self, _("%s for type %s (expected %s)") - % (self.kind, self.pattern.myid, self.fmt(self.expected)), short) + def get_id(self, pattern): + return pattern.myid - @property - def kind(self): - return _("Bad rhyme") + def report(self, pattern): + return (_("%s for type %s (expected %s)") + % (self.kind, pattern.myid, self.fmt(self.expected))) -class ErrorBadMetric(Error): - def __init__(self, possible): - Error.__init__(self) - self.possible = possible - - def restore_elid(self, chunk): - if isinstance(chunk, tuple): - return [chunk] - try: - if chunk[-1] != "`": - return [chunk] - except KeyError: - return [chunk] - return [chunk[:-1], ("e", 0)] - - def align(self, align): - score, align = align - align, feminine, hemis = align - align = sum([self.restore_elid(chunk) for chunk in align], []) - line = self.line - l2 = [] - count = 0 - ccount = 0 - last_he = 0 - summary = [] - offset = 0 - done = False - for x in align: - if isinstance(x, tuple): - orig = "" - while len(line) > 0 and common.is_vowels(line[0]): - orig += line[0] - line = line[1:] - add = ('{:^'+str(len(orig))+'}').format(str(x[1])) - if offset > 0 and len(add) > 0 and add[-1] == ' ': - offset -= 1 - add = add[:-1] - l2 += add - if len(add) > len(orig): - offset = len(add) - len(orig) - count += x[1] - ccount += x[1] - done = False - else: - orig = "" - while len(line) > 0 and not common.is_vowels(line[0]): - orig += line[0] - line = line[1:] - if count in hemis.keys() and not done and last_he < count: - done = True - summary.append(str(ccount)) - ccount = 0 - summary.append(hemistiches.hemis_types[hemis[count]]) - l2 += ('{:^'+str(len(orig))+'}' - ).format(hemistiches.hemis_types[hemis[count]]) - last_he = count - else: - l2 += ' ' * len(orig) - summary.append(str(ccount)+':') - result = ''.join(l2) - summary = ('{:^9}').format(''.join(summary)) - return summary + result +class ErrorBadMetric: + def report(self, pattern): + return (_("Illegal metric: expected %d syllable%s%s") % + (pattern.length, '' if pattern.length == 1 else 's', + '' if len(pattern.hemistiches) == 0 + else (_(" with hemistiche%s at ") % + '' if len(pattern.hemistiches) == 1 else 's') + + ','.join(str(a) for a in pattern.hemistiches))) - def report(self, short=False): - num = min(len(self.possible), 4) - truncated = num < len(self.possible) - return Error.report( - self, - (_("Bad metric (expected %s, inferred %d illegal option%s)") % - (self.pattern.metric, - len(self.possible), ('s' if len(self.possible) != 1 else - ''))), - short, - list(map(self.align, self.possible[:num])) - + ([_("... worse options omitted ...")] if truncated else []) - ) - -class ErrorMultipleWordOccurrence(Error): +class ErrorMultipleWordOccurrence: def __init__(self, word, occurrences): self.word = word self.occurrences = occurrences @@ -195,19 +136,17 @@ class ErrorMultipleWordOccurrence(Error): def get_id(self): return self.pattern.myid - def report(self, short=False): - return Error.report(self, _("Too many occurrences of word %s for rhyme %s") - % (self.word, self.get_id()), short) + def report(self, pattern): + return (_("Too many occurrences of word %s for rhyme %s") + % (self.word, self.get_id())) -class ErrorIncompleteTemplate(Error): - def report(self, short=False): - return Error.report(self, _("Poem is not complete"), - short) +class ErrorIncompleteTemplate: + def report(self, pattern): + return _("Poem is not complete") -class ErrorOverflowedTemplate(Error): - def report(self, short=False): - return Error.report(self, _("Verse is beyond end of poem"), - short) +class ErrorOverflowedTemplate: + def report(self, pattern): + return _("Verse is beyond end of poem") class TemplateLoadError(BaseException): def __init__(self, msg): diff --git a/hemistiches.py b/hemistiches.py @@ -1,80 +0,0 @@ -from common import sure_end_fem - -hemis_types = { - 'ok' : '/', # correct - 'bad' : '!', # something wrong - 'cut' : '?', # falls at the middle of a word - 'fem' : '\\', # preceding word ends by a mute e - 'forbidden' : '#', # last word of hemistiche cannot occur at end of hemistiche - } - -# these words are forbidden at hemistiche -forbidden_hemistiche = [ - "le", - "la", - ] - -def align2str(align): - return ''.join([x[0] if isinstance(x, tuple) else x for x in align]) - -def check_spaces(align, pos): - if pos >= len(align): - # not enough syllabes for hemistiche - return "bad" - if align[pos] == ' ' or '-' in align[pos]: - # word boundary here, so this is ok - return "ok" - # skip consonants - if not isinstance(align[pos], tuple): - return check_spaces(align, pos + 1) - # hemistiche falls at the middle of a word - return "cut" - -def check_hemistiche(align, pos, hem, check_end_hemistiche): - if pos >= len(align): - # not enough syllabes for hemistiche - return ("bad", pos) - if hem == 0: - # hemistiche should end here, check that this is a word boundary - if check_end_hemistiche: - if (align2str(align[:pos+1]).split()[-1]) in forbidden_hemistiche: - return ("forbidden", pos) - return (check_spaces(align, pos), pos) - if hem < 0: - # hemistiche falls at the middle of a vowel cluster - return ("cut", pos) - # skip consonants - if not isinstance(align[pos], tuple): - return check_hemistiche(align, pos +1, hem, check_end_hemistiche) - # hemistiche is there, we should not have a feminine ending here - if hem == 1: - if pos + 1 >= len(align): - # not enough syllabes for hemistiche - return ("bad", pos) - if ((align[pos][0] + align[pos+1]).rstrip() in sure_end_fem): - # check that this isn't a one-syllabe wourd (which is allowed) - ok = False - for i in range(2): - for j in ' -': - if j in align[pos-i-1]: - ok = True - if not ok: - # hemistiche ends in feminine - return ("fem", pos) - return check_hemistiche(align, pos+1, hem - align[pos][1], - check_end_hemistiche) - -def check_hemistiches(align, hems, check_end_hemistiche): - """From a sorted list of distinct hemistiche positions, return a - dictionary which maps each position to the status of this - hemistiche""" - - result = {} - pos = 0 - h2 = 0 - for h in hems: - r, pos = check_hemistiche(align, pos, h-h2, check_end_hemistiche) - h2 = h - result[h] = r - return result - diff --git a/metric.py b/metric.py @@ -1,224 +0,0 @@ -#!/usr/bin/python -#coding: utf-8 - -import re -from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants -import vowels -import haspirater - - -no_hiatus = ["oui"] - - -def annotate_aspirated(word): - """Annotate aspirated 'h'""" - if word[0] != 'h': - return word - if haspirater.lookup(word): - return '*'+word - else: - return word - -def contains_break(chunk): - return ' ' in chunk or '-' in chunk - -def possible_weights(chunks, pos, diaeresis): - if diaeresis == "classical": - return vowels.possible_weights_ctx(chunks, pos) - elif diaeresis == "permissive": - return vowels.possible_weights_approx(chunks[pos]) - -def fit(chunks, pos, left, diaeresis): - """bruteforce exploration of all possible vowel cluster weghting, - within a maximum total of left""" - if pos >= len(chunks): - return [[]] # the only possibility is the empty list - if left < 0: - return [] # no possibilities - # skip consonants - if (not is_vowels(chunks[pos])): - return [[chunks[pos]] + x for x in fit(chunks, pos+1, left, diaeresis)] - else: - if ((pos >= len(chunks) - 2 and chunks[pos] == 'e') and not ( - pos <= 0 or contains_break(chunks[pos-1])) and not ( - pos <= 1 or contains_break(chunks[pos-2]))): - # special case for verse endings, which can get elided (or not) - # but we don't elide lone syllables ("prends-le", etc.) - if pos == len(chunks) - 1: - weights = [0] # ending 'e' is elided - elif chunks[pos+1] == 's': - weights = [0] # ending 'es' is elided - elif chunks[pos+1] == 'nt': - # ending 'ent' is sometimes elided - # actually, this will have an influence on the rhyme's gender - weights = [0, 1] - else: - weights = possible_weights(chunks, pos, diaeresis) - else: - if (pos >= len(chunks) - 1 and chunks[pos] == 'e' and - pos > 0 and (chunks[pos-1].endswith('-c') or - chunks[pos-1].endswith('-j'))): - weights = [0] # -ce and -je are elided - else: - weights = possible_weights(chunks, pos, diaeresis) - result = [] - for weight in weights: - # combine all possibilities - result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1, - left - weight, diaeresis)] - return result - -def feminine(align, verse, phon): - for a in sure_end_fem: - if verse.endswith(a): - # check that this isn't a one-syllabe wourd - for i in range(4): - for j in ' -': - try: - if j in align[-i-1]: - return ['M', 'F'] - except IndexError: - return ['M', 'F'] - return ['F'] - if not verse.endswith('ent'): - return ['M'] - # verse ends with 'ent' - if align[-2][1] == 0: - return ['F'] # mute -ent - if align[-2][1] > 0 and align[-2][0] == 'e': - return ['M'] # non-mute "-ent" by the choice of metric - possible = [] - # now, we must check pronunciation? - # "tient" vs. "lient" for instance, "excellent"... - for possible_phon in phon: - if possible_phon.endswith(')') or possible_phon.endswith('#'): - possible.append('M') - else: - possible.append('F') - if possible_phon.endswith('E') and verse.endswith('aient'): - # imparfait and conditionnel are masculine... - possible.append('M') - return possible - - -def parse(text, phon, bound, forbidden_ok, hiatus_ok, diaeresis): - """Return possible aligns for text, bound is an upper bound on the align - length to limit running time, phon is the pronunciation to help for gender, - forbidden_ok is true if we allow classically forbidden patterns""" - - original_text = normalize(text) - - # avoid some vowel problems - text = re.sub("qu", 'q', original_text) - text = re.sub("gue", 'ge', text) - text = re.sub("gué", 'gé', text) - text = re.sub("guè", 'gè', text) - text = re.sub("gua", 'ga', text) - - # split in words - words = text.split(' ') - - # other exceptions - for i in range(len(words)): - # no elision on y- words except "ypérite", "yeuse", "yeux" - if words[i].startswith('y') and words[i] != "y" and not ( - words[i].startswith('yp') or words[i].startswith('yeu')): - words[i] = "*" + words[i] - - # no elision for "oui", "ouis", "ouistitis" - # but elision for "ouighour" - # TODO boileau writes: - # "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute." - # so it's unclear what should be done here - # if (words[i] == "oui" or words[i] == "ouis" or - # words[i].startswith("ouistiti")): - # words[i] = "*" + words[i] - - # no elision on those numerals - # TODO "un" or "une" are sometimes elidable and sometimes non-elidable - # Belle, une fois encor, réponds à mon appel. - # Mon journal, il est vrai, a une belle une. - if (words[i] == "onze"): - words[i] = "*" + words[i] - - if is_consonants(words[i]): - new_word = [] - for x in words[i]: - if (x == 'w'): - new_word.append("doublevé") - else: - new_word.append(x + "é") - words[i] = ''.join(new_word) - - - # aspirated - words = [annotate_aspirated(word) for word in words if word != ''] - - pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE) - - forbidden = None - hiatus = None - - # cut each word in chunks of vowels and consonants, with some specific - # kludges - for i in range(len(words)): - words[i] = re.split(pattern, words[i]) - words[i] = [chunk for chunk in words[i] if chunk != ''] - nwords = [] - # the case of 'y' is special - for chunk in words[i]: - if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y': - nwords.append(chunk) - else: - a = chunk.split('y') - nwords.append(a[0]) - nwords.append('Y') - if a[1] != '': - nwords.append(a[1]) - else: - # the case of "pays" is very special :-( - if words[i] == ['p', 'ay', 's']: - nwords.append('y') - words[i] = nwords - # remove mute 'e' - if i > 0: - if is_vowels(words[i][0], True): - if words[i-1][-1] == 'e' and sum( - [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1: - words[i-1].pop(-1) - words[i-1][-1] = words[i-1][-1]+"`" - if (is_vowels(words[i-1][-1]) and not words[i-1][-1][-1] == 'e' - and not (''.join(words[i]) in no_hiatus - and ''.join(words[i-1]) in no_hiatus)): - hiatus = words[i-1][-1] + ' ' + words[i][0] - else: - if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie': - forbidden = words[i-1][-1] - if words[i-1][-1] == 's' and len(words[i-1]): - if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie': - forbidden = words[i-1][-2] - # TODO there are arcane rules for "aient" - # case of "soient" - # TODO there are a lot of "oient" in boileau and malherme - # so apparently there is no simple way to check that - # if words[i-1][-1] == 'nt' and len(words[i-1]): - # if words[i-1][-2] == 'oie': - # if len(words[i-1]) != 3 or words[i-1][-3] != 's': - # forbidden = True - - if forbidden and not forbidden_ok: - return ("forbidden", forbidden) - if hiatus and not hiatus_ok: - return ("hiatus", hiatus) - - # group back words - for word in words: - word.append(' ') - chunks = sum(words, [])[:-1] - - # return all possibilities to weigh the vowel clusters, annotated by - # the femininity of the align (depending both on the align and - # original text) - return list(map((lambda x: (x, feminine(x, original_text, phon))), - fit(chunks, 0, bound, diaeresis))) - diff --git a/plint.py b/plint.py @@ -17,8 +17,8 @@ def run(): should_end = True line = "" errors = template.check(line, f2, last=should_end) - for error in errors: - print(error.report(), file=sys.stderr) + if errors: + print(errors.report(), file=sys.stderr) ok = False if should_end: break diff --git a/plint_irc.py b/plint_irc.py @@ -4,7 +4,6 @@ import localization import re import sys import rhyme -import metric from template import Template from pprint import pprint from common import normalize @@ -12,11 +11,18 @@ from common import normalize buf = "" lbuf = [] -def output(l): - print(' '.join(l)) - f = open(sys.argv[2], 'a') +def write(l, descriptor=None): + if descriptor: + f = descriptor + else: + f = open(sys.argv[2], 'a') print(' '.join(l), file=f) - f.close() + if not descriptor: + f.close() + +def output(l, descriptor): + print(' '.join(l), file=descriptor) + write(l, descriptor if descriptor != sys.stdout else None) def leading_cap(text): for c in text: @@ -28,7 +34,7 @@ def leading_cap(text): return False return False -def manage(line, silent=False): +def manage(line, descriptor=sys.stdout): """manage one line, indicate if an error occurred""" global buf global lbuf @@ -43,10 +49,7 @@ def manage(line, silent=False): if len(lbuf) > 0: lbuf.append(l) else: - if not silent: - f = open(sys.argv[2], 'a') - print(' '.join(l), file=f) - f.close() + write(l, descriptor) return True if first[0] == '/': return False # ignore other commands @@ -69,20 +72,16 @@ def manage(line, silent=False): return True errors = template.check(text, quiet=False) quiet = False - for error in errors: - if error == None: - quiet = True - if not quiet: - print(error.report()) - if len(errors) == 0: + if errors: + print(errors.report()) + if not errors: buf = "" - if not silent: - if usebuf: - for bl in lbuf: - output(bl) - output(l) + if usebuf: + for bl in lbuf: + output(bl, descriptor) + output(l, descriptor) lbuf = [] - return len(errors) == 0 + return not errors if len(sys.argv) not in [3, 4]: print("Usage: %s TEMPLATE POEM [OFFSET]" % sys.argv[0], file=sys.stderr) @@ -113,8 +112,8 @@ for line in f.readlines(): pos += 1 if pos <= offset: continue # ignore first lines - print("Read: %s" % line, file=sys.stderr) - if not manage(line, True): + print("%s (read)" % line.rstrip(), file=sys.stderr) + if not manage(line, sys.stderr): print("Existing poem is wrong!", file=sys.stderr) sys.exit(2) f.close() diff --git a/plint_web.py b/plint_web.py @@ -5,7 +5,7 @@ import localization import re import template import error -from bottle import run, Bottle, request, static_file +from bottle import run, Bottle, request, static_file, redirect from jinja2 import Environment, PackageLoader env = Environment(loader=PackageLoader('plint_web', 'views')) @@ -41,8 +41,8 @@ def get_locale(): except AttributeError: return 'en' -def get_title(): - if get_locale() == 'fr': +def get_title(lang): + if lang == 'fr': return "plint -- vérification formelle de poèmes" else: return "plint -- French poetry checker" @@ -57,13 +57,23 @@ def server_static(filename): @app.route('/') def root(): - return env.get_template('index.html').render(title=get_title(), - lang=get_locale()) + redirect('/' + get_locale() + '/') -@app.route('/about') -def about(): - return env.get_template('about.html').render(title=get_title(), - lang=get_locale()) +@app.route('/<page>') +def paged(page): + redirect('/' + get_locale() + '/' + page) + +@app.route('/<lang>/') +def root(lang): + if lang not in ['fr', 'en']: + return paged(lang) + return env.get_template('index.html').render(title=get_title(lang), + lang=lang, path="") + +@app.route('/<lang>/about') +def about(lang): + return env.get_template('about.html').render(title=get_title(lang), + lang=lang, path="about") def check(poem): if len(poem) > 8192: @@ -75,26 +85,27 @@ def check(poem): s[x].strip() return s -@app.route('/check', method='POST') -def q(): +@app.route('/<lang>/check', method='POST') +def q(lang): d = { 'poem': request.forms.get('poem'), 'template': request.forms.get('template'), - 'lang': get_locale(), + 'lang': lang, + 'nolocale': True, } - localization.init_locale(get_locale()) + localization.init_locale(lang) d['poem'] = re.sub(r'<>&', '', d['poem']) print(d['poem']) poem = check(d['poem']) if not poem: - if get_locale() == 'fr': + if lang == 'fr': msg = "Le poème est vide, trop long, ou a des lignes trop longues" else: msg = "Poem is empty, too long, or has too long lines" d['error'] = msg return env.get_template('error.html').render(**d) if not re.match("^[a-z_]+$", d['template']): - if get_locale() == 'fr': + if lang == 'fr': msg = "Modèle inexistant" else: msg = "No such template" @@ -108,7 +119,7 @@ def q(): x = f.read() f.close() except IOError: - if get_locale() == 'fr': + if lang == 'fr': msg = "Modèle inexistant" else: msg = "No such template" @@ -117,7 +128,7 @@ def q(): try: templ = template.Template(x) except error.TemplateLoadError as e: - if get_locale() == 'fr': + if lang == 'fr': msg = "Erreur à la lecture du modèle : " + e.msg else: msg = "Error when reading template: " + e.msg @@ -136,18 +147,18 @@ def q(): if line == None: line = "" last = True - errors = [error.report(short=True) for error in templ.check(line, last=last)] - if errors != [] and not firsterror: + errors = templ.check(line, last=last) + if errors and not firsterror: firsterror = i - r.append((line, errors)) - nerror += len(errors) + r.append((line, '\n'.join(sum(errors.lines(short=True), [])) if errors else [])) + nerror += len(errors.errors) if errors else 0 d['result'] = r d['firsterror'] = firsterror d['nerror'] = nerror if nerror == 0: - d['title'] = "[Valid] " + get_title() + d['title'] = "[Valid] " + get_title(lang) else: - d['title'] = "[Invalid] " + get_title() + d['title'] = "[Invalid] " + get_title(lang) return env.get_template('results.html').render(**d) if __name__ == '__main__': diff --git a/res/messages_fr.po b/res/messages_fr.po @@ -5,8 +5,8 @@ msgid "" msgstr "" "Project-Id-Version: plint\n" -"POT-Creation-Date: 2013-01-30 23:19+CET\n" -"PO-Revision-Date: 2013-01-30 23:20+0100\n" +"POT-Creation-Date: 2013-02-15 00:05+CET\n" +"PO-Revision-Date: 2013-02-15 00:06+0100\n" "Last-Translator: Antoine Amarilli <a3nm@a3nm.net>\n" "Language-Team: \n" "Language: \n" @@ -15,67 +15,63 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: pygettext.py 1.5\n" -#: error.py:25 -msgid "error: %s" -msgstr "erreur : %s" +#: error.py:53 +msgid " (see '%s' above)" +msgstr " (voir '%s' ci-dessus)" -#: error.py:26 -msgid "Line is: %s" -msgstr "Ligne : %s" +#: error.py:58 +msgid "Illegal characters" +msgstr "Caractères interdits" -#: error.py:45 -msgid "Illegal character%s: %s" -msgstr "Mauvais caractère%s : %s" +#: error.py:64 +msgid "Illegal ambiguous pattern" +msgstr "Motif ambigu interdit" -#: error.py:54 -msgid "Illegal ambiguous pattern: %s" -msgstr "Motif ambigu interdit : %s" +#: error.py:70 +msgid "Illegal hiatus" +msgstr "Hiatus interdit" -#: error.py:62 -msgid "Illegal hiatus: %s" -msgstr "Hiatus interdit : %s" +#: error.py:82 +msgid "%s for type %s (expected \"%s\", inferred \"%s\")" +msgstr "%s pour le type %s (attendu : \"%s\", lu : \"%s\")" -#: error.py:75 -msgid "%s for type %s (expected %s, inferred \"%s\")" -msgstr "%s pour le type %s (attendu : %s, lu : \"%s\")" +#: error.py:89 +msgid "Bad rhyme genre" +msgstr "Mauvais genre de rime" -#: error.py:81 +#: error.py:92 msgid " or " msgstr " ou " -#: error.py:91 -msgid "Bad rhyme genre" -msgstr "Mauvais genre de rime" +#: error.py:103 +msgid "Bad rhyme" +msgstr "Mauvaise rime" -#: error.py:100 -msgid " (ending: \"" -msgstr " (fin : \"" +#: error.py:111 +msgid ", ending: \"" +msgstr ", fin: \"" -#: error.py:106 +#: error.py:117 msgid "%s for type %s (expected %s)" msgstr "%s pour le type %s (attendu : %s)" -#: error.py:111 -msgid "Bad rhyme" -msgstr "Mauvaise rime" - -#: error.py:181 -msgid "Bad metric (expected %s, inferred %d illegal option%s)" -msgstr "Mauvaise métrique (attendu : %s, lu %d choix interdit%s)" +#: error.py:122 +msgid "Illegal metric: expected %d syllable%s%s" +msgstr "Métrique illégale: attendu : %d syllabe%s%s" -#: error.py:187 -msgid "... worse options omitted ..." -msgstr "... et d'autres choix non affichés ..." +#: error.py:125 +msgid " with hemistiche%s at " +msgstr " avec hémistiche%s en " -#: error.py:199 +#: error.py:138 msgid "Too many occurrences of word %s for rhyme %s" msgstr "Trop d'occurrences du mot %s pour la rime %s" -#: error.py:204 +#: error.py:143 msgid "Poem is not complete" msgstr "Poème incomplet" -#: error.py:209 +#: error.py:147 msgid "Verse is beyond end of poem" msgstr "Vers au-delà de la fin du poème" @@ -87,26 +83,41 @@ msgstr "Usage : %s MODÈLE [OCONTEXTE]" msgid "Check stdin according to TEMPLATE, report errors on stdout" msgstr "Vérifie l'entrée standard suivant MODÈLE, signale les erreurs sur la sortie standard" -#: template.py:36 +#: template.py:31 msgid "Metric length limit exceeded" msgstr "La longueur de la métrique est trop grande" -#: template.py:82 +#: template.py:78 msgid "Bad value for global option %s" msgstr "Mauvaise valeur pour l'option globale %s" -#: template.py:92 +#: template.py:88 msgid "Unknown global option" msgstr "Option globale inconnue" -#: template.py:107 +#: template.py:103 msgid "Template is empty" msgstr "Modèle vide" -#: template.py:345 +#: template.py:285 msgid "Bad value in global option" msgstr "Mauvaise valeur pour l'option globale %s" +#~ msgid "hiatus" +#~ msgstr "hiatus" + +#~ msgid "error: %s" +#~ msgstr "erreur : %s" + +#~ msgid "Line is: %s" +#~ msgstr "Ligne : %s" + +#~ msgid "Bad metric (expected %s, inferred %d illegal option%s)" +#~ msgstr "Mauvaise métrique (attendu : %s, lu %d choix interdit%s)" + +#~ msgid "... worse options omitted ..." +#~ msgstr "... et d'autres choix non affichés ..." + #~ msgid "genre" #~ msgstr "genre" diff --git a/static/main.css b/static/main.css @@ -7,13 +7,20 @@ h1 { padding: 0.2em; } +#lang { + float: right; + padding: 0.2em; + margin: 0; + margin-left: 0.3em; +} + #body { margin-top: 1em; padding-left: 0.5em; padding-right: 0.5em; } -h1 a, #about { +header a, #about { color: white; text-decoration: none; } diff --git a/static/tpl/french_abab.tpl b/static/tpl/french_abab.tpl @@ -1,13 +1,13 @@ ! incomplete_ok:no repeat_ok:no 6/6 A x 6/6 B X -6/6 A X -6/6 B x +6/6 A x +6/6 B X 6/6 A x 6/6 B X -6/6 A X -6/6 B x +6/6 A x +6/6 B X 6/6 C y 6/6 C y diff --git a/static/tpl/italian_abab.tpl b/static/tpl/italian_abab.tpl @@ -1,13 +1,13 @@ ! incomplete_ok:no repeat_ok:no 6/6 A x 6/6 B X -6/6 A X -6/6 B x +6/6 A x +6/6 B X 6/6 A x 6/6 B X -6/6 A X -6/6 B x +6/6 A x +6/6 B X 6/6 C y 6/6 C y diff --git a/template.py b/template.py @@ -1,28 +1,15 @@ import error -from metric import parse -from hemistiches import check_hemistiches import copy import rhyme +from verse import Verse from common import normalize, legal, strip_accents_one, rm_punct from nature import nature_count from vowels import possible_weights_ctx, make_query +from pprint import pprint -def handle(poss): - l = [] - #print(poss) - for i in range(len(poss)): - if isinstance(poss[i], tuple): - #print(cleared[:i][::-1]) - #print(cleared[i+1:]) - # print(poss) - # print (make_query(poss, i)) - if len(possible_weights_ctx(poss, i)) > 1: - l.append((poss[i][1], make_query(poss, i))) - return l - class Pattern: - def __init__(self, metric, myid, femid, constraint): + def __init__(self, metric, myid="", femid="", constraint=None): self.metric = metric self.parse_metric() self.myid = myid @@ -42,7 +29,7 @@ class Pattern: self.length = self.hemistiches.pop() class Template: - def __init__(self, string): + def __init__(self, string=None): self.template = [] self.pattern_line_no = 0 self.forbidden_ok = False @@ -55,7 +42,8 @@ class Template: self.check_occurrences = True self.diaeresis = "classical" self.mergers = [] - self.load(string) + if string: + self.load(string) self.line_no = 0 self.position = 0 self.prev = None @@ -106,23 +94,6 @@ class Template: if len(self.template) == 0: raise error.TemplateLoadError(_("Template is empty")) - def count(self, align): - """total weight of an align""" - return sum([x[1] for x in align if isinstance(x, tuple)]) - - def rate(self, pattern, align): - """Rate align according to pattern""" - align, fem, hemis = align - c = self.count(align) - ok = True - for h in hemis.values(): - if h != "ok": - ok = False - if ok and c == pattern.length: - return 0 - return ((1+len(hemis.keys()))*abs(pattern.length - c) - + sum([1 for x in hemis.values() if x != "ok"])) - def match(self, line, ofile=None, quiet=False, last=False): """Check a line against current pattern, return errors""" @@ -131,95 +102,44 @@ class Template: errors = [] pattern = self.get() + line_with_case = normalize(line, downcase=False) + line_normalize = normalize(line) + + v = Verse(line, self, pattern) + if last: if was_incomplete and not self.incomplete_ok and not self.overflowed: - errors.append(error.ErrorIncompleteTemplate()) - return errors, pattern + return [error.ErrorIncompleteTemplate()], pattern, v + return [], pattern, v if self.overflowed: - errors.append(error.ErrorOverflowedTemplate()) - return errors, pattern - - # check characters - illegal = set() - for x in line: - if not rm_punct(strip_accents_one(x)[0].lower()) in legal: - illegal.add(x) - if len(illegal) > 0: - if quiet: - return [None], pattern - errors.append(error.ErrorBadCharacters(illegal)) - return errors, pattern - - line_with_case = normalize(line, downcase=False) - line = normalize(line) + return [error.ErrorOverflowedTemplate()], pattern, verse # rhymes if pattern.myid not in self.env.keys(): # initialize the rhyme - self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint, + self.env[pattern.myid] = rhyme.Rhyme(line_normalize, pattern.constraint, self.mergers, self.normande_ok) else: # update the rhyme old_p = self.env[pattern.myid].phon old_e = self.env[pattern.myid].eye - self.env[pattern.myid].feed(line, pattern.constraint) + self.env[pattern.myid].feed(line_normalize, pattern.constraint) # no more possible rhymes, something went wrong if not self.env[pattern.myid].satisfied(): self.env[pattern.myid].phon = old_p self.env[pattern.myid].eye = old_e errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None)) - # compute alignments, check hemistiches, sort by score - possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2, - self.forbidden_ok, self.hiatus_ok, self.diaeresis) - if not isinstance(possible, list): - if possible[0] == "forbidden": - errors.append(error.ErrorForbiddenPattern(possible[1])) - elif possible[0] == "hiatus": - errors.append(error.ErrorHiatus(possible[1])) - possible = [] - return errors, pattern - possible = list(map((lambda p: (p[0], p[1], - check_hemistiches(p[0], pattern.hemistiches, self.check_end_hemistiche))), - possible)) - possible = map((lambda x: (self.rate(pattern, x), x)), possible) - possible = sorted(possible, key=(lambda x: x[0])) - - if quiet: - if len(possible) == 0: - return [None], pattern - if possible[0][0] > (1+len(pattern.hemistiches))*pattern.length/2: - return [None], pattern + errors += v.problems() - # check metric - if len(possible) == 0 or possible[0][0] != 0: - errors.append(error.ErrorBadMetric(possible)) - if len(possible) == 0: - return errors, pattern - # keep the best alignment as hypotheses - possible = [(score, align) for (score, align) in possible - if score == possible[0][0]] if ofile: - if len(possible) == 1 and possible[0][0] == 0: - l = [(x[1][0]) for x in possible] - poss = [] - for p in l: - c = [] - while len(p) > 0: - x = p.pop() - if x == ' ': - poss.append(c[::-1]) - c = [] - else: - c.append(x) - if len(c) > 0: - poss.append(c[::-1]) - for w in poss: - l = handle(w) - for x in l: - # print(x) - print((str(x[0]) + ' ' + ' '.join(x[1])), file=ofile) + possible = v.possible + if len(possible) == 1: + for i, p in enumerate(possible[0]): + if 'weight' in p.keys() and len(p['weights']) > 1: + print(str(p['weight']) + ' ' + + ' '.join(make_query(possible[0], i)), file=ofile) # occurrences if self.check_occurrences: @@ -233,7 +153,7 @@ class Template: errors.append(error.ErrorMultipleWordOccurrence(last_word, self.occenv[pattern.myid][last_word])) - # rhyme genres + # rhyme genres # inequality constraint # TODO this is simplistic and order-dependent if pattern.femid.swapcase() in self.femenv.keys(): @@ -250,12 +170,12 @@ class Template: self.femenv[pattern.femid] = x else: old = list(self.femenv[pattern.femid]) - new = list(set(sum([x[1] for (score, x) in possible], []))) + new = v.genders(self.env[pattern.myid].phon) self.femenv[pattern.femid] &= set(new) if len(self.femenv[pattern.femid]) == 0: errors.append(error.ErrorBadRhymeGenre(old, new)) - return errors, pattern + return errors, pattern, v def parse_line(self, line): """Parse template line from a line""" @@ -324,18 +244,16 @@ class Template: self.line_no += 1 line = line.rstrip() if normalize(line) == '' and not last: - return [] + return None #possible = [compute(p) for p in possible] #possible = sorted(possible, key=rate) - errors, pattern = self.match(line, ofile, quiet=quiet, last=last) - for error in errors: - if error != None: - # update errors with line position and pattern - error.pos(line, self.line_no, pattern) - if len(errors) > 0 and self.reject_errors: - self.back() - self.line_no -= 1 - return errors + errors, pattern, verse = self.match(line, ofile, quiet=quiet, last=last) + if len(errors) > 0: + if self.reject_errors: + self.back() + self.line_no -= 1 + return error.ErrorCollection(self.line_no, line, pattern, verse, errors) + return None def str2bool(x): if x.lower() in ["yes", "oui", "y", "o"]: diff --git a/test/au_lecteur b/test/au_lecteur @@ -46,4 +46,4 @@ Et dans un bâillement avalerait le monde; C'est l'Ennui!--L'œil chargé d'un pleur involontaire, Il rêve d'échafauds en fumant son houka. Tu le connais, lecteur, ce monstre délicat, ---Hypocrite lecteur,--mon semblable,--mon frère! +-- Hypocrite lecteur, -- mon semblable, -- mon frère! diff --git a/verse.py b/verse.py @@ -0,0 +1,356 @@ +#!/usr/bin/python3 + +import common +from common import consonants, normalize, is_consonants, is_vowels, sure_end_fem, strip_accents_one +import re +import vowels +import haspirater +import error +from pprint import pprint + +class Verse: + def elision(self, word): + if (word.startswith('y') and not word == 'y' and not word.startswith("yp") and + not word.startswith("yeu")): + return [False] + if word in ["oui", "ouis"] or word.startswith("ouistiti"): + # elision for those words, but beware, no elision for "ouighour" + # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute." + # so elission sometimes + return [True, False] + # "un", "une" are non-elided as nouns ("cette une") + if word in ["un", "une"]: + return [True, False] + # "onze" is not elided + if word == "onze": + return [False] + if word[0] == 'h': + return list(map((lambda s: not s), haspirater.lookup(word))) + if is_vowels(word[0]): + return [True] + return [False] + + def remove_trivial(self, chunks, predicate): + new_chunks = [] + accu = "" + for i, w in enumerate(chunks): + if predicate(w): + if len(new_chunks) == 0: + accu = accu + w + else: + new_chunks[-1] = new_chunks[-1] + w + else: + new_chunks.append(accu + w) + accu = "" + return new_chunks + + @property + def line(self): + return ''.join(x['original'] for x in self.chunks) + + def __init__(self, line, template, pattern): + self.template = template + self.pattern = pattern + + whitespace_regexp = re.compile("(\s*)") + ys_regexp = re.compile("(\s*)") + all_consonants = consonants + consonants.upper() + consonants_regexp = re.compile('([^'+all_consonants+'*-]*)', re.UNICODE) + + words = re.split(whitespace_regexp, line) + words = self.remove_trivial(words, (lambda w: re.match("^\s*$", w) or + len(normalize(w, rm_all=True)) == 0)) + pre_chunks = [re.split(consonants_regexp, word) for word in words] + pre_chunks = [self.remove_trivial(x, (lambda w: re.match("^\s*$", w) or + len(normalize(w, rm_all=True)) == 0)) for x in pre_chunks] + self.chunks = [[{'original': y, 'text': normalize(y, rm_apostrophe=True)} + for y in x] for x in pre_chunks] + + # check forbidden characters + for w in self.chunks: + for y in w: + for x in y['text']: + if not common.rm_punct(strip_accents_one(x)[0].lower()) in common.legal: + y['error'] = "illegal" + + # gu- and qu- simplifications + for w in self.chunks: + if len(w) < 2: + continue + for i, x in enumerate(w[:-1]): + if not w[i+1]['text'].startswith('u'): + continue + if w[i]['text'].endswith('q'): + w[i+1]['text'] = w[i+1]['text'][1:] + if w[i+1]['text'] == '': + w[i]['original'] += w[i+1]['original'] + if w[i]['text'].endswith('g') and len(w[i+1]['text']) >= 2: + if w[i+1]['text'][1] in "eéèa": + w[i+1]['text'] = w[i+1]['text'][1:] + # remove empty chunks created by simplifications + for i, w in enumerate(self.chunks): + self.chunks[i] = [x for x in w if len(x['text']) > 0] + # remove leading and trailing crap + for w in self.chunks: + for p in [0, -1]: + while len(w[p]['text']) > 0 and w[p]['text'][0] in ' -': + w[p]['text'] = w[p]['text'][1:] + while len(w[p]['text']) > 0 and w[p]['text'][-1] in ' -': + w[p]['text'] = w[p]['text'][:-1] + + # sigles + for i, w in enumerate(self.chunks): + if len(w) == 1 and is_consonants(w[0]['text']): + new_chunks = [] + for j, x in enumerate(w[0]['text']): + if (x == 'w'): + nc = "doublevé" + else: + nc = x + "a" + new_chunks += re.split(consonants_regexp, nc) + new_chunks = [x for x in new_chunks if len(x) > 0] + new_word = [] + for j, x in enumerate(new_chunks): + lindex = int(j*len(w[0]['original'])/len(w[0]['text'])) + rindex = int((j+1)*len(w[0]['original'])/len(w[0]['text'])) + part = w[0]['original'][lindex:rindex] + new_word.append({'original': part, 'text': x}) + self.chunks[i] = new_word + + # vowel elision problems + for w in self.chunks: + w[0]['elision'] = self.elision(''.join(x['text'] for x in w)) + + # case of 'y' + ys_regexp = re.compile("(y*)") + for i, w in enumerate(self.chunks): + new_word = [] + for j, chunk in enumerate(w): + if ('y' not in chunk['text'] or len(chunk['text']) == 1 or + chunk['text'].startswith("y")): + new_word.append(chunk) + continue + # special case of "pays" + if (chunk['text'] == "ay" and j > 0 and j < len(w) - 1 and + w[j-1]['text'].endswith("p") and w[j+1]['text'].startswith("s")): + new_word.append(chunk) + # force weight + chunk['weights'] = [2] + continue + subchunks = re.split(ys_regexp, chunk['text']) + subchunks = [x for x in subchunks if len(x) > 0] + for j, subchunk in enumerate(subchunks): + lindex = int(j*len(chunk['original'])/len(subchunks)) + rindex = int((j+1)*len(chunk['original'])/len(subchunks)) + part = chunk['original'][lindex:rindex] + new_subchunk_text = 'Y' if 'y' in subchunk else subchunk + new_subchunk = dict(chunk) + new_subchunk['original'] = part + new_subchunk['text'] = new_subchunk_text + new_word.append(new_subchunk) + self.chunks[i] = new_word + + # annotate final mute 'e' + for i, w in enumerate(self.chunks[:-1]): + if w[-1]['text'] != "e": + continue + if sum([1 for chunk in w if is_vowels(chunk['text'])]) <= 1: + continue + w[-1]['elidable'] = self.chunks[i+1][0]['elision'] + + # annotate hiatus and ambiguities + ambiguous_potential = ["ie", "ée"] + no_hiatus = ["oui"] + for i, w in enumerate(self.chunks[:-1]): + if w[-1]['text'] == "s": + if w[-2]['text'] in ambiguous_potential: + w[-2]['error'] = "ambiguous" + w[-1]['error'] = "ambiguous" + if w[-1]['text'] in ambiguous_potential: + if self.chunks[i+1][0]['text'][0] in consonants: + w[-1]['error'] = "ambiguous" + self.chunks[i+1][0]['error'] = "ambiguous" + elif is_vowels(w[-1]['text']) and not w[-1]['text'].endswith('e'): + if is_vowels(self.chunks[i+1][0]['text']): + if ''.join(x['text'] for x in w) not in no_hiatus: + if ''.join(x['text'] for x in self.chunks[i+1]) not in no_hiatus: + w[-1]['error'] = "hiatus" + self.chunks[i+1][0]['error'] = "hiatus" + + # annotate word ends + for w in self.chunks[:-1]: + w[-1]['wordend'] = True + + # collapse words + self.chunks = sum(self.chunks, []) + + # annotate weights + for i, chunk in enumerate(self.chunks): + if (not is_vowels(self.chunks[i]['text'])): + continue + # for the case of "pays" and related words + if 'weights' not in self.chunks[i].keys(): + self.chunks[i]['weights'] = self.possible_weights_context(i) + self.chunks[i]['hemis'] = self.hemistiche(i) + + self.possible = self.fit(0, 0, self.pattern.hemistiches) + self.text = self.align2str(self.chunks) + + def contains_break(self, chunk): + return '-' in chunk['text'] or 'wordend' in chunk + + def possible_weights(self, pos): + if self.template.diaeresis == "classical": + return vowels.possible_weights_ctx(self.chunks, pos) + elif self.template.diaeresis == "permissive": + return vowels.possible_weights_approx(self.chunks[pos]['text']) + + def possible_weights_context(self, pos): + if ((pos >= len(self.chunks) - 2 and self.chunks[pos]['text'] == 'e') + and not (pos == len(self.chunks) - 2 and + is_vowels(self.chunks[pos+1]['text'])) + and not (pos <= 0 or self.contains_break(self.chunks[pos-1])) + and not (pos <= 1 or self.contains_break(self.chunks[pos-2]))): + # special case for verse endings, which can get elided (or not) + # but we don't elide lone syllables ("prends-le", etc.) + if pos == len(self.chunks) - 1: + return [0] # ending 'e' is elided + if self.chunks[pos+1]['text'] == 's': + return [0] # ending 'es' is elided + if self.chunks[pos+1]['text'] == 'nt': + # ending 'ent' is sometimes elided + # actually, this will have an influence on the rhyme's gender + return [0, 1] + return self.possible_weights(pos) + if (pos == len(self.chunks) - 1 and self.chunks[pos]['text'] == 'e' and + pos > 0 and (self.chunks[pos-1]['text'].endswith('-c') or + self.chunks[pos-1]['text'].endswith('-j'))): + return [0] # -ce and -je are elided + if (pos >= len(self.chunks) - 1 + and self.chunks[pos]['text'] in ['ie', 'ée']): + return [1] + if (pos >= len(self.chunks) - 2 + and self.chunks[pos]['text'] in ['ée']): + return [1] + if 'elidable' in self.chunks[pos]: + return [0 if x else 1 for x in self.chunks[pos]['elidable']] + return self.possible_weights(pos) + + def feminine(self, align, phon): + for a in sure_end_fem: + if self.text.endswith(a): + # check that this isn't a one-syllabe wourd + for i in range(4): + try: + if '-' in self.chunks[-i-1]['text'] or 'wordend' in self.chunks[-i-1]: + return ['M', 'F'] + except IndexError: + return ['M', 'F'] + return ['F'] + if not self.text.endswith('ent'): + return ['M'] + # verse ends with 'ent' + if align and align[-2]['weight'] == 0: + return ['F'] # mute -ent + if align and align[-2]['weight'] > 0 and align[-2]['text'] == 'e': + return ['M'] # non-mute "-ent" by the choice of metric + possible = [] + # now, we must check pronunciation? + # "tient" vs. "lient" for instance, "excellent"... + for possible_phon in phon: + if possible_phon.endswith(')') or possible_phon.endswith('#'): + possible.append('M') + else: + possible.append('F') + if possible_phon.endswith('E') and self.text.endswith('aient'): + # imparfait and conditionnel are masculine... + possible.append('M') + return possible + + def fit(self, pos, count, hemistiches): + if count > self.pattern.length: + return [] # no possibilites + if len(hemistiches) > 0 and hemistiches[0] < count: + return [] # missed a hemistiche + if pos == len(self.chunks): + if count == self.pattern.length: + return [[]] # empty list is the only possibility + else: + return [] + chunk = self.chunks[pos] + result = [] + for weight in chunk.get('weights', [0]): + next_hemistiches = hemistiches + if (len(hemistiches) > 0 and count + weight == hemistiches[0] and + is_vowels(chunk['text']) and (chunk['hemis'] == "ok" or not + self.template.check_end_hemistiche and chunk['hemis'] != "cut")): + # we hemistiche here + next_hemistiches = next_hemistiches[1:] + current = dict(self.chunks[pos]) + if 'weights' in current: + current['weight'] = weight + for x in self.fit(pos+1, count + weight, next_hemistiches): + result.append([current] + x) + return result + + hemis_types = { + 'ok': '/', # correct + 'cut': '?', # falls at the middle of a word + 'fem': '\\', # preceding word ends by a mute e + } + + def align2str(self, align): + return ''.join([x['text'] for x in align]) + + def hemistiche(self, pos): + ending = self.chunks[pos]['text'] + if not 'wordend' in self.chunks[pos] and pos < len(self.chunks) - 1: + if not 'wordend' in self.chunks[pos+1]: + return "cut" + ending += self.chunks[pos+1]['text'] + if (ending in sure_end_fem): + if True in self.chunks[pos].get('elidable', [False]): + return "ok" # elidable final -e + # check that this isn't a one-syllabe wourd (which is allowed) + ok = False + try: + for i in range(2): + if '-' in self.chunks[pos-i-1]['text'] or 'wordend' in self.chunks[pos-i-1]: + ok = True + except IndexError: + pass + if not ok: + # hemistiche ends in feminine + return "fem" + return "ok" + + def problems(self): + result = [] + errors = set() + for c in self.chunks: + if 'error' in c: + if c['error'] == "ambiguous" and not self.template.forbidden_ok: + errors.add(error.ErrorForbiddenPattern) + if c['error'] == "hiatus" and not self.template.hiatus_ok: + errors.add(error.ErrorHiatus) + if c['error'] == "illegal": + errors.add(error.ErrorBadCharacters) + for k in errors: + result.append(k()) + if len(self.possible) == 0: + result.append(error.ErrorBadMetric()) + return result + + def valid(self): + return len(self.problems()) == 0 + + def genders(self, phon): + result = set() + for p in self.possible: + result.update(set(self.feminine(p, phon))) + if len(self.possible) == 0: + # try to infer gender even when metric is wrong + result.update(set(self.feminine(None, phon))) + return result + + diff --git a/versetest.py b/versetest.py @@ -0,0 +1,155 @@ +#!/usr/bin/python3 + +import template +import verse +import unittest +from pprint import pprint + +class SanityCheck(unittest.TestCase): + def testSimple(self): + text = "Hello World!! This is a test" + v = verse.Verse(text, template.Template(), template.Pattern("12")) + self.assertEqual(text, v.line) + + def testComplex(self): + text = "Aye AYAYE aye gue que geque AYAYAY a prt sncf bbbéé" + v = verse.Verse(text, template.Template(), template.Pattern("12")) + self.assertEqual(text, v.line) + + def testLeadingSpace(self): + text = " a" + v = verse.Verse(text, template.Template(), template.Pattern("12")) + self.assertEqual(text, v.line) + +class Eliminate(unittest.TestCase): + def testEliminateOneGue(self): + text = "gue" + v = verse.Verse(text, template.Template(), template.Pattern("12")) + c = ''.join([x['text'] for x in v.chunks]) + self.assertFalse("gue" in c) + + def testEliminateGue(self): + text = "gue gue GUE ogues longuement la guerre" + v = verse.Verse(text, template.Template(), template.Pattern("12")) + c = ''.join([x['text'] for x in v.chunks]) + self.assertFalse("gue" in c) + +class BadChars(unittest.TestCase): + def testBadAlone(self): + v = verse.Verse("42", template.Template(), template.Pattern("12")) + self.assertFalse(v.valid()) + + def testBadAndGood(self): + v = verse.Verse("bla h42 blah ", template.Template(), template.Pattern("12")) + self.assertFalse(v.valid()) + + + def getWeight(self, align): + return sum(x.get('weight', 0) for x in align) + + def achievesPossibility(self, aligns, target): + for align in aligns: + if self.getWeight(align) == target: + return True + return False + +class Counts(unittest.TestCase): + def runCount(self, text, limit=12): + v = verse.Verse(text, template.Template(), template.Pattern(str(limit))) + return v.possible + + def getWeight(self, align): + return sum(x.get('weight', 0) for x in align) + + def achievesPossibility(self, aligns, target): + for align in aligns: + if self.getWeight(align) == target: + return True + return False + +class SigleCounts(Counts): + def testW(self): + f = self.runCount("W", limit=3) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 3) + + def testB(self): + f = self.runCount("b", limit=1) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 1) + + def testMulti(self): + f = self.runCount("SNCF WWW", limit=13) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 13) + +class SimpleCounts(Counts): + def testTrivialMonovoc(self): + f = self.runCount("Ba", limit=1) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 1) + + def testMonovoc(self): + f = self.runCount("Babababa", limit=4) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 4) + +class AspiratedCounts(Counts): + def testBaudelaire1half(self): + possible = self.runCount("funeste hélas", limit=4) + self.assertTrue(self.achievesPossibility(possible, 4)) + possible = self.runCount("funeste hélas", limit=5) + self.assertTrue(self.achievesPossibility(possible, 5)) + +class RealCounts(Counts): + half1 = "Je veux, pour composer" + half2 = " chastement mes églogues," + verse = "Allez. Après cela direz-vous que je l’aime ?" + + def testBaudelaire1half(self): + f = self.runCount(self.half1, limit=6) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 6) + + def testBaudelaire1half2(self): + f = self.runCount(self.half2, limit=6) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 6) + + def testBaudelaire1(self): + f = self.runCount(self.half1 + self.half2, limit=12) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 12) + + def testAndromaque(self): + f = self.runCount(self.verse, limit=12) + self.assertEqual(1, len(f)) + self.assertEqual(self.getWeight(f[0]), 12) + +class BadCounts(Counts): + def testBad(self): + f = self.runCount("Cela cela", limit=5) + pprint(f) + self.assertEqual(0, len(f)) + +class PoemCounts(Counts): + v1 = "Qui berce longuement notre esprit enchanté" + v2 = "Qu'avez-vous ? Je n'ai rien. Mais... Je n'ai rien, vous dis-je," + v3 = "Princes, toute h mer est de vaisseaux couverte," + v4 = "Souvent le car qui l'a ne le sait pas lui-même" + def testV1(self): + possible = self.runCount(self.v1, limit=12) + self.assertTrue(self.achievesPossibility(possible, 12)) + def testV2(self): + possible = self.runCount(self.v2, limit=12) + self.assertTrue(self.achievesPossibility(possible, 12)) + def testV3(self): + possible = self.runCount(self.v3, limit=12) + self.assertTrue(self.achievesPossibility(possible, 12)) + def testV4(self): + possible = self.runCount(self.v3, limit="6/6") + self.assertTrue(self.achievesPossibility(possible, 12)) + +if __name__ == "__main__": + unittest.main() + diff --git a/views/about.html b/views/about.html @@ -5,7 +5,8 @@ <p>Bienvenue sur <strong>plint</strong>!</p> <h2 id="info">De quoi s'agit-il au juste&nbsp;?</h2> <p>C'est une tentative d'<a href="http://a3nm.net">a3nm</a> pour lutter contre la -poésie de mauvaise qualité. Plint vérifie qu'un poème respecte des contraintes +poésie de mauvaise qualité (celle qui veut suivre des règles classiques sans y +parvenir). Plint vérifie qu'un poème respecte des contraintes de métrique, de rime et de genre de rime. Il utilise <a href="http://gitorious.org/frhyme">frhyme</a> pour les rimes (qui utilise lui-même la base de données <a href="http://lexique.org">Lexique</a>), <a @@ -44,9 +45,7 @@ exacte, se reporter au code source.</p> <dt>Hémistiche</dt> <dd>Les alexandrins classiques sont divisés en deux <em>hémistiches</em> de 6 syllabes. La césure ne doit pas couper un mot et le premier hémistiche ne - doit pas se finir par un son faible (ie. une fin féminine non élidée). - L'hémistiche ne doit pas briser la structure du vers, plint vérifie seulement - qu'il ne se termine pas par un article défini.</dd> + doit pas se finir par un son faible (ie. une fin féminine non élidée).</dd> <dt>Rime.</dt> <dd>La contrainte la plus connue est que les vers doivent rimer. Les phonèmes communs dans une rime doivent inclure un son vocalique (par exemple "tâte" et @@ -202,7 +201,8 @@ programmes), mais aussi pour l'homophonie avec "plainte".</p> <p>Welcome to <strong>plint</strong>!</p> <h2 id="info">Wait, what is this?</h2> <p>This is <a href="http://a3nm.net">a3nm</a>'s attempt to make a better world -by eradicating incorrect French poetry. It checks the validity of a poem with +by eradicating that species of bad French poetry that tries to follow classical +constraints but fails. It checks the validity of a poem with respect to metric, rhyme and rhyme genre constraints. It uses <a href="http://gitorious.org/frhyme">frhyme</a> for rhymes (itself built on the <a href="http://lexique.org">Lexique</a> database), <a @@ -238,8 +238,7 @@ source code.</p> <dd>For classical alexandrines, the 12 syllables are separated in two groups of 6 with an intermediate cesura (the <em>hémistiche</em>). The cesura must not split a word and must not end in a weak sound (essentially, a non-elided - feminine ending). The hemistiche should occur at a pleasant point of the - sentence, plint only checks that it does not end in a definite article.</dd> + feminine ending).</dd> <dt>Rhyme.</dt> <dd>The most well-known constraint is that verses must rhyme. The rhyming phonemes must include a vowel (eg. "tâte" and "bête" do not rhyme because diff --git a/views/page.html b/views/page.html @@ -8,18 +8,23 @@ {% else %} <meta name="description" content="plint French poetry checker" /> {% endif %} - <link rel="stylesheet" href="static/main.css" type="text/css" media="screen" /> + <link rel="stylesheet" href="/static/main.css" type="text/css" media="screen" /> </head> <body> <header> <h1><a href="/">plint</a></h1> - <sup><a id="about" href="/about"> + <sup><a id="about" href="about"> {% if lang == 'fr' %} aide {% else %} help {% endif %} </a></sup> + {% if nolocale != True %} + <div id="lang"> + <a href="/fr/{{path}}">fr</a>&nbsp;&bull;&nbsp;<a href="/en/{{path}}">en</a> + </div> + {% endif %} </header> <div id="body"> diff --git a/views/results.html b/views/results.html @@ -33,11 +33,7 @@ <li class="correct" id="l{{loop.index}}">{{line}} {% else %} <li class="incorrect" id="l{{loop.index}}">{{line}} - <ul> - {% for error in errors %} - <li><pre>{{error}}</pre></li> - {% endfor %} - </ul> + <pre>{{errors}}</pre> {% endif %} </li> {% endfor %} diff --git a/vowels.py b/vowels.py @@ -6,8 +6,8 @@ from common import strip_accents from diaeresis import lookup -def clear(l): - return [x[0] if isinstance(x, tuple) else x for x in l] +def clear(x): + return (x['text'] + ' ') if 'wordend' in x else x['text'] def intersperse(a, b): if (len(a) == 0 or a[0] == ' ') and (len(b) == 0 or b[0] == ' '): @@ -28,7 +28,14 @@ def contains_trema(chunk): threshold = 10 def make_query(chunks, pos): - cleared = clear(chunks) + cleared = [clear(x) for x in chunks] + if cleared[pos].endswith(' '): + cleared[pos] = cleared[pos].rstrip() + if pos + 1 <= len(cleared): + cleared[pos+1] = " " + cleared[pos+1] + else: + cleared.append(' ') + return [cleared[pos]] + intersperse( ''.join(cleared[pos+1:]), ''.join([x[::-1] for x in cleared[:pos][::-1]])) @@ -39,7 +46,6 @@ def possible_weights_ctx(chunks, pos): #print (q) v = lookup(q) #print (v) - #print (possible_weights(chunk)) if len(v.keys()) == 1 and v[list(v.keys())[0]] > threshold: return [int(list(v.keys())[0])] else: