commit fd4ae6568bcc1e66a469f4d9503cdfecbb9f16d8
parent 1a8f02cf1e7661b20c2825f9008734db1ce8dad2
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Mon, 18 Feb 2013 19:26:32 +0100
Merge branch 'provenance'
Diffstat:
22 files changed, 826 insertions(+), 713 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ frhyme.json
haspirater/*
haspirater.py
haspirater.json
+irc/*
occurrences
old/*
*.pyc
diff --git a/Makefile b/Makefile
@@ -1,9 +1,12 @@
LANG=res/messages_fr.mo
-.PHONY: all
+.PHONY: all test
%.mo: %.po
msgfmt -o $*.mo $*.po
all: ${LANG}
+test:
+ python3 versetest.py
+
diff --git a/TODO b/TODO
@@ -1,8 +1,11 @@
-clear textarea
"joueront" trois syllabes ?
-infer meter
-"tu hélas" : assume that hiatus is ok
+pas de rappel multiple de ligne dans les erreurs
+belle gestion des erreurs en html
"paysage", "centurion"
+force language: plint.a3nm.net/french/about.html
+
+
+handle "' " and "` "
== IRC ==
diff --git a/common.py b/common.py
@@ -42,19 +42,25 @@ def norm_spaces(text):
"""Remove multiple consecutive whitespace"""
return re.sub("\s+-*\s*", ' ', text)
-def rm_punct(text):
+def rm_punct(text, rm_all=False, rm_apostrophe=False):
"""Remove punctuation from text"""
text = re.sub("’", "'", text) # no weird apostrophes
text = re.sub("' ", "'", text) # space after apostrophes
+ if rm_apostrophe:
+ text = re.sub("'", "", text)
text = re.sub("'*$", "", text) # apostrophes at end of line
text = re.sub("[‒–—―⁓⸺⸻]", " ", text) # no weird dashes
#TODO rather: keep only good chars
- pattern = re.compile("[^'\w -]", re.UNICODE)
- text2 = pattern.sub(' ', text)
+ if not rm_all:
+ pattern = re.compile("[^'\w -]", re.UNICODE)
+ text2 = pattern.sub(' ', text)
+ else:
+ pattern = re.compile("[^\w]", re.UNICODE)
+ text2 = pattern.sub('', text)
return text2
-def is_vowels(chunk, with_h=False, with_y=True):
+def is_vowels(chunk, with_h=False, with_y=True, with_crap=False):
"""Test if a chunk is vowels
with_h counts 'h' as vowel, with_y allows 'y'"""
@@ -63,7 +69,8 @@ def is_vowels(chunk, with_h=False, with_y=True):
return False
for char in strip_accents(chunk):
if char not in vowels:
- if char != 'h' or not with_h:
+ if (char != 'h' or not with_h) and (char not in ['*', '?'] or not
+ with_crap):
return False
return True
@@ -75,9 +82,10 @@ def is_consonants(chunk):
return False
return True
-def normalize(text, downcase=True):
+def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False):
"""Normalize text, ie. lowercase, no useless punctuation or whitespace"""
- return norm_spaces(rm_punct(text.lower() if downcase else text)).rstrip().lstrip()
+ return norm_spaces(rm_punct(text.lower() if downcase else text,
+ rm_all=rm_all, rm_apostrophe=rm_apostrophe)).rstrip().lstrip()
def subst(string, subs):
if len(subs) == 0:
diff --git a/error.py b/error.py
@@ -1,193 +1,134 @@
import common
-import hemistiches
-class Error:
- def __init__(self):
- self.line = None
- self.line_no = None
- self.pattern = None
- self.prefix = None
- def pos(self, line, line_no, pattern):
- self.line = line
- self.line_no = line_no
- self.pattern = pattern
- self.prefix = "stdin:%d: " % self.line_no
+class ErrorCollection:
+ keys = {'hiatus': 'H', 'ambiguous': 'A', 'illegal': 'I'}
- def say(self, l):
- return self.prefix + l
+ @property
+ def prefix(self):
+ return "stdin:%d: " % self.line_no
- def report(self, s, short=False, t = []):
+ def __init__(self, line_no, line, pattern, verse, errors=None):
+ """Store the line number, line text, pattern, verse and error list.
+
+ errors defaults to a fresh list for each instance: a literal [] default
+ is created once and would be shared by every ErrorCollection built
+ without that argument."""
+ self.line_no = line_no
+ self.line = line
+ self.errors = [] if errors is None else errors
+ self.pattern = pattern
+ self.verse = verse
+
+ def say(self, l, short):
+ return l if short else self.prefix + l
+
+ def align(self):
+ chunks = self.verse.chunks
+ keys = ['original', 'error']
+ if len(self.verse.possible) == 0:
+ keys.append('weights')
+ if len(self.pattern.hemistiches) > 0:
+ keys.append('hemis')
+ formatters = {'weights': lambda x, y: '/'.join([str(a) for a in x]),
+ 'error': lambda x, y: ErrorCollection.keys.get(x, '') *
+ len(chunk['original'])}
+ def render(chunk, key):
+ return (formatters.get(key, lambda x, y: str(x)))(chunk.get(key, ""), chunk)
+ lines = {}
+ for key in keys:
+ lines[key] = ""
+ for chunk in chunks:
+ l = max(len(render(chunk, key)) for key in keys)
+ for key in keys:
+ lines[key] += ('{:^'+str(l)+'}').format(render(chunk, key))
+ return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0]
+
+ def lines(self, short=False):
l = []
- if short:
- l.append(s)
- else:
- l.append(self.say(_("error: %s") % (s)))
- msg = _("Line is: %s") % (self.line)
- if short:
- if t != []:
- if self.line.strip() != "":
- l.append(msg)
- for x in t:
- l.append(x)
- else:
- if self.line.strip() != "":
- l.append(self.say(msg))
- for x in t:
- l.append(self.say(x))
- return '\n'.join(l)
-
-class ErrorBadCharacters(Error):
- def __init__(self, characters):
- self.characters = characters
+ l.append([self.say(x, short) for x in self.align()])
+ for e in self.errors:
+ l.append([self.say(e.report(self.pattern), short)])
+ return l
def report(self, short=False):
- return Error.report(self, _("Illegal character%s: %s")
- % ('' if len(self.characters) == 1 else 's',
- ', '.join(["'" + a + "'" for a in self.characters])), short)
+ return '\n'.join(sum(self.lines(short), []))
-class ErrorForbiddenPattern(Error):
- def __init__(self, forbidden):
- self.forbidden = forbidden
+class ErrorBadElement:
+ def report(self, pattern):
+ return (self.message
+ + _(" (see '%s' above)") % ErrorCollection.keys[self.key])
- def report(self, short=False):
- return Error.report(self, _("Illegal ambiguous pattern: %s") % self.forbidden,
- short)
+class ErrorBadCharacters(ErrorBadElement):
+ @property
+ def message(self):
+ return _("Illegal characters")
+ key = "illegal"
-class ErrorHiatus(Error):
- def __init__(self, hiatus):
- self.hiatus = hiatus
+class ErrorForbiddenPattern(ErrorBadElement):
+ @property
+ def message(self):
+ return _("Illegal ambiguous pattern")
+ key = "ambiguous"
- def report(self, short=False):
- return Error.report(self, _("Illegal hiatus: %s") % self.hiatus, short)
+class ErrorHiatus(ErrorBadElement):
+ @property
+ def message(self):
+ return _("Illegal hiatus")
+ key = "hiatus"
-class ErrorBadRhyme(Error):
+class ErrorBadRhyme:
def __init__(self, expected, inferred):
- Error.__init__(self)
self.expected = expected
self.inferred = inferred
- def report(self, short=False):
+ def report(self, pattern):
# TODO indicate eye rhyme since this is also important
# TODO don't indicate more than the minimal required rhyme (in length and
# present of a vowel phoneme)
- return Error.report(self,
- _("%s for type %s (expected %s, inferred \"%s\")")
- % (self.kind, self.get_id(), self.fmt(self.expected),
- self.fmt(self.inferred)), short)
+ return (_("%s for type %s (expected \"%s\", inferred \"%s\")")
+ % (self.kind, self.get_id(pattern), self.fmt(self.expected),
+ self.fmt(self.inferred)))
class ErrorBadRhymeGenre(ErrorBadRhyme):
+ @property
+ def kind(self):
+ return _("Bad rhyme genre")
+
def fmt(self, l):
result = _(' or ').join(list(l))
if result == '':
result = "?"
return result
- def get_id(self):
- return self.pattern.femid
+ def get_id(self, pattern):
+ return pattern.femid
+class ErrorBadRhymeSound(ErrorBadRhyme):
@property
def kind(self):
- return _("Bad rhyme genre")
+ return _("Bad rhyme")
-class ErrorBadRhymeSound(ErrorBadRhyme):
def fmt(self, l):
pron = l.phon
ok = []
if len(pron) > 0:
ok.append("")
return ("\"" + '/'.join(list(set([common.to_xsampa(x[-4:]) for x in pron])))
- + "\"" + _(" (ending: \"") + l.eye + "\")")
-
- def get_id(self):
- return self.pattern.myid
+ + "\"" + _(", ending: \"") + l.eye + "\"")
- def report(self, short=False):
- return Error.report(self, _("%s for type %s (expected %s)")
- % (self.kind, self.pattern.myid, self.fmt(self.expected)), short)
+ def get_id(self, pattern):
+ return pattern.myid
- @property
- def kind(self):
- return _("Bad rhyme")
+ def report(self, pattern):
+ return (_("%s for type %s (expected %s)")
+ % (self.kind, pattern.myid, self.fmt(self.expected)))
-class ErrorBadMetric(Error):
- def __init__(self, possible):
- Error.__init__(self)
- self.possible = possible
-
- def restore_elid(self, chunk):
- if isinstance(chunk, tuple):
- return [chunk]
- try:
- if chunk[-1] != "`":
- return [chunk]
- except KeyError:
- return [chunk]
- return [chunk[:-1], ("e", 0)]
-
- def align(self, align):
- score, align = align
- align, feminine, hemis = align
- align = sum([self.restore_elid(chunk) for chunk in align], [])
- line = self.line
- l2 = []
- count = 0
- ccount = 0
- last_he = 0
- summary = []
- offset = 0
- done = False
- for x in align:
- if isinstance(x, tuple):
- orig = ""
- while len(line) > 0 and common.is_vowels(line[0]):
- orig += line[0]
- line = line[1:]
- add = ('{:^'+str(len(orig))+'}').format(str(x[1]))
- if offset > 0 and len(add) > 0 and add[-1] == ' ':
- offset -= 1
- add = add[:-1]
- l2 += add
- if len(add) > len(orig):
- offset = len(add) - len(orig)
- count += x[1]
- ccount += x[1]
- done = False
- else:
- orig = ""
- while len(line) > 0 and not common.is_vowels(line[0]):
- orig += line[0]
- line = line[1:]
- if count in hemis.keys() and not done and last_he < count:
- done = True
- summary.append(str(ccount))
- ccount = 0
- summary.append(hemistiches.hemis_types[hemis[count]])
- l2 += ('{:^'+str(len(orig))+'}'
- ).format(hemistiches.hemis_types[hemis[count]])
- last_he = count
- else:
- l2 += ' ' * len(orig)
- summary.append(str(ccount)+':')
- result = ''.join(l2)
- summary = ('{:^9}').format(''.join(summary))
- return summary + result
+class ErrorBadMetric:
+ """Error whose report states the metric expected by the pattern."""
+ def report(self, pattern):
+ """Return the message giving pattern.length and, when pattern.hemistiches
+ is non-empty, the comma-separated hemistiche positions."""
+ # The plural-suffix conditional needs its own parentheses: '%' binds
+ # tighter than 'if/else', so without them the whole
+ # " with hemistiche%s at " text was replaced by a bare 's' whenever
+ # there was more than one hemistiche.
+ return (_("Illegal metric: expected %d syllable%s%s") %
+ (pattern.length, '' if pattern.length == 1 else 's',
+ '' if len(pattern.hemistiches) == 0
+ else (_(" with hemistiche%s at ") %
+ ('' if len(pattern.hemistiches) == 1 else 's'))
+ + ','.join(str(a) for a in pattern.hemistiches)))
- def report(self, short=False):
- num = min(len(self.possible), 4)
- truncated = num < len(self.possible)
- return Error.report(
- self,
- (_("Bad metric (expected %s, inferred %d illegal option%s)") %
- (self.pattern.metric,
- len(self.possible), ('s' if len(self.possible) != 1 else
- ''))),
- short,
- list(map(self.align, self.possible[:num]))
- + ([_("... worse options omitted ...")] if truncated else [])
- )
-
-class ErrorMultipleWordOccurrence(Error):
+class ErrorMultipleWordOccurrence:
def __init__(self, word, occurrences):
self.word = word
self.occurrences = occurrences
@@ -195,19 +136,17 @@ class ErrorMultipleWordOccurrence(Error):
def get_id(self):
return self.pattern.myid
- def report(self, short=False):
- return Error.report(self, _("Too many occurrences of word %s for rhyme %s")
- % (self.word, self.get_id()), short)
+ def report(self, pattern):
+ """Return the message naming the repeated word and the rhyme id.
+
+ The rhyme id is read from the pattern argument (pattern.myid), as
+ ErrorBadRhymeSound.report does; get_id() reads self.pattern, which
+ the visible __init__ of this class does not set."""
+ return (_("Too many occurrences of word %s for rhyme %s")
+ % (self.word, pattern.myid))
-class ErrorIncompleteTemplate(Error):
- def report(self, short=False):
- return Error.report(self, _("Poem is not complete"),
- short)
+class ErrorIncompleteTemplate:
+ def report(self, pattern):
+ return _("Poem is not complete")
-class ErrorOverflowedTemplate(Error):
- def report(self, short=False):
- return Error.report(self, _("Verse is beyond end of poem"),
- short)
+class ErrorOverflowedTemplate:
+ def report(self, pattern):
+ return _("Verse is beyond end of poem")
class TemplateLoadError(BaseException):
def __init__(self, msg):
diff --git a/hemistiches.py b/hemistiches.py
@@ -1,80 +0,0 @@
-from common import sure_end_fem
-
-hemis_types = {
- 'ok' : '/', # correct
- 'bad' : '!', # something wrong
- 'cut' : '?', # falls at the middle of a word
- 'fem' : '\\', # preceding word ends by a mute e
- 'forbidden' : '#', # last word of hemistiche cannot occur at end of hemistiche
- }
-
-# these words are forbidden at hemistiche
-forbidden_hemistiche = [
- "le",
- "la",
- ]
-
-def align2str(align):
- return ''.join([x[0] if isinstance(x, tuple) else x for x in align])
-
-def check_spaces(align, pos):
- if pos >= len(align):
- # not enough syllabes for hemistiche
- return "bad"
- if align[pos] == ' ' or '-' in align[pos]:
- # word boundary here, so this is ok
- return "ok"
- # skip consonants
- if not isinstance(align[pos], tuple):
- return check_spaces(align, pos + 1)
- # hemistiche falls at the middle of a word
- return "cut"
-
-def check_hemistiche(align, pos, hem, check_end_hemistiche):
- if pos >= len(align):
- # not enough syllabes for hemistiche
- return ("bad", pos)
- if hem == 0:
- # hemistiche should end here, check that this is a word boundary
- if check_end_hemistiche:
- if (align2str(align[:pos+1]).split()[-1]) in forbidden_hemistiche:
- return ("forbidden", pos)
- return (check_spaces(align, pos), pos)
- if hem < 0:
- # hemistiche falls at the middle of a vowel cluster
- return ("cut", pos)
- # skip consonants
- if not isinstance(align[pos], tuple):
- return check_hemistiche(align, pos +1, hem, check_end_hemistiche)
- # hemistiche is there, we should not have a feminine ending here
- if hem == 1:
- if pos + 1 >= len(align):
- # not enough syllabes for hemistiche
- return ("bad", pos)
- if ((align[pos][0] + align[pos+1]).rstrip() in sure_end_fem):
- # check that this isn't a one-syllabe wourd (which is allowed)
- ok = False
- for i in range(2):
- for j in ' -':
- if j in align[pos-i-1]:
- ok = True
- if not ok:
- # hemistiche ends in feminine
- return ("fem", pos)
- return check_hemistiche(align, pos+1, hem - align[pos][1],
- check_end_hemistiche)
-
-def check_hemistiches(align, hems, check_end_hemistiche):
- """From a sorted list of distinct hemistiche positions, return a
- dictionary which maps each position to the status of this
- hemistiche"""
-
- result = {}
- pos = 0
- h2 = 0
- for h in hems:
- r, pos = check_hemistiche(align, pos, h-h2, check_end_hemistiche)
- h2 = h
- result[h] = r
- return result
-
diff --git a/metric.py b/metric.py
@@ -1,224 +0,0 @@
-#!/usr/bin/python
-#coding: utf-8
-
-import re
-from common import normalize, is_vowels, consonants, sure_end_fem, is_consonants
-import vowels
-import haspirater
-
-
-no_hiatus = ["oui"]
-
-
-def annotate_aspirated(word):
- """Annotate aspirated 'h'"""
- if word[0] != 'h':
- return word
- if haspirater.lookup(word):
- return '*'+word
- else:
- return word
-
-def contains_break(chunk):
- return ' ' in chunk or '-' in chunk
-
-def possible_weights(chunks, pos, diaeresis):
- if diaeresis == "classical":
- return vowels.possible_weights_ctx(chunks, pos)
- elif diaeresis == "permissive":
- return vowels.possible_weights_approx(chunks[pos])
-
-def fit(chunks, pos, left, diaeresis):
- """bruteforce exploration of all possible vowel cluster weghting,
- within a maximum total of left"""
- if pos >= len(chunks):
- return [[]] # the only possibility is the empty list
- if left < 0:
- return [] # no possibilities
- # skip consonants
- if (not is_vowels(chunks[pos])):
- return [[chunks[pos]] + x for x in fit(chunks, pos+1, left, diaeresis)]
- else:
- if ((pos >= len(chunks) - 2 and chunks[pos] == 'e') and not (
- pos <= 0 or contains_break(chunks[pos-1])) and not (
- pos <= 1 or contains_break(chunks[pos-2]))):
- # special case for verse endings, which can get elided (or not)
- # but we don't elide lone syllables ("prends-le", etc.)
- if pos == len(chunks) - 1:
- weights = [0] # ending 'e' is elided
- elif chunks[pos+1] == 's':
- weights = [0] # ending 'es' is elided
- elif chunks[pos+1] == 'nt':
- # ending 'ent' is sometimes elided
- # actually, this will have an influence on the rhyme's gender
- weights = [0, 1]
- else:
- weights = possible_weights(chunks, pos, diaeresis)
- else:
- if (pos >= len(chunks) - 1 and chunks[pos] == 'e' and
- pos > 0 and (chunks[pos-1].endswith('-c') or
- chunks[pos-1].endswith('-j'))):
- weights = [0] # -ce and -je are elided
- else:
- weights = possible_weights(chunks, pos, diaeresis)
- result = []
- for weight in weights:
- # combine all possibilities
- result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1,
- left - weight, diaeresis)]
- return result
-
-def feminine(align, verse, phon):
- for a in sure_end_fem:
- if verse.endswith(a):
- # check that this isn't a one-syllabe wourd
- for i in range(4):
- for j in ' -':
- try:
- if j in align[-i-1]:
- return ['M', 'F']
- except IndexError:
- return ['M', 'F']
- return ['F']
- if not verse.endswith('ent'):
- return ['M']
- # verse ends with 'ent'
- if align[-2][1] == 0:
- return ['F'] # mute -ent
- if align[-2][1] > 0 and align[-2][0] == 'e':
- return ['M'] # non-mute "-ent" by the choice of metric
- possible = []
- # now, we must check pronunciation?
- # "tient" vs. "lient" for instance, "excellent"...
- for possible_phon in phon:
- if possible_phon.endswith(')') or possible_phon.endswith('#'):
- possible.append('M')
- else:
- possible.append('F')
- if possible_phon.endswith('E') and verse.endswith('aient'):
- # imparfait and conditionnel are masculine...
- possible.append('M')
- return possible
-
-
-def parse(text, phon, bound, forbidden_ok, hiatus_ok, diaeresis):
- """Return possible aligns for text, bound is an upper bound on the align
- length to limit running time, phon is the pronunciation to help for gender,
- forbidden_ok is true if we allow classically forbidden patterns"""
-
- original_text = normalize(text)
-
- # avoid some vowel problems
- text = re.sub("qu", 'q', original_text)
- text = re.sub("gue", 'ge', text)
- text = re.sub("gué", 'gé', text)
- text = re.sub("guè", 'gè', text)
- text = re.sub("gua", 'ga', text)
-
- # split in words
- words = text.split(' ')
-
- # other exceptions
- for i in range(len(words)):
- # no elision on y- words except "ypérite", "yeuse", "yeux"
- if words[i].startswith('y') and words[i] != "y" and not (
- words[i].startswith('yp') or words[i].startswith('yeu')):
- words[i] = "*" + words[i]
-
- # no elision for "oui", "ouis", "ouistitis"
- # but elision for "ouighour"
- # TODO boileau writes:
- # "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
- # so it's unclear what should be done here
- # if (words[i] == "oui" or words[i] == "ouis" or
- # words[i].startswith("ouistiti")):
- # words[i] = "*" + words[i]
-
- # no elision on those numerals
- # TODO "un" or "une" are sometimes elidable and sometimes non-elidable
- # Belle, une fois encor, réponds à mon appel.
- # Mon journal, il est vrai, a une belle une.
- if (words[i] == "onze"):
- words[i] = "*" + words[i]
-
- if is_consonants(words[i]):
- new_word = []
- for x in words[i]:
- if (x == 'w'):
- new_word.append("doublevé")
- else:
- new_word.append(x + "é")
- words[i] = ''.join(new_word)
-
-
- # aspirated
- words = [annotate_aspirated(word) for word in words if word != '']
-
- pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)
-
- forbidden = None
- hiatus = None
-
- # cut each word in chunks of vowels and consonants, with some specific
- # kludges
- for i in range(len(words)):
- words[i] = re.split(pattern, words[i])
- words[i] = [chunk for chunk in words[i] if chunk != '']
- nwords = []
- # the case of 'y' is special
- for chunk in words[i]:
- if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y':
- nwords.append(chunk)
- else:
- a = chunk.split('y')
- nwords.append(a[0])
- nwords.append('Y')
- if a[1] != '':
- nwords.append(a[1])
- else:
- # the case of "pays" is very special :-(
- if words[i] == ['p', 'ay', 's']:
- nwords.append('y')
- words[i] = nwords
- # remove mute 'e'
- if i > 0:
- if is_vowels(words[i][0], True):
- if words[i-1][-1] == 'e' and sum(
- [1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
- words[i-1].pop(-1)
- words[i-1][-1] = words[i-1][-1]+"`"
- if (is_vowels(words[i-1][-1]) and not words[i-1][-1][-1] == 'e'
- and not (''.join(words[i]) in no_hiatus
- and ''.join(words[i-1]) in no_hiatus)):
- hiatus = words[i-1][-1] + ' ' + words[i][0]
- else:
- if words[i-1][-1] == 'ée' or words[i-1][-1] == 'ie':
- forbidden = words[i-1][-1]
- if words[i-1][-1] == 's' and len(words[i-1]):
- if words[i-1][-2] == 'ée' or words[i-1][-2] == 'ie':
- forbidden = words[i-1][-2]
- # TODO there are arcane rules for "aient"
- # case of "soient"
- # TODO there are a lot of "oient" in boileau and malherme
- # so apparently there is no simple way to check that
- # if words[i-1][-1] == 'nt' and len(words[i-1]):
- # if words[i-1][-2] == 'oie':
- # if len(words[i-1]) != 3 or words[i-1][-3] != 's':
- # forbidden = True
-
- if forbidden and not forbidden_ok:
- return ("forbidden", forbidden)
- if hiatus and not hiatus_ok:
- return ("hiatus", hiatus)
-
- # group back words
- for word in words:
- word.append(' ')
- chunks = sum(words, [])[:-1]
-
- # return all possibilities to weigh the vowel clusters, annotated by
- # the femininity of the align (depending both on the align and
- # original text)
- return list(map((lambda x: (x, feminine(x, original_text, phon))),
- fit(chunks, 0, bound, diaeresis)))
-
diff --git a/plint.py b/plint.py
@@ -17,8 +17,8 @@ def run():
should_end = True
line = ""
errors = template.check(line, f2, last=should_end)
- for error in errors:
- print(error.report(), file=sys.stderr)
+ if errors:
+ print(errors.report(), file=sys.stderr)
ok = False
if should_end:
break
diff --git a/plint_irc.py b/plint_irc.py
@@ -4,7 +4,6 @@ import localization
import re
import sys
import rhyme
-import metric
from template import Template
from pprint import pprint
from common import normalize
@@ -12,11 +11,18 @@ from common import normalize
buf = ""
lbuf = []
-def output(l):
- print(' '.join(l))
- f = open(sys.argv[2], 'a')
+def write(l, descriptor=None):
+ if descriptor:
+ f = descriptor
+ else:
+ f = open(sys.argv[2], 'a')
print(' '.join(l), file=f)
- f.close()
+ if not descriptor:
+ f.close()
+
+def output(l, descriptor):
+ print(' '.join(l), file=descriptor)
+ write(l, descriptor if descriptor != sys.stdout else None)
def leading_cap(text):
for c in text:
@@ -28,7 +34,7 @@ def leading_cap(text):
return False
return False
-def manage(line, silent=False):
+def manage(line, descriptor=sys.stdout):
"""manage one line, indicate if an error occurred"""
global buf
global lbuf
@@ -43,10 +49,7 @@ def manage(line, silent=False):
if len(lbuf) > 0:
lbuf.append(l)
else:
- if not silent:
- f = open(sys.argv[2], 'a')
- print(' '.join(l), file=f)
- f.close()
+ write(l, descriptor)
return True
if first[0] == '/':
return False # ignore other commands
@@ -69,20 +72,16 @@ def manage(line, silent=False):
return True
errors = template.check(text, quiet=False)
quiet = False
- for error in errors:
- if error == None:
- quiet = True
- if not quiet:
- print(error.report())
- if len(errors) == 0:
+ if errors:
+ print(errors.report())
+ if not errors:
buf = ""
- if not silent:
- if usebuf:
- for bl in lbuf:
- output(bl)
- output(l)
+ if usebuf:
+ for bl in lbuf:
+ output(bl, descriptor)
+ output(l, descriptor)
lbuf = []
- return len(errors) == 0
+ return not errors
if len(sys.argv) not in [3, 4]:
print("Usage: %s TEMPLATE POEM [OFFSET]" % sys.argv[0], file=sys.stderr)
@@ -113,8 +112,8 @@ for line in f.readlines():
pos += 1
if pos <= offset:
continue # ignore first lines
- print("Read: %s" % line, file=sys.stderr)
- if not manage(line, True):
+ print("%s (read)" % line.rstrip(), file=sys.stderr)
+ if not manage(line, sys.stderr):
print("Existing poem is wrong!", file=sys.stderr)
sys.exit(2)
f.close()
diff --git a/plint_web.py b/plint_web.py
@@ -5,7 +5,7 @@ import localization
import re
import template
import error
-from bottle import run, Bottle, request, static_file
+from bottle import run, Bottle, request, static_file, redirect
from jinja2 import Environment, PackageLoader
env = Environment(loader=PackageLoader('plint_web', 'views'))
@@ -41,8 +41,8 @@ def get_locale():
except AttributeError:
return 'en'
-def get_title():
- if get_locale() == 'fr':
+def get_title(lang):
+ if lang == 'fr':
return "plint -- vérification formelle de poèmes"
else:
return "plint -- French poetry checker"
@@ -57,13 +57,23 @@ def server_static(filename):
@app.route('/')
def root():
- return env.get_template('index.html').render(title=get_title(),
- lang=get_locale())
+ redirect('/' + get_locale() + '/')
-@app.route('/about')
-def about():
- return env.get_template('about.html').render(title=get_title(),
- lang=get_locale())
+@app.route('/<page>')
+def paged(page):
+ redirect('/' + get_locale() + '/' + page)
+
+@app.route('/<lang>/')
+def root(lang):
+ if lang not in ['fr', 'en']:
+ return paged(lang)
+ return env.get_template('index.html').render(title=get_title(lang),
+ lang=lang, path="")
+
+@app.route('/<lang>/about')
+def about(lang):
+ return env.get_template('about.html').render(title=get_title(lang),
+ lang=lang, path="about")
def check(poem):
if len(poem) > 8192:
@@ -75,26 +85,27 @@ def check(poem):
s[x].strip()
return s
-@app.route('/check', method='POST')
-def q():
+@app.route('/<lang>/check', method='POST')
+def q(lang):
d = {
'poem': request.forms.get('poem'),
'template': request.forms.get('template'),
- 'lang': get_locale(),
+ 'lang': lang,
+ 'nolocale': True,
}
- localization.init_locale(get_locale())
+ localization.init_locale(lang)
d['poem'] = re.sub(r'<>&', '', d['poem'])
print(d['poem'])
poem = check(d['poem'])
if not poem:
- if get_locale() == 'fr':
+ if lang == 'fr':
msg = "Le poème est vide, trop long, ou a des lignes trop longues"
else:
msg = "Poem is empty, too long, or has too long lines"
d['error'] = msg
return env.get_template('error.html').render(**d)
if not re.match("^[a-z_]+$", d['template']):
- if get_locale() == 'fr':
+ if lang == 'fr':
msg = "Modèle inexistant"
else:
msg = "No such template"
@@ -108,7 +119,7 @@ def q():
x = f.read()
f.close()
except IOError:
- if get_locale() == 'fr':
+ if lang == 'fr':
msg = "Modèle inexistant"
else:
msg = "No such template"
@@ -117,7 +128,7 @@ def q():
try:
templ = template.Template(x)
except error.TemplateLoadError as e:
- if get_locale() == 'fr':
+ if lang == 'fr':
msg = "Erreur à la lecture du modèle : " + e.msg
else:
msg = "Error when reading template: " + e.msg
@@ -136,18 +147,18 @@ def q():
if line == None:
line = ""
last = True
- errors = [error.report(short=True) for error in templ.check(line, last=last)]
- if errors != [] and not firsterror:
+ errors = templ.check(line, last=last)
+ if errors and not firsterror:
firsterror = i
- r.append((line, errors))
- nerror += len(errors)
+ r.append((line, '\n'.join(sum(errors.lines(short=True), [])) if errors else []))
+ nerror += len(errors.errors) if errors else 0
d['result'] = r
d['firsterror'] = firsterror
d['nerror'] = nerror
if nerror == 0:
- d['title'] = "[Valid] " + get_title()
+ d['title'] = "[Valid] " + get_title(lang)
else:
- d['title'] = "[Invalid] " + get_title()
+ d['title'] = "[Invalid] " + get_title(lang)
return env.get_template('results.html').render(**d)
if __name__ == '__main__':
diff --git a/res/messages_fr.po b/res/messages_fr.po
@@ -5,8 +5,8 @@
msgid ""
msgstr ""
"Project-Id-Version: plint\n"
-"POT-Creation-Date: 2013-01-30 23:19+CET\n"
-"PO-Revision-Date: 2013-01-30 23:20+0100\n"
+"POT-Creation-Date: 2013-02-15 00:05+CET\n"
+"PO-Revision-Date: 2013-02-15 00:06+0100\n"
"Last-Translator: Antoine Amarilli <a3nm@a3nm.net>\n"
"Language-Team: \n"
"Language: \n"
@@ -15,67 +15,63 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
-#: error.py:25
-msgid "error: %s"
-msgstr "erreur : %s"
+#: error.py:53
+msgid " (see '%s' above)"
+msgstr " (voir '%s' ci-dessus)"
-#: error.py:26
-msgid "Line is: %s"
-msgstr "Ligne : %s"
+#: error.py:58
+msgid "Illegal characters"
+msgstr "Caractères interdits"
-#: error.py:45
-msgid "Illegal character%s: %s"
-msgstr "Mauvais caractère%s : %s"
+#: error.py:64
+msgid "Illegal ambiguous pattern"
+msgstr "Motif ambigu interdit"
-#: error.py:54
-msgid "Illegal ambiguous pattern: %s"
-msgstr "Motif ambigu interdit : %s"
+#: error.py:70
+msgid "Illegal hiatus"
+msgstr "Hiatus interdit"
-#: error.py:62
-msgid "Illegal hiatus: %s"
-msgstr "Hiatus interdit : %s"
+#: error.py:82
+msgid "%s for type %s (expected \"%s\", inferred \"%s\")"
+msgstr "%s pour le type %s (attendu : \"%s\", lu : \"%s\")"
-#: error.py:75
-msgid "%s for type %s (expected %s, inferred \"%s\")"
-msgstr "%s pour le type %s (attendu : %s, lu : \"%s\")"
+#: error.py:89
+msgid "Bad rhyme genre"
+msgstr "Mauvais genre de rime"
-#: error.py:81
+#: error.py:92
msgid " or "
msgstr " ou "
-#: error.py:91
-msgid "Bad rhyme genre"
-msgstr "Mauvais genre de rime"
+#: error.py:103
+msgid "Bad rhyme"
+msgstr "Mauvaise rime"
-#: error.py:100
-msgid " (ending: \""
-msgstr " (fin : \""
+#: error.py:111
+msgid ", ending: \""
+msgstr ", fin: \""
-#: error.py:106
+#: error.py:117
msgid "%s for type %s (expected %s)"
msgstr "%s pour le type %s (attendu : %s)"
-#: error.py:111
-msgid "Bad rhyme"
-msgstr "Mauvaise rime"
-
-#: error.py:181
-msgid "Bad metric (expected %s, inferred %d illegal option%s)"
-msgstr "Mauvaise métrique (attendu : %s, lu %d choix interdit%s)"
+#: error.py:122
+msgid "Illegal metric: expected %d syllable%s%s"
+msgstr "Métrique illégale: attendu : %d syllabe%s%s"
-#: error.py:187
-msgid "... worse options omitted ..."
-msgstr "... et d'autres choix non affichés ..."
+#: error.py:125
+msgid " with hemistiche%s at "
+msgstr " avec hémistiche%s en "
-#: error.py:199
+#: error.py:138
msgid "Too many occurrences of word %s for rhyme %s"
msgstr "Trop d'occurrences du mot %s pour la rime %s"
-#: error.py:204
+#: error.py:143
msgid "Poem is not complete"
msgstr "Poème incomplet"
-#: error.py:209
+#: error.py:147
msgid "Verse is beyond end of poem"
msgstr "Vers au-delà de la fin du poème"
@@ -87,26 +83,41 @@ msgstr "Usage : %s MODÈLE [OCONTEXTE]"
msgid "Check stdin according to TEMPLATE, report errors on stdout"
msgstr "Vérifie l'entrée standard suivant MODÈLE, signale les erreurs sur la sortie standard"
-#: template.py:36
+#: template.py:31
msgid "Metric length limit exceeded"
msgstr "La longueur de la métrique est trop grande"
-#: template.py:82
+#: template.py:78
msgid "Bad value for global option %s"
msgstr "Mauvaise valeur pour l'option globale %s"
-#: template.py:92
+#: template.py:88
msgid "Unknown global option"
msgstr "Option globale inconnue"
-#: template.py:107
+#: template.py:103
msgid "Template is empty"
msgstr "Modèle vide"
-#: template.py:345
+#: template.py:285
msgid "Bad value in global option"
msgstr "Mauvaise valeur pour l'option globale %s"
+#~ msgid "hiatus"
+#~ msgstr "hiatus"
+
+#~ msgid "error: %s"
+#~ msgstr "erreur : %s"
+
+#~ msgid "Line is: %s"
+#~ msgstr "Ligne : %s"
+
+#~ msgid "Bad metric (expected %s, inferred %d illegal option%s)"
+#~ msgstr "Mauvaise métrique (attendu : %s, lu %d choix interdit%s)"
+
+#~ msgid "... worse options omitted ..."
+#~ msgstr "... et d'autres choix non affichés ..."
+
#~ msgid "genre"
#~ msgstr "genre"
diff --git a/static/main.css b/static/main.css
@@ -7,13 +7,20 @@ h1 {
padding: 0.2em;
}
+#lang {
+ float: right;
+ padding: 0.2em;
+ margin: 0;
+ margin-left: 0.3em;
+}
+
#body {
margin-top: 1em;
padding-left: 0.5em;
padding-right: 0.5em;
}
-h1 a, #about {
+header a, #about {
color: white;
text-decoration: none;
}
diff --git a/static/tpl/french_abab.tpl b/static/tpl/french_abab.tpl
@@ -1,13 +1,13 @@
! incomplete_ok:no repeat_ok:no
6/6 A x
6/6 B X
-6/6 A X
-6/6 B x
+6/6 A x
+6/6 B X
6/6 A x
6/6 B X
-6/6 A X
-6/6 B x
+6/6 A x
+6/6 B X
6/6 C y
6/6 C y
diff --git a/static/tpl/italian_abab.tpl b/static/tpl/italian_abab.tpl
@@ -1,13 +1,13 @@
! incomplete_ok:no repeat_ok:no
6/6 A x
6/6 B X
-6/6 A X
-6/6 B x
+6/6 A x
+6/6 B X
6/6 A x
6/6 B X
-6/6 A X
-6/6 B x
+6/6 A x
+6/6 B X
6/6 C y
6/6 C y
diff --git a/template.py b/template.py
@@ -1,28 +1,15 @@
import error
-from metric import parse
-from hemistiches import check_hemistiches
import copy
import rhyme
+from verse import Verse
from common import normalize, legal, strip_accents_one, rm_punct
from nature import nature_count
from vowels import possible_weights_ctx, make_query
+from pprint import pprint
-def handle(poss):
- l = []
- #print(poss)
- for i in range(len(poss)):
- if isinstance(poss[i], tuple):
- #print(cleared[:i][::-1])
- #print(cleared[i+1:])
- # print(poss)
- # print (make_query(poss, i))
- if len(possible_weights_ctx(poss, i)) > 1:
- l.append((poss[i][1], make_query(poss, i)))
- return l
-
class Pattern:
- def __init__(self, metric, myid, femid, constraint):
+ def __init__(self, metric, myid="", femid="", constraint=None):
self.metric = metric
self.parse_metric()
self.myid = myid
@@ -42,7 +29,7 @@ class Pattern:
self.length = self.hemistiches.pop()
class Template:
- def __init__(self, string):
+ def __init__(self, string=None):
self.template = []
self.pattern_line_no = 0
self.forbidden_ok = False
@@ -55,7 +42,8 @@ class Template:
self.check_occurrences = True
self.diaeresis = "classical"
self.mergers = []
- self.load(string)
+ if string:
+ self.load(string)
self.line_no = 0
self.position = 0
self.prev = None
@@ -106,23 +94,6 @@ class Template:
if len(self.template) == 0:
raise error.TemplateLoadError(_("Template is empty"))
- def count(self, align):
- """total weight of an align"""
- return sum([x[1] for x in align if isinstance(x, tuple)])
-
- def rate(self, pattern, align):
- """Rate align according to pattern"""
- align, fem, hemis = align
- c = self.count(align)
- ok = True
- for h in hemis.values():
- if h != "ok":
- ok = False
- if ok and c == pattern.length:
- return 0
- return ((1+len(hemis.keys()))*abs(pattern.length - c)
- + sum([1 for x in hemis.values() if x != "ok"]))
-
def match(self, line, ofile=None, quiet=False, last=False):
"""Check a line against current pattern, return errors"""
@@ -131,95 +102,44 @@ class Template:
errors = []
pattern = self.get()
+ line_with_case = normalize(line, downcase=False)
+ line_normalize = normalize(line)
+
+ v = Verse(line, self, pattern)
+
if last:
if was_incomplete and not self.incomplete_ok and not self.overflowed:
- errors.append(error.ErrorIncompleteTemplate())
- return errors, pattern
+ return [error.ErrorIncompleteTemplate()], pattern, v
+ return [], pattern, v
if self.overflowed:
- errors.append(error.ErrorOverflowedTemplate())
- return errors, pattern
-
- # check characters
- illegal = set()
- for x in line:
- if not rm_punct(strip_accents_one(x)[0].lower()) in legal:
- illegal.add(x)
- if len(illegal) > 0:
- if quiet:
- return [None], pattern
- errors.append(error.ErrorBadCharacters(illegal))
- return errors, pattern
-
- line_with_case = normalize(line, downcase=False)
- line = normalize(line)
+ return [error.ErrorOverflowedTemplate()], pattern, v
# rhymes
if pattern.myid not in self.env.keys():
# initialize the rhyme
- self.env[pattern.myid] = rhyme.Rhyme(line, pattern.constraint,
+ self.env[pattern.myid] = rhyme.Rhyme(line_normalize, pattern.constraint,
self.mergers, self.normande_ok)
else:
# update the rhyme
old_p = self.env[pattern.myid].phon
old_e = self.env[pattern.myid].eye
- self.env[pattern.myid].feed(line, pattern.constraint)
+ self.env[pattern.myid].feed(line_normalize, pattern.constraint)
# no more possible rhymes, something went wrong
if not self.env[pattern.myid].satisfied():
self.env[pattern.myid].phon = old_p
self.env[pattern.myid].eye = old_e
errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
- # compute alignments, check hemistiches, sort by score
- possible = parse(line, self.env[pattern.myid].phon, pattern.length + 2,
- self.forbidden_ok, self.hiatus_ok, self.diaeresis)
- if not isinstance(possible, list):
- if possible[0] == "forbidden":
- errors.append(error.ErrorForbiddenPattern(possible[1]))
- elif possible[0] == "hiatus":
- errors.append(error.ErrorHiatus(possible[1]))
- possible = []
- return errors, pattern
- possible = list(map((lambda p: (p[0], p[1],
- check_hemistiches(p[0], pattern.hemistiches, self.check_end_hemistiche))),
- possible))
- possible = map((lambda x: (self.rate(pattern, x), x)), possible)
- possible = sorted(possible, key=(lambda x: x[0]))
-
- if quiet:
- if len(possible) == 0:
- return [None], pattern
- if possible[0][0] > (1+len(pattern.hemistiches))*pattern.length/2:
- return [None], pattern
+ errors += v.problems()
- # check metric
- if len(possible) == 0 or possible[0][0] != 0:
- errors.append(error.ErrorBadMetric(possible))
- if len(possible) == 0:
- return errors, pattern
- # keep the best alignment as hypotheses
- possible = [(score, align) for (score, align) in possible
- if score == possible[0][0]]
if ofile:
- if len(possible) == 1 and possible[0][0] == 0:
- l = [(x[1][0]) for x in possible]
- poss = []
- for p in l:
- c = []
- while len(p) > 0:
- x = p.pop()
- if x == ' ':
- poss.append(c[::-1])
- c = []
- else:
- c.append(x)
- if len(c) > 0:
- poss.append(c[::-1])
- for w in poss:
- l = handle(w)
- for x in l:
- # print(x)
- print((str(x[0]) + ' ' + ' '.join(x[1])), file=ofile)
+ possible = v.possible
+ if len(possible) == 1:
+ for i, p in enumerate(possible[0]):
+ if 'weight' in p.keys() and len(p['weights']) > 1:
+ print(str(p['weight']) + ' '
+ + ' '.join(make_query(possible[0], i)), file=ofile)
# occurrences
if self.check_occurrences:
@@ -233,7 +153,7 @@ class Template:
errors.append(error.ErrorMultipleWordOccurrence(last_word,
self.occenv[pattern.myid][last_word]))
- # rhyme genres
+ # rhyme genres
# inequality constraint
# TODO this is simplistic and order-dependent
if pattern.femid.swapcase() in self.femenv.keys():
@@ -250,12 +170,12 @@ class Template:
self.femenv[pattern.femid] = x
else:
old = list(self.femenv[pattern.femid])
- new = list(set(sum([x[1] for (score, x) in possible], [])))
+ new = v.genders(self.env[pattern.myid].phon)
self.femenv[pattern.femid] &= set(new)
if len(self.femenv[pattern.femid]) == 0:
errors.append(error.ErrorBadRhymeGenre(old, new))
- return errors, pattern
+ return errors, pattern, v
def parse_line(self, line):
"""Parse template line from a line"""
@@ -324,18 +244,16 @@ class Template:
self.line_no += 1
line = line.rstrip()
if normalize(line) == '' and not last:
- return []
+ return None
#possible = [compute(p) for p in possible]
#possible = sorted(possible, key=rate)
- errors, pattern = self.match(line, ofile, quiet=quiet, last=last)
- for error in errors:
- if error != None:
- # update errors with line position and pattern
- error.pos(line, self.line_no, pattern)
- if len(errors) > 0 and self.reject_errors:
- self.back()
- self.line_no -= 1
- return errors
+ errors, pattern, verse = self.match(line, ofile, quiet=quiet, last=last)
+ if len(errors) > 0:
+ if self.reject_errors:
+ self.back()
+ self.line_no -= 1
+ return error.ErrorCollection(self.line_no, line, pattern, verse, errors)
+ return None
def str2bool(x):
if x.lower() in ["yes", "oui", "y", "o"]:
diff --git a/test/au_lecteur b/test/au_lecteur
@@ -46,4 +46,4 @@ Et dans un bâillement avalerait le monde;
C'est l'Ennui!--L'œil chargé d'un pleur involontaire,
Il rêve d'échafauds en fumant son houka.
Tu le connais, lecteur, ce monstre délicat,
---Hypocrite lecteur,--mon semblable,--mon frère!
+-- Hypocrite lecteur, -- mon semblable, -- mon frère!
diff --git a/verse.py b/verse.py
@@ -0,0 +1,356 @@
+#!/usr/bin/python3
+
+import common
+from common import consonants, normalize, is_consonants, is_vowels, sure_end_fem, strip_accents_one
+import re
+import vowels
+import haspirater
+import error
+from pprint import pprint
+
+class Verse:
+  def elision(self, word):
+    """Return the possible elision behaviours in front of `word`.
+
+    The result is a list of booleans; True means that a preceding mute 'e'
+    is elided before `word`, False that it is not. Two values mean that
+    both readings are accepted.
+    """
+    if not word:
+      # nothing to elide against (word[0] below would raise IndexError)
+      return [False]
+    if (word.startswith('y') and word != 'y' and
+        not word.startswith(("yp", "yeu"))):
+      return [False]
+    if word in ["oui", "ouis"] or word.startswith("ouistiti"):
+      # elision for those words, but beware, no elision for "ouighour"
+      # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
+      # so elision only sometimes
+      return [True, False]
+    # "un", "une" are non-elided as nouns ("cette une")
+    if word in ["un", "une"]:
+      return [True, False]
+    # "onze" is not elided
+    if word == "onze":
+      return [False]
+    if word[0] == 'h':
+      # negate every answer of haspirater.lookup (presumably "is this h
+      # aspirated?" -- confirm against haspirater)
+      return [not s for s in haspirater.lookup(word)]
+    if is_vowels(word[0]):
+      return [True]
+    return [False]
+
+  def remove_trivial(self, chunks, predicate):
+    """Glue the chunks satisfying `predicate` onto their neighbours.
+
+    A trivial chunk is appended to the previously kept chunk; trivial chunks
+    met before any kept chunk are prepended to the first kept one. When no
+    chunk is kept at all the result is empty.
+    """
+    merged = []
+    pending = ""
+    for piece in chunks:
+      if not predicate(piece):
+        merged.append(pending + piece)
+        pending = ""
+      elif merged:
+        merged[-1] += piece
+      else:
+        pending += piece
+    return merged
+
+  @property
+  def line(self):
+    """The verse rebuilt from the 'original' field of every chunk."""
+    originals = [chunk['original'] for chunk in self.chunks]
+    return ''.join(originals)
+
+  def __init__(self, line, template, pattern):
+    """Split `line` into annotated chunks and compute its scansions.
+
+    `template` supplies the checking options read by the helper methods
+    (diaeresis mode, hemistiche checks), `pattern` the expected metric
+    (`length` and `hemistiches`). On return, self.chunks is a flat list of
+    chunk dicts, self.possible holds the alignments found by fit() and
+    self.text is the concatenation of the chunk texts.
+    """
+    self.template = template
+    self.pattern = pattern
+
+    # The split patterns use '+' and not '*': since Python 3.7 re.split()
+    # also splits on empty matches, which would cut the line between every
+    # pair of characters. Older versions ignored empty matches, so the
+    # result there is unchanged.
+    whitespace_regexp = re.compile(r"(\s+)")
+    all_consonants = consonants + consonants.upper()
+    consonants_regexp = re.compile('([^'+all_consonants+'*-]+)', re.UNICODE)
+
+    def trivial(w):
+      # whitespace only, or nothing left once normalized
+      return re.match(r"^\s*$", w) or len(normalize(w, rm_all=True)) == 0
+
+    # words, then consonant/non-consonant chunks inside each word
+    words = re.split(whitespace_regexp, line)
+    words = self.remove_trivial(words, trivial)
+    pre_chunks = [re.split(consonants_regexp, word) for word in words]
+    pre_chunks = [self.remove_trivial(x, trivial) for x in pre_chunks]
+    self.chunks = [[{'original': y, 'text': normalize(y, rm_apostrophe=True)}
+      for y in x] for x in pre_chunks]
+
+    # check forbidden characters
+    for w in self.chunks:
+      for y in w:
+        for x in y['text']:
+          if not common.rm_punct(strip_accents_one(x)[0].lower()) in common.legal:
+            y['error'] = "illegal"
+
+    # gu- and qu- simplifications
+    for w in self.chunks:
+      if len(w) < 2:
+        continue
+      for i, x in enumerate(w[:-1]):
+        if not w[i+1]['text'].startswith('u'):
+          continue
+        if w[i]['text'].endswith('q'):
+          w[i+1]['text'] = w[i+1]['text'][1:]
+          if w[i+1]['text'] == '':
+            # the chunk is about to be dropped: keep its original text
+            w[i]['original'] += w[i+1]['original']
+        if w[i]['text'].endswith('g') and len(w[i+1]['text']) >= 2:
+          if w[i+1]['text'][1] in "eéèa":
+            w[i+1]['text'] = w[i+1]['text'][1:]
+    # remove empty chunks created by simplifications
+    for i, w in enumerate(self.chunks):
+      self.chunks[i] = [x for x in w if len(x['text']) > 0]
+    # remove leading and trailing crap
+    for w in self.chunks:
+      for p in [0, -1]:
+        while len(w[p]['text']) > 0 and w[p]['text'][0] in ' -':
+          w[p]['text'] = w[p]['text'][1:]
+        while len(w[p]['text']) > 0 and w[p]['text'][-1] in ' -':
+          w[p]['text'] = w[p]['text'][:-1]
+
+    # sigles: a consonant-only word is spelled out letter by letter
+    for i, w in enumerate(self.chunks):
+      if len(w) == 1 and is_consonants(w[0]['text']):
+        new_chunks = []
+        for j, x in enumerate(w[0]['text']):
+          if (x == 'w'):
+            nc = "doublevé"
+          else:
+            nc = x + "a"
+          new_chunks += re.split(consonants_regexp, nc)
+        new_chunks = [x for x in new_chunks if len(x) > 0]
+        new_word = []
+        for j, x in enumerate(new_chunks):
+          # spread the original text over the generated chunks
+          lindex = int(j*len(w[0]['original'])/len(w[0]['text']))
+          rindex = int((j+1)*len(w[0]['original'])/len(w[0]['text']))
+          part = w[0]['original'][lindex:rindex]
+          new_word.append({'original': part, 'text': x})
+        self.chunks[i] = new_word
+
+    # vowel elision problems
+    for w in self.chunks:
+      w[0]['elision'] = self.elision(''.join(x['text'] for x in w))
+
+    # case of 'y'
+    ys_regexp = re.compile(r"(y+)")
+    for i, w in enumerate(self.chunks):
+      new_word = []
+      for j, chunk in enumerate(w):
+        if ('y' not in chunk['text'] or len(chunk['text']) == 1 or
+            chunk['text'].startswith("y")):
+          new_word.append(chunk)
+          continue
+        # special case of "pays"
+        if (chunk['text'] == "ay" and j > 0 and j < len(w) - 1 and
+            w[j-1]['text'].endswith("p") and w[j+1]['text'].startswith("s")):
+          new_word.append(chunk)
+          # force weight
+          chunk['weights'] = [2]
+          continue
+        subchunks = re.split(ys_regexp, chunk['text'])
+        subchunks = [x for x in subchunks if len(x) > 0]
+        for k, subchunk in enumerate(subchunks):
+          lindex = int(k*len(chunk['original'])/len(subchunks))
+          rindex = int((k+1)*len(chunk['original'])/len(subchunks))
+          part = chunk['original'][lindex:rindex]
+          new_subchunk_text = 'Y' if 'y' in subchunk else subchunk
+          new_subchunk = dict(chunk)
+          new_subchunk['original'] = part
+          new_subchunk['text'] = new_subchunk_text
+          new_word.append(new_subchunk)
+      self.chunks[i] = new_word
+
+    # annotate final mute 'e'
+    for i, w in enumerate(self.chunks[:-1]):
+      if w[-1]['text'] != "e":
+        continue
+      if sum([1 for chunk in w if is_vowels(chunk['text'])]) <= 1:
+        continue
+      w[-1]['elidable'] = self.chunks[i+1][0]['elision']
+
+    # annotate hiatus and ambiguities
+    ambiguous_potential = ["ie", "ée"]
+    no_hiatus = ["oui"]
+    for i, w in enumerate(self.chunks[:-1]):
+      if w[-1]['text'] == "s":
+        if w[-2]['text'] in ambiguous_potential:
+          w[-2]['error'] = "ambiguous"
+          w[-1]['error'] = "ambiguous"
+      if w[-1]['text'] in ambiguous_potential:
+        if self.chunks[i+1][0]['text'][0] in consonants:
+          w[-1]['error'] = "ambiguous"
+          self.chunks[i+1][0]['error'] = "ambiguous"
+      elif is_vowels(w[-1]['text']) and not w[-1]['text'].endswith('e'):
+        if is_vowels(self.chunks[i+1][0]['text']):
+          if ''.join(x['text'] for x in w) not in no_hiatus:
+            if ''.join(x['text'] for x in self.chunks[i+1]) not in no_hiatus:
+              w[-1]['error'] = "hiatus"
+              self.chunks[i+1][0]['error'] = "hiatus"
+
+    # annotate word ends
+    for w in self.chunks[:-1]:
+      w[-1]['wordend'] = True
+
+    # collapse words
+    self.chunks = sum(self.chunks, [])
+
+    # annotate weights
+    for i, chunk in enumerate(self.chunks):
+      if (not is_vowels(self.chunks[i]['text'])):
+        continue
+      # for the case of "pays" and related words
+      if 'weights' not in self.chunks[i].keys():
+        self.chunks[i]['weights'] = self.possible_weights_context(i)
+      self.chunks[i]['hemis'] = self.hemistiche(i)
+
+    self.possible = self.fit(0, 0, self.pattern.hemistiches)
+    self.text = self.align2str(self.chunks)
+
+  def contains_break(self, chunk):
+    """Tell whether `chunk` holds a dash or is marked as ending a word."""
+    has_dash = '-' in chunk['text']
+    return has_dash or 'wordend' in chunk
+
+  def possible_weights(self, pos):
+    """Return the candidate syllable weights of the chunk at `pos`.
+
+    Dispatches on the template's diaeresis mode: "classical" looks the
+    chunk up together with its context, "permissive" only uses the chunk's
+    own text.
+
+    Raises ValueError for any other mode. The previous code fell through
+    and returned None, which only failed later when fit() iterated over
+    the stored weights.
+    """
+    mode = self.template.diaeresis
+    if mode == "classical":
+      return vowels.possible_weights_ctx(self.chunks, pos)
+    if mode == "permissive":
+      return vowels.possible_weights_approx(self.chunks[pos]['text'])
+    raise ValueError("unknown diaeresis mode: %r" % (mode,))
+
+ def possible_weights_context(self, pos):
+ """Return the possible weights of the vowel chunk at `pos`.
+
+ Handles the positional special cases (verse-final 'e', 'es', 'ent',
+ '-ce'/'-je', final 'ie'/'ée', elidable 'e') and defers to
+ possible_weights() for everything else. The order of the tests matters.
+ """
+ # An 'e' in the last two chunks, not followed by a vowel chunk, and with
+ # no dash or word end in the two chunks before it.
+ if ((pos >= len(self.chunks) - 2 and self.chunks[pos]['text'] == 'e')
+ and not (pos == len(self.chunks) - 2 and
+ is_vowels(self.chunks[pos+1]['text']))
+ and not (pos <= 0 or self.contains_break(self.chunks[pos-1]))
+ and not (pos <= 1 or self.contains_break(self.chunks[pos-2]))):
+ # special case for verse endings, which can get elided (or not)
+ # but we don't elide lone syllables ("prends-le", etc.)
+ if pos == len(self.chunks) - 1:
+ return [0] # ending 'e' is elided
+ if self.chunks[pos+1]['text'] == 's':
+ return [0] # ending 'es' is elided
+ if self.chunks[pos+1]['text'] == 'nt':
+ # ending 'ent' is sometimes elided
+ # actually, this will have an influence on the rhyme's gender
+ return [0, 1]
+ return self.possible_weights(pos)
+ # Last chunk is an 'e' right after a chunk ending in '-c' or '-j'.
+ if (pos == len(self.chunks) - 1 and self.chunks[pos]['text'] == 'e' and
+ pos > 0 and (self.chunks[pos-1]['text'].endswith('-c') or
+ self.chunks[pos-1]['text'].endswith('-j'))):
+ return [0] # -ce and -je are elided
+ # 'ie' or 'ée' as the very last chunk counts for exactly one.
+ if (pos >= len(self.chunks) - 1
+ and self.chunks[pos]['text'] in ['ie', 'ée']):
+ return [1]
+ # 'ée' followed by at most one chunk also counts for exactly one.
+ if (pos >= len(self.chunks) - 2
+ and self.chunks[pos]['text'] in ['ée']):
+ return [1]
+ # A word-final 'e' annotated by __init__: weight 0 for each reading where
+ # the next word elides it, 1 otherwise.
+ if 'elidable' in self.chunks[pos]:
+ return [0 if x else 1 for x in self.chunks[pos]['elidable']]
+ return self.possible_weights(pos)
+
+ def feminine(self, align, phon):
+ """Return the possible rhyme genders of the verse, as a list of 'M'/'F'.
+
+ align is one alignment produced by fit(), or None when no alignment is
+ available; phon is iterated as a collection of strings (presumably the
+ candidate pronunciations of the rhyme -- confirm against the caller).
+ """
+ for a in sure_end_fem:
+ if self.text.endswith(a):
+ # check that this isn't a one-syllable word
+ for i in range(4):
+ try:
+ if '-' in self.chunks[-i-1]['text'] or 'wordend' in self.chunks[-i-1]:
+ return ['M', 'F']
+ except IndexError:
+ # fewer than four chunks in the whole verse
+ return ['M', 'F']
+ return ['F']
+ if not self.text.endswith('ent'):
+ return ['M']
+ # verse ends with 'ent'
+ if align and align[-2]['weight'] == 0:
+ return ['F'] # mute -ent
+ if align and align[-2]['weight'] > 0 and align[-2]['text'] == 'e':
+ return ['M'] # non-mute "-ent" by the choice of metric
+ possible = []
+ # now, we must check pronunciation?
+ # "tient" vs. "lient" for instance, "excellent"...
+ for possible_phon in phon:
+ # NOTE(review): ')' and '#' look like phoneme codes marking a sounded
+ # ending -- confirm against the phonetic alphabet used by the rhyme module
+ if possible_phon.endswith(')') or possible_phon.endswith('#'):
+ possible.append('M')
+ else:
+ possible.append('F')
+ if possible_phon.endswith('E') and self.text.endswith('aient'):
+ # imparfait and conditionnel are masculine...
+ possible.append('M')
+ return possible
+
+  def fit(self, pos, count, hemistiches):
+    """Enumerate the alignments of chunks[pos:] completing the metric.
+
+    count is the weight accumulated so far and hemistiches the list of
+    hemistiche positions not reached yet. Returns a list of alignments, each
+    a list of chunk copies in which the chunks that have 'weights' also
+    carry the chosen 'weight'.
+    """
+    target = self.pattern.length
+    if count > target:
+      return [] # no possibilities
+    if len(hemistiches) > 0 and hemistiches[0] < count:
+      return [] # missed a hemistiche
+    if pos == len(self.chunks):
+      # the empty alignment is the only candidate at the end of the verse
+      return [[]] if count == target else []
+    chunk = self.chunks[pos]
+    alignments = []
+    for weight in chunk.get('weights', [0]):
+      total = count + weight
+      remaining = hemistiches
+      if (len(hemistiches) > 0 and total == hemistiches[0]
+          and is_vowels(chunk['text'])):
+        acceptable = chunk['hemis'] == "ok" or (
+          not self.template.check_end_hemistiche and chunk['hemis'] != "cut")
+        if acceptable:
+          # we hemistiche here
+          remaining = hemistiches[1:]
+      annotated = dict(chunk)
+      if 'weights' in annotated:
+        annotated['weight'] = weight
+      for tail in self.fit(pos+1, total, remaining):
+        alignments.append([annotated] + tail)
+    return alignments
+
+ hemis_types = {
+ 'ok': '/', # correct
+ 'cut': '?', # falls at the middle of a word
+ 'fem': '\\', # preceding word ends by a mute e
+ }
+
+  def align2str(self, align):
+    """Concatenate the 'text' of every chunk of `align`."""
+    pieces = []
+    for chunk in align:
+      pieces.append(chunk['text'])
+    return ''.join(pieces)
+
+  def hemistiche(self, pos):
+    """Classify a hemistiche falling right after the chunk at `pos`.
+
+    Returns "cut" when neither this chunk nor the next one ends a word,
+    "fem" when the word ends with one of sure_end_fem, is not elidable and
+    has no word boundary in the two preceding chunks, and "ok" otherwise.
+    """
+    ending = self.chunks[pos]['text']
+    if not 'wordend' in self.chunks[pos] and pos < len(self.chunks) - 1:
+      if not 'wordend' in self.chunks[pos+1]:
+        return "cut"
+      ending += self.chunks[pos+1]['text']
+    if (ending in sure_end_fem):
+      if True in self.chunks[pos].get('elidable', [False]):
+        return "ok" # elidable final -e
+      # check that this isn't a one-syllable word (which is allowed)
+      for i in range(2):
+        prev = pos - i - 1
+        if prev < 0:
+          # start of the verse: stop here, a negative index would silently
+          # wrap around to the end of the verse instead of raising
+          break
+        if '-' in self.chunks[prev]['text'] or 'wordend' in self.chunks[prev]:
+          return "ok"
+      # hemistiche ends in feminine
+      return "fem"
+    return "ok"
+
+  def problems(self):
+    """Return the list of errors found in the verse.
+
+    Each kind of chunk-level error (forbidden pattern, hiatus, illegal
+    characters) is reported once, in the order it is first met; the old
+    code iterated over a set, so the order could change between runs. A
+    bad-metric error is added last when no alignment fits.
+    """
+    kinds = []
+    for c in self.chunks:
+      if 'error' not in c:
+        continue
+      found = []
+      if c['error'] == "ambiguous" and not self.template.forbidden_ok:
+        found.append(error.ErrorForbiddenPattern)
+      if c['error'] == "hiatus" and not self.template.hiatus_ok:
+        found.append(error.ErrorHiatus)
+      if c['error'] == "illegal":
+        found.append(error.ErrorBadCharacters)
+      for kind in found:
+        if kind not in kinds:
+          kinds.append(kind)
+    result = [kind() for kind in kinds]
+    if len(self.possible) == 0:
+      result.append(error.ErrorBadMetric())
+    return result
+
+  def valid(self):
+    """Tell whether problems() reports nothing for this verse."""
+    return not self.problems()
+
+  def genders(self, phon):
+    """Return the set of rhyme genders allowed by the possible alignments.
+
+    When no alignment fits, the gender is still inferred, without alignment.
+    """
+    # try to infer gender even when metric is wrong
+    candidates = self.possible if len(self.possible) > 0 else [None]
+    found = set()
+    for alignment in candidates:
+      found |= set(self.feminine(alignment, phon))
+    return found
+
+
diff --git a/versetest.py b/versetest.py
@@ -0,0 +1,155 @@
+#!/usr/bin/python3
+
+import template
+import verse
+import unittest
+from pprint import pprint
+
+class SanityCheck(unittest.TestCase):
+  """The input line must be recoverable unchanged from Verse.line."""
+
+  def _assertRoundTrip(self, text):
+    v = verse.Verse(text, template.Template(), template.Pattern("12"))
+    self.assertEqual(text, v.line)
+
+  def testSimple(self):
+    self._assertRoundTrip("Hello World!! This is a test")
+
+  def testComplex(self):
+    self._assertRoundTrip(
+      "Aye AYAYE aye gue que geque AYAYAY a prt sncf bbbéé")
+
+  def testLeadingSpace(self):
+    self._assertRoundTrip(" a")
+
+class Eliminate(unittest.TestCase):
+  """The 'u' of "gue" must disappear from the chunk texts."""
+
+  def _chunkText(self, text):
+    v = verse.Verse(text, template.Template(), template.Pattern("12"))
+    return ''.join(chunk['text'] for chunk in v.chunks)
+
+  def testEliminateOneGue(self):
+    self.assertFalse("gue" in self._chunkText("gue"))
+
+  def testEliminateGue(self):
+    joined = self._chunkText("gue gue GUE ogues longuement la guerre")
+    self.assertFalse("gue" in joined)
+
+class BadChars(unittest.TestCase):
+ """Verses containing digits must not be valid."""
+ def testBadAlone(self):
+ v = verse.Verse("42", template.Template(), template.Pattern("12"))
+ self.assertFalse(v.valid())
+
+ def testBadAndGood(self):
+ v = verse.Verse("bla h42 blah ", template.Template(), template.Pattern("12"))
+ self.assertFalse(v.valid())
+
+
+ # NOTE(review): the two helpers below are not called by any test of this
+ # class and duplicate the ones defined on Counts.
+ def getWeight(self, align):
+ return sum(x.get('weight', 0) for x in align)
+
+ def achievesPossibility(self, aligns, target):
+ for align in aligns:
+ if self.getWeight(align) == target:
+ return True
+ return False
+
+class Counts(unittest.TestCase):
+  """Shared helpers for the syllable-count test cases."""
+
+  def runCount(self, text, limit=12):
+    """Return the alignments of `text` against a metric of `limit`."""
+    tpl = template.Template()
+    pattern = template.Pattern(str(limit))
+    return verse.Verse(text, tpl, pattern).possible
+
+  def getWeight(self, align):
+    """Total weight of an alignment; chunks without weight count for 0."""
+    total = 0
+    for chunk in align:
+      total += chunk.get('weight', 0)
+    return total
+
+  def achievesPossibility(self, aligns, target):
+    """Tell whether some alignment weighs exactly `target`."""
+    return any(self.getWeight(align) == target for align in aligns)
+
+class SigleCounts(Counts):
+  """Consonant-only words are counted as spelled-out letters."""
+
+  def _assertUnique(self, text, limit):
+    f = self.runCount(text, limit=limit)
+    self.assertEqual(1, len(f))
+    self.assertEqual(self.getWeight(f[0]), limit)
+
+  def testW(self):
+    self._assertUnique("W", 3)
+
+  def testB(self):
+    self._assertUnique("b", 1)
+
+  def testMulti(self):
+    self._assertUnique("SNCF WWW", 13)
+
+class SimpleCounts(Counts):
+  """Plain consonant-vowel words have exactly one alignment."""
+
+  def _assertUnique(self, text, limit):
+    f = self.runCount(text, limit=limit)
+    self.assertEqual(1, len(f))
+    self.assertEqual(self.getWeight(f[0]), limit)
+
+  def testTrivialMonovoc(self):
+    self._assertUnique("Ba", 1)
+
+  def testMonovoc(self):
+    self._assertUnique("Babababa", 4)
+
+class AspiratedCounts(Counts):
+  """Before an initial 'h' both the elided and sounded 'e' must fit."""
+
+  def testBaudelaire1half(self):
+    for limit in (4, 5):
+      possible = self.runCount("funeste hélas", limit=limit)
+      self.assertTrue(self.achievesPossibility(possible, limit))
+
+class RealCounts(Counts):
+  """Real verses and half-verses that have exactly one alignment."""
+
+  half1 = "Je veux, pour composer"
+  half2 = " chastement mes églogues,"
+  verse = "Allez. Après cela direz-vous que je l’aime ?"
+
+  def _assertUnique(self, text, limit):
+    f = self.runCount(text, limit=limit)
+    self.assertEqual(1, len(f))
+    self.assertEqual(self.getWeight(f[0]), limit)
+
+  def testBaudelaire1half(self):
+    self._assertUnique(self.half1, 6)
+
+  def testBaudelaire1half2(self):
+    self._assertUnique(self.half2, 6)
+
+  def testBaudelaire1(self):
+    self._assertUnique(self.half1 + self.half2, 12)
+
+  def testAndromaque(self):
+    self._assertUnique(self.verse, 12)
+
+class BadCounts(Counts):
+  """A verse that cannot reach the requested length has no alignment."""
+
+  def testBad(self):
+    # no debug dump of the alignments: a test should stay silent on success
+    f = self.runCount("Cela cela", limit=5)
+    self.assertEqual(0, len(f))
+
+class PoemCounts(Counts):
+  """Full verses for which a 12-syllable alignment must exist."""
+
+  v1 = "Qui berce longuement notre esprit enchanté"
+  v2 = "Qu'avez-vous ? Je n'ai rien. Mais... Je n'ai rien, vous dis-je,"
+  v3 = "Princes, toute h mer est de vaisseaux couverte,"
+  v4 = "Souvent le car qui l'a ne le sait pas lui-même"
+
+  def testV1(self):
+    possible = self.runCount(self.v1, limit=12)
+    self.assertTrue(self.achievesPossibility(possible, 12))
+
+  def testV2(self):
+    possible = self.runCount(self.v2, limit=12)
+    self.assertTrue(self.achievesPossibility(possible, 12))
+
+  def testV3(self):
+    possible = self.runCount(self.v3, limit=12)
+    self.assertTrue(self.achievesPossibility(possible, 12))
+
+  def testV4(self):
+    # checks v4 against the 6/6 metric (this test used to run v3 again,
+    # leaving v4 untested)
+    possible = self.runCount(self.v4, limit="6/6")
+    self.assertTrue(self.achievesPossibility(possible, 12))
+
+if __name__ == "__main__":
+ unittest.main()
+
diff --git a/views/about.html b/views/about.html
@@ -5,7 +5,8 @@
<p>Bienvenue sur <strong>plint</strong>!</p>
<h2 id="info">De quoi s'agit-il au juste ?</h2>
<p>C'est une tentative d'<a href="http://a3nm.net">a3nm</a> pour lutter contre la
-poésie de mauvaise qualité. Plint vérifie qu'un poème respecte des contraintes
+poésie de mauvaise qualité (celle qui veut suivre des règles classiques sans y
+parvenir). Plint vérifie qu'un poème respecte des contraintes
de métrique, de rime et de genre de rime. Il utilise <a
href="http://gitorious.org/frhyme">frhyme</a> pour les rimes (qui utilise
lui-même la base de données <a href="http://lexique.org">Lexique</a>), <a
@@ -44,9 +45,7 @@ exacte, se reporter au code source.</p>
<dt>Hémistiche</dt>
<dd>Les alexandrins classiques sont divisés en deux <em>hémistiches</em> de
6 syllabes. La césure ne doit pas couper un mot et le premier hémistiche ne
- doit pas se finir par un son faible (ie. une fin féminine non élidée).
- L'hémistiche ne doit pas briser la structure du vers, plint vérifie seulement
- qu'il ne se termine pas par un article défini.</dd>
+ doit pas se finir par un son faible (ie. une fin féminine non élidée).</dd>
<dt>Rime.</dt>
<dd>La contrainte la plus connue est que les vers doivent rimer. Les phonèmes
communs dans une rime doivent inclure un son vocalique (par exemple "tâte" et
@@ -202,7 +201,8 @@ programmes), mais aussi pour l'homophonie avec "plainte".</p>
<p>Welcome to <strong>plint</strong>!</p>
<h2 id="info">Wait, what is this?</h2>
<p>This is <a href="http://a3nm.net">a3nm</a>'s attempt to make a better world
-by eradicating incorrect French poetry. It checks the validity of a poem with
+by eradicating that species of bad French poetry that tries to follow classical
+constraints but fails. It checks the validity of a poem with
respect to metric, rhyme and rhyme genre constraints. It uses <a
href="http://gitorious.org/frhyme">frhyme</a> for rhymes (itself built on the
<a href="http://lexique.org">Lexique</a> database), <a
@@ -238,8 +238,7 @@ source code.</p>
<dd>For classical alexandrines, the 12 syllables are separated in two groups
of 6 with an intermediate cesura (the <em>hémistiche</em>). The cesura must
not split a word and must not end in a weak sound (essentially, a non-elided
- feminine ending). The hemistiche should occur at a pleasant point of the
- sentence, plint only checks that it does not end in a definite article.</dd>
+ feminine ending).</dd>
<dt>Rhyme.</dt>
<dd>The most well-known constraint is that verses must rhyme. The rhyming
phonemes must include a vowel (eg. "tâte" and "bête" do not rhyme because
diff --git a/views/page.html b/views/page.html
@@ -8,18 +8,23 @@
{% else %}
<meta name="description" content="plint French poetry checker" />
{% endif %}
- <link rel="stylesheet" href="static/main.css" type="text/css" media="screen" />
+ <link rel="stylesheet" href="/static/main.css" type="text/css" media="screen" />
</head>
<body>
<header>
<h1><a href="/">plint</a></h1>
- <sup><a id="about" href="/about">
+ <sup><a id="about" href="about">
{% if lang == 'fr' %}
aide
{% else %}
help
{% endif %}
</a></sup>
+ {% if nolocale != True %}
+ <div id="lang">
+ <a href="/fr/{{path}}">fr</a> • <a href="/en/{{path}}">en</a>
+ </div>
+ {% endif %}
</header>
<div id="body">
diff --git a/views/results.html b/views/results.html
@@ -33,11 +33,7 @@
<li class="correct" id="l{{loop.index}}">{{line}}
{% else %}
<li class="incorrect" id="l{{loop.index}}">{{line}}
- <ul>
- {% for error in errors %}
- <li><pre>{{error}}</pre></li>
- {% endfor %}
- </ul>
+ <pre>{{errors}}</pre>
{% endif %}
</li>
{% endfor %}
diff --git a/vowels.py b/vowels.py
@@ -6,8 +6,8 @@
from common import strip_accents
from diaeresis import lookup
-def clear(l):
- return [x[0] if isinstance(x, tuple) else x for x in l]
+def clear(x):
+ # Text of chunk x, followed by a space when the chunk is marked 'wordend'.
+ return (x['text'] + ' ') if 'wordend' in x else x['text']
def intersperse(a, b):
if (len(a) == 0 or a[0] == ' ') and (len(b) == 0 or b[0] == ' '):
@@ -28,7 +28,14 @@ def contains_trema(chunk):
threshold = 10
def make_query(chunks, pos):
- cleared = clear(chunks)
+ # Build the query for chunk `pos`: its own text first, then the following
+ # text and the reversed preceding text, combined by intersperse().
+ cleared = [clear(x) for x in chunks]
+ if cleared[pos].endswith(' '):
+ cleared[pos] = cleared[pos].rstrip()
+ # Move the word-final space to the front of the next chunk. The bound is
+ # `<`, not `<=`: with `<=` the last chunk indexed past the end of the
+ # list (IndexError) and the append branch could never run.
+ if pos + 1 < len(cleared):
+ cleared[pos+1] = " " + cleared[pos+1]
+ else:
+ cleared.append(' ')
+
return [cleared[pos]] + intersperse(
''.join(cleared[pos+1:]),
''.join([x[::-1] for x in cleared[:pos][::-1]]))
@@ -39,7 +46,6 @@ def possible_weights_ctx(chunks, pos):
#print (q)
v = lookup(q)
#print (v)
- #print (possible_weights(chunk))
if len(v.keys()) == 1 and v[list(v.keys())[0]] > threshold:
return [int(list(v.keys())[0])]
else: