plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 2dd2007eb28dd1c78cec07a4bd74cc377c589734
parent a0901fd8f1dc81e6d5a0829d3dee3642fc08857e
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 18 Aug 2019 14:27:51 +0200

JSON output format for errors

Diffstat:
TODO | 2--
plint/__main__.py | 27+++++++++++++++++++++++----
plint/chunk.py | 43++++++++++++++++++++++++++++++++++++-------
plint/chunks.py | 42+++++++++++++++++++++++++++++++-----------
plint/error.py | 183+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
plint/template.py | 14+++++++-------
plint/verse.py | 6+++---
7 files changed, 238 insertions(+), 79 deletions(-)

diff --git a/TODO b/TODO @@ -3,8 +3,6 @@ - update the localization - migrate the readme to markdown - turn should_be_accepted into a test -- Produce error reports first in a structured format (JSON) and translate that - into text or into HTML - expand the corpus of classical poetry: more Racine, more other authors (Boileau, Corneille, Prudhomme, etc.) diff --git a/plint/__main__.py b/plint/__main__.py @@ -3,9 +3,10 @@ from plint import localization, error, template, diaeresis import sys import argparse +import json -def run(ocontext=None, weight=None, offset=0): +def run(ocontext=None, weight=None, offset=0, fmt="text"): is_ok = True f2 = None n_syllables = None @@ -14,6 +15,7 @@ def run(ocontext=None, weight=None, offset=0): if weight: n_syllables = int(weight) should_end = False + ret = [] while True: line = sys.stdin.readline() if not line: @@ -21,10 +23,20 @@ def run(ocontext=None, weight=None, offset=0): line = "" errors = template.check(line, f2, last=should_end, n_syllables=n_syllables, offset=offset) if errors: - print(errors.report(), file=sys.stderr) - is_ok = False + if not errors.isEmpty(): + is_ok = False + if not errors.isEmpty(): + if fmt == "text": + print(errors.report(fmt=fmt), file=sys.stderr) + elif fmt == "json": + ret.append(errors.report(fmt=fmt)) + else: + raise ValueError("bad format") if should_end: break + if fmt == "json": + print(json.dumps(ret, sort_keys=True, indent=4, + separators={',', ': '})) return is_ok @@ -36,6 +48,10 @@ def main(): parser.add_argument("template", help="the file containing the template for the input poem", type=str) + parser.add_argument("--format", type=str, + help="error output format (text or json)", + choices = ["text", "json"], + default="text") parser.add_argument("--diaeresis", type=str, help="diaeresis training: diaeresis file to use", default="../data/diaeresis.json") @@ -52,15 +68,18 @@ def main(): template_name = args.template diaeresis.set_diaeresis(args.diaeresis) + f = open(template_name) x = f.read() f.close() + try: template = template.Template(x) except error.TemplateLoadError as e: print("Could not load template %s: %s" % (template_name, e.msg), file=sys.stderr) sys.exit(2) - ok = run(ocontext=args.ocontext, weight=args.weight, offset=args.offset) + ok = run(ocontext=args.ocontext, weight=args.weight, offset=args.offset, + fmt=args.format) sys.exit(0 if ok else 1) diff --git a/plint/chunk.py b/plint/chunk.py @@ -532,23 +532,52 @@ class Chunk: def is_masculine(self): return (self.had_hyphen or False) or (self.word_end or False) - def render(self, key): + def render(self, key, fmt="text"): if key == 'error' and self.error == 'illegal': - return self.illegal_str + if fmt == "text": + return self.illegal_str + elif fmt == "json": + # don't know how to represent the specific characters + # cleanly in JSON + return "illegal_characters" + else: + raise ValueError("bad format") if key == 'original': return str(self.original) elif key == 'weights': - return '-'.join([str(a) for a in self.weights or []]) + if fmt == "text": + return '-'.join([str(a) for a in self.weights or []]) + elif fmt == "json": + if self.weights is None: + return None + return [a for a in self.weights or []] + else: + raise ValueError("bad format") elif key == 'error': - return ErrorCollection.keys.get(self.error, '') * len(self.original) + if fmt == "text": + return ErrorCollection.keys.get(self.error, '') * len(self.original) + elif fmt == "json": + return self.error or None + else: + raise ValueError("bad format") elif key == 'hemis': - return str(self.hemistiche or "") + if fmt == "text": + return str(self.hemistiche or "") + elif fmt == "json": + return self.hemistiche or None + else: + raise ValueError("bad format") else: print(key, file=sys.stderr) assert False - def get_normalized_rendering(self, key, keys): - return ('{:^' + str(self.get_max_render_size(keys)) + '}').format(self.render(key)) + def get_normalized_rendering(self, key, keys, fmt="text"): + if fmt == "text": + return ('{:^' + str(self.get_max_render_size(keys)) + '}').format(self.render(key)) + elif fmt == "json": + return self.render(key, fmt=fmt) + else: + raise ValueError("bad format") def get_min_weight(self): return min(self.weights or [0]) diff --git a/plint/chunks.py b/plint/chunks.py @@ -306,19 +306,39 @@ class Chunks: break return tot - def align_from_keys(self, keys): - lines = {} - for key in keys: - lines[key] = "" - for chunk in self.chunks: + def align_from_keys(self, keys, fmt="text"): + if fmt == "text": + lines = {} for key in keys: - lines[key] += chunk.get_normalized_rendering(key, keys) - if 'weights' in keys: + lines[key] = "" + for chunk in self.chunks: + for key in keys: + lines[key] += chunk.get_normalized_rendering( + key, keys, fmt=fmt) + if 'weights' in keys: + bounds = self.get_weights_bounds() + bounds = [str(x) for x in bounds] + lines['weights'] += " (total: " + ('-'.join(bounds) + if bounds[1] > bounds[0] else bounds[0]) + ")" + return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0] + elif fmt == "json": + ret = {'chunks': []} + for chunk in self.chunks: + d = {} + for key in keys: + v = chunk.get_normalized_rendering( + key, keys, fmt=fmt) + if v is not None: + d[key] = v + ret['chunks'].append(d) bounds = self.get_weights_bounds() - bounds = [str(x) for x in bounds] - lines['weights'] += " (total: " + ('-'.join(bounds) - if bounds[1] > bounds[0] else bounds[0]) + ")" - return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0] + ret['total_weight'] = { + 'min': bounds[0], + 'max': bounds[1]} + return ret + else: + raise ValueError("bad format") + def get_weights_bounds(self): bounds = [0, 0] diff --git a/plint/error.py b/plint/error.py @@ -21,22 +21,37 @@ class ErrorCollection(ReportableError): self.pattern = pattern self.verse = verse + def isEmpty(self): + return len(self.errors) == 0 + def say(self, l, short): return l if short else self.prefix + l - def align(self): - return self.verse.align() + def align(self, fmt="text"): + return self.verse.align(fmt=fmt) - def lines(self, short=False): + def lines(self, short=False, fmt="text"): result = [] if self.verse.possible is not None: - result.append([self.say(x, short) for x in self.align()]) + result.append([self.say(x, short) for x in self.align(fmt=fmt)]) for e in self.errors: - result.append([self.say(e.report(self.pattern), short)]) + result.append([self.say(e.report(self.pattern, fmt=fmt), short)]) return result - def report(self, short=False): - return '\n'.join(sum(self.lines(short), [])) + def report(self, short=False, fmt="text"): + if fmt == "text": + return '\n'.join(sum(self.lines(short, fmt=fmt), [])) + elif fmt == "json": + return { + 'line': self.line, + 'line_no': self.line_no, + 'possible_parsings': self.align(fmt=fmt), + 'errors': [ + e.report(self.pattern, fmt=fmt) + for e in self.errors] + } + else: + raise ValueError("bad format") class ErrorBadElement(ReportableError): @@ -44,10 +59,17 @@ class ErrorBadElement(ReportableError): def __init__(self): self.message = None self.key = None + self.report_key = None - def report(self, pattern): - return (self.message + def report(self, pattern, fmt="text"): + if fmt == "text": + return (self.message + " (see '%s' above)") % ErrorCollection.keys[self.key] + elif fmt == "json": + return {'error': self.report_key, + 'error_kind': "local_error_collection"} + else: + raise ValueError("bad format") class ErrorBadCharacters(ErrorBadElement): @@ -56,6 +78,7 @@ class ErrorBadCharacters(ErrorBadElement): super().__init__() self.message = "Illegal Characters" self.key = "illegal" + self.report_key = "illegal_characters" class ErrorForbiddenPattern(ErrorBadElement): @@ -64,6 +87,7 @@ class ErrorForbiddenPattern(ErrorBadElement): super().__init__() self.message = "Illegal ambiguous pattern" self.key = "ambiguous" + self.report_key = "ambiguous_patterns" class ErrorHiatus(ErrorBadElement): @@ -72,6 +96,7 @@ class ErrorHiatus(ErrorBadElement): super().__init__() self.message = "Illegal hiatus" self.key = "hiatus" + self.report_key = "hiatus" class ErrorBadRhyme(ReportableError): @@ -80,31 +105,48 @@ class ErrorBadRhyme(ReportableError): self.expected = expected self.inferred = inferred self.old_phon = old_phon + self.kind_human = None self.kind = None def get_id(self, pattern): raise NotImplementedError - def fmt(self, l): + def fmt(self, l, fmt="text"): raise NotImplementedError - def report(self, pattern): - return ("%s for type %s (expected %s, inferred %s)" - % (self.kind, self.get_id(pattern), self.fmt(self.expected), - self.fmt(self.inferred))) + def report(self, pattern, fmt="text"): + if fmt == "text": + return ("%s for type %s (expected %s, inferred %s)" + % (self.kind, self.get_id(pattern), + self.fmt(self.expected, fmt=fmt), + self.fmt(self.inferred, fmt=fmt))) + elif fmt == "json": + return { + 'error': self.kind, 'error_kind': "rhyme_error", + 'pattern_rhyme_type': self.get_id(pattern), + 'expected': self.fmt(self.expected, fmt=fmt), + 'inferred': self.fmt(self.inferred, fmt=fmt)} + else: + raise ValueError("bad format") class ErrorBadRhymeGenre(ErrorBadRhyme): def __init__(self, expected, inferred, old_phon=None): super().__init__(expected, inferred, old_phon) - self.kind = "Bad rhyme genre" - - def fmt(self, l): - result = ' or '.join(sorted(list(l))) - if result == '': - result = "?" - return "\"" + result + "\"" + self.kind_human = "Bad rhyme genre" + self.kind = "rhyme_genre" + + def fmt(self, l, fmt="text"): + if fmt == "text": + result = ' or '.join(sorted(list(l))) + if result == '': + result = "?" + return "\"" + result + "\"" + elif fmt == "json": + return sorted(list(l)) + else: + raise ValueError("bad format") def get_id(self, pattern): return pattern.feminine_id @@ -123,35 +165,59 @@ class ErrorBadRhymeSound(ErrorBadRhymeObject): def __init__(self, expected, inferred, old_phon=None): super().__init__(expected, inferred, old_phon) - self.kind = "Bad rhyme sound" - - def fmt(self, l): - return '/'.join("\"" + common.to_xsampa(x) + "\"" for x in - sorted(list(l.sufficient_phon()))) + self.kind_human = "Bad rhyme sound" + self.kind = "rhyme_sound" + + def fmt(self, l, fmt="text"): + if fmt == "text": + return ('/'.join("\"" + common.to_xsampa(x) + "\"" + for x in sorted(list(l.sufficient_phon())))) + elif fmt == "json": + return (sorted(common.to_xsampa(x) + for x in list(l.sufficient_phon()))) + else: + raise ValueError("bad format") class ErrorBadRhymeEye(ErrorBadRhymeObject): def __init__(self, expected, inferred, old_phon=None): super().__init__(expected, inferred, old_phon) - self.kind = "Bad rhyme ending" - - def fmt(self, l): - return "\"-" + l.sufficient_eye(self.old_phon) + "\"" + self.kind_human = "Bad rhyme ending" + self.kind = "rhyme_ending" + + def fmt(self, l, fmt="text"): + if fmt == "text": + return "\"-" + l.sufficient_eye(self.old_phon) + "\"" + elif fmt == "json": + return (l.sufficient_eye(self.old_phon)) + else: + raise ValueError("bad format") class ErrorBadMetric(ReportableError): - def report(self, pattern): - plural_hemistiche = '' if len(pattern.hemistiches) == 1 else 's' - plural_syllable = '' if pattern.length == 1 else 's' - if len(pattern.hemistiches) == 0: - hemistiche_string = "" + def report(self, pattern, fmt="text"): + if fmt == "text": + plural_hemistiche = '' if len(pattern.hemistiches) == 1 else 's' + plural_syllable = '' if pattern.length == 1 else 's' + if len(pattern.hemistiches) == 0: + hemistiche_string = "" + else: + hemistiche_positions = (','.join(str(a) + for a in pattern.hemistiches)) + hemistiche_string = ((" with hemistiche%s at " + % plural_hemistiche) + hemistiche_positions) + return ("Illegal metric: expected %d syllable%s%s" % + (pattern.length, plural_syllable, hemistiche_string)) + elif fmt == "json": + return { + 'error': "metric", 'error_kind': "metric_error", + 'expected_syllables': pattern.length, + 'expected_hemistiches': pattern.hemistiches + } else: - hemistiche_positions = ','.join(str(a) for a in pattern.hemistiches) - hemistiche_string = (" with hemistiche%s at " % plural_hemistiche) + hemistiche_positions - return ("Illegal metric: expected %d syllable%s%s" % - (pattern.length, plural_syllable, hemistiche_string)) + raise ValueError("bad format") class ErrorMultipleWordOccurrence(ReportableError): @@ -160,20 +226,47 @@ class ErrorMultipleWordOccurrence(ReportableError): self.word = word self.occurrences = occurrences - def report(self, pattern): - return "Too many occurrences of word \"%s\" for rhyme %s" % (self.word, pattern.my_id) + def report(self, pattern, fmt="text"): + if fmt == "text": + return ("Too many occurrences of word \"%s\" for rhyme %s" + % (self.word, pattern.my_id)) + elif fmt == "json": + return { + 'error': "rhyme_occurrences", 'error_kind': "rhyme_error", + 'pattern_rhyme_type': pattern.my_id, + 'word': self.word + } + else: + raise ValueError("bad format") class ErrorIncompleteTemplate(ReportableError): - def report(self, pattern): - return "Poem is not complete" + def report(self, pattern, fmt="text"): + if fmt == "text": + return "Poem is not complete" + elif fmt == "json": + return { + 'error': "incomplete_poem", + 'error_kind': "global_error" + } + else: + raise ValueError("bad format") + class ErrorOverflowedTemplate(ReportableError): - def report(self, pattern): - return "Verse is beyond end of poem" + def report(self, pattern, fmt="text"): + if fmt == "text": + return "Verse is beyond end of poem" + elif fmt == "json": + return { + 'error': "verse_beyond_end_of_poem", + 'error_kind': "global_error" + } + else: + raise ValueError("bad format") class TemplateLoadError(BaseException): diff --git a/plint/template.py b/plint/template.py @@ -248,13 +248,13 @@ class Template: if normalize(line) == '' and not last: return None - errors, pattern, verse = self.match(line, output_file, last=last, n_syllables=n_syllables, offset=offset) - if len(errors) > 0: - if self.reject_errors: - self.back() - self.line_no -= 1 - return error.ErrorCollection(self.line_no, line, pattern, verse, errors) - return None + errors, pattern, verse = self.match(line, output_file, + last=last, n_syllables=n_syllables, offset=offset) + if len(errors) > 0 and self.reject_errors: + self.back() + self.line_no -= 1 + return error.ErrorCollection(self.line_no, + line, pattern, verse, errors) def str2bool(x): diff --git a/plint/verse.py b/plint/verse.py @@ -66,13 +66,13 @@ class Verse: # where "final" is the offset-th chunk with a weight from the end self.chunks.print_n_syllables(n_syllables, offset, output_file) - def align(self): + def align(self, fmt="text"): keys = ['original', 'error'] - if len(self.possible) == 0: + if self.possible is not None and len(self.possible) == 0: keys.append('weights') if len(self.pattern.hemistiches) > 0: keys.append('hemis') - return self.chunks.align_from_keys(keys) + return self.chunks.align_from_keys(keys, fmt=fmt) def print_possible(self, output_file): if not output_file: