commit 2dd2007eb28dd1c78cec07a4bd74cc377c589734
parent a0901fd8f1dc81e6d5a0829d3dee3642fc08857e
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 18 Aug 2019 14:27:51 +0200
JSON output format for errors
Diffstat:
7 files changed, 238 insertions(+), 79 deletions(-)
diff --git a/TODO b/TODO
@@ -3,8 +3,6 @@
- update the localization
- migrate the readme to markdown
- turn should_be_accepted into a test
-- Produce error reports first in a structured format (JSON) and translate that
- into text or into HTML
- expand the corpus of classical poetry: more Racine, more other authors
(Boileau, Corneille, Prudhomme, etc.)
diff --git a/plint/__main__.py b/plint/__main__.py
@@ -3,9 +3,10 @@
from plint import localization, error, template, diaeresis
import sys
import argparse
+import json
-def run(ocontext=None, weight=None, offset=0):
+def run(ocontext=None, weight=None, offset=0, fmt="text"):
is_ok = True
f2 = None
n_syllables = None
@@ -14,6 +15,7 @@ def run(ocontext=None, weight=None, offset=0):
if weight:
n_syllables = int(weight)
should_end = False
+ ret = []
while True:
line = sys.stdin.readline()
if not line:
@@ -21,10 +23,20 @@ def run(ocontext=None, weight=None, offset=0):
line = ""
errors = template.check(line, f2, last=should_end, n_syllables=n_syllables, offset=offset)
if errors:
- print(errors.report(), file=sys.stderr)
- is_ok = False
+ if not errors.isEmpty():
+ is_ok = False
+ if not errors.isEmpty():
+ if fmt == "text":
+ print(errors.report(fmt=fmt), file=sys.stderr)
+ elif fmt == "json":
+ ret.append(errors.report(fmt=fmt))
+ else:
+ raise ValueError("bad format")
if should_end:
break
+ if fmt == "json":
+ print(json.dumps(ret, sort_keys=True, indent=4,
+ separators=(',', ': ')))  # ordered (item_sep, key_sep) tuple; a set has no stable order
return is_ok
@@ -36,6 +48,10 @@ def main():
parser.add_argument("template",
help="the file containing the template for the input poem",
type=str)
+ parser.add_argument("--format", type=str,
+ help="error output format (text or json)",
+ choices = ["text", "json"],
+ default="text")
parser.add_argument("--diaeresis", type=str,
help="diaeresis training: diaeresis file to use",
default="../data/diaeresis.json")
@@ -52,15 +68,18 @@ def main():
template_name = args.template
diaeresis.set_diaeresis(args.diaeresis)
+
f = open(template_name)
x = f.read()
f.close()
+
try:
template = template.Template(x)
except error.TemplateLoadError as e:
print("Could not load template %s: %s" % (template_name, e.msg), file=sys.stderr)
sys.exit(2)
- ok = run(ocontext=args.ocontext, weight=args.weight, offset=args.offset)
+ ok = run(ocontext=args.ocontext, weight=args.weight, offset=args.offset,
+ fmt=args.format)
sys.exit(0 if ok else 1)
diff --git a/plint/chunk.py b/plint/chunk.py
@@ -532,23 +532,52 @@ class Chunk:
def is_masculine(self):
return (self.had_hyphen or False) or (self.word_end or False)
- def render(self, key):
+ def render(self, key, fmt="text"):
if key == 'error' and self.error == 'illegal':
- return self.illegal_str
+ if fmt == "text":
+ return self.illegal_str
+ elif fmt == "json":
+ # don't know how to represent the specific characters
+ # cleanly in JSON
+ return "illegal_characters"
+ else:
+ raise ValueError("bad format")
if key == 'original':
return str(self.original)
elif key == 'weights':
- return '-'.join([str(a) for a in self.weights or []])
+ if fmt == "text":
+ return '-'.join([str(a) for a in self.weights or []])
+ elif fmt == "json":
+ if self.weights is None:
+ return None
+ return [a for a in self.weights or []]
+ else:
+ raise ValueError("bad format")
elif key == 'error':
- return ErrorCollection.keys.get(self.error, '') * len(self.original)
+ if fmt == "text":
+ return ErrorCollection.keys.get(self.error, '') * len(self.original)
+ elif fmt == "json":
+ return self.error or None
+ else:
+ raise ValueError("bad format")
elif key == 'hemis':
- return str(self.hemistiche or "")
+ if fmt == "text":
+ return str(self.hemistiche or "")
+ elif fmt == "json":
+ return self.hemistiche or None
+ else:
+ raise ValueError("bad format")
else:
print(key, file=sys.stderr)
assert False
- def get_normalized_rendering(self, key, keys):
- return ('{:^' + str(self.get_max_render_size(keys)) + '}').format(self.render(key))
+ def get_normalized_rendering(self, key, keys, fmt="text"):
+ if fmt == "text":
+ return ('{:^' + str(self.get_max_render_size(keys)) + '}').format(self.render(key))
+ elif fmt == "json":
+ return self.render(key, fmt=fmt)
+ else:
+ raise ValueError("bad format")
def get_min_weight(self):
return min(self.weights or [0])
diff --git a/plint/chunks.py b/plint/chunks.py
@@ -306,19 +306,39 @@ class Chunks:
break
return tot
- def align_from_keys(self, keys):
- lines = {}
- for key in keys:
- lines[key] = ""
- for chunk in self.chunks:
+ def align_from_keys(self, keys, fmt="text"):
+ if fmt == "text":
+ lines = {}
for key in keys:
- lines[key] += chunk.get_normalized_rendering(key, keys)
- if 'weights' in keys:
+ lines[key] = ""
+ for chunk in self.chunks:
+ for key in keys:
+ lines[key] += chunk.get_normalized_rendering(
+ key, keys, fmt=fmt)
+ if 'weights' in keys:
+ bounds = self.get_weights_bounds()
+ texts = [str(x) for x in bounds]  # keep numeric bounds for the comparison below
+ lines['weights'] += " (total: " + ('-'.join(texts)
+ if bounds[1] > bounds[0] else texts[0]) + ")"
+ return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0]
+ elif fmt == "json":
+ ret = {'chunks': []}
+ for chunk in self.chunks:
+ d = {}
+ for key in keys:
+ v = chunk.get_normalized_rendering(
+ key, keys, fmt=fmt)
+ if v is not None:
+ d[key] = v
+ ret['chunks'].append(d)
bounds = self.get_weights_bounds()
- bounds = [str(x) for x in bounds]
- lines['weights'] += " (total: " + ('-'.join(bounds)
- if bounds[1] > bounds[0] else bounds[0]) + ")"
- return ["> " + lines[key] for key in keys if len(lines[key].strip()) > 0]
+ ret['total_weight'] = {
+ 'min': bounds[0],
+ 'max': bounds[1]}
+ return ret
+ else:
+ raise ValueError("bad format")
+
def get_weights_bounds(self):
bounds = [0, 0]
diff --git a/plint/error.py b/plint/error.py
@@ -21,22 +21,37 @@ class ErrorCollection(ReportableError):
self.pattern = pattern
self.verse = verse
+ def isEmpty(self):
+ return len(self.errors) == 0
+
def say(self, l, short):
return l if short else self.prefix + l
- def align(self):
- return self.verse.align()
+ def align(self, fmt="text"):
+ return self.verse.align(fmt=fmt)
- def lines(self, short=False):
+ def lines(self, short=False, fmt="text"):
result = []
if self.verse.possible is not None:
- result.append([self.say(x, short) for x in self.align()])
+ result.append([self.say(x, short) for x in self.align(fmt=fmt)])
for e in self.errors:
- result.append([self.say(e.report(self.pattern), short)])
+ result.append([self.say(e.report(self.pattern, fmt=fmt), short)])
return result
- def report(self, short=False):
- return '\n'.join(sum(self.lines(short), []))
+ def report(self, short=False, fmt="text"):
+ if fmt == "text":
+ return '\n'.join(sum(self.lines(short, fmt=fmt), []))
+ elif fmt == "json":
+ return {
+ 'line': self.line,
+ 'line_no': self.line_no,
+ 'possible_parsings': self.align(fmt=fmt),
+ 'errors': [
+ e.report(self.pattern, fmt=fmt)
+ for e in self.errors]
+ }
+ else:
+ raise ValueError("bad format")
class ErrorBadElement(ReportableError):
@@ -44,10 +59,17 @@ class ErrorBadElement(ReportableError):
def __init__(self):
self.message = None
self.key = None
+ self.report_key = None
- def report(self, pattern):
- return (self.message
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ return (self.message
+ " (see '%s' above)") % ErrorCollection.keys[self.key]
+ elif fmt == "json":
+ return {'error': self.report_key,
+ 'error_kind': "local_error_collection"}
+ else:
+ raise ValueError("bad format")
class ErrorBadCharacters(ErrorBadElement):
@@ -56,6 +78,7 @@ class ErrorBadCharacters(ErrorBadElement):
super().__init__()
self.message = "Illegal Characters"
self.key = "illegal"
+ self.report_key = "illegal_characters"
class ErrorForbiddenPattern(ErrorBadElement):
@@ -64,6 +87,7 @@ class ErrorForbiddenPattern(ErrorBadElement):
super().__init__()
self.message = "Illegal ambiguous pattern"
self.key = "ambiguous"
+ self.report_key = "ambiguous_patterns"
class ErrorHiatus(ErrorBadElement):
@@ -72,6 +96,7 @@ class ErrorHiatus(ErrorBadElement):
super().__init__()
self.message = "Illegal hiatus"
self.key = "hiatus"
+ self.report_key = "hiatus"
class ErrorBadRhyme(ReportableError):
@@ -80,31 +105,48 @@ class ErrorBadRhyme(ReportableError):
self.expected = expected
self.inferred = inferred
self.old_phon = old_phon
+ self.kind_human = None
self.kind = None
def get_id(self, pattern):
raise NotImplementedError
- def fmt(self, l):
+ def fmt(self, l, fmt="text"):
raise NotImplementedError
- def report(self, pattern):
- return ("%s for type %s (expected %s, inferred %s)"
- % (self.kind, self.get_id(pattern), self.fmt(self.expected),
- self.fmt(self.inferred)))
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ return ("%s for type %s (expected %s, inferred %s)"
+ % (self.kind_human, self.get_id(pattern),
+ self.fmt(self.expected, fmt=fmt),
+ self.fmt(self.inferred, fmt=fmt)))
+ elif fmt == "json":
+ return {
+ 'error': self.kind, 'error_kind': "rhyme_error",
+ 'pattern_rhyme_type': self.get_id(pattern),
+ 'expected': self.fmt(self.expected, fmt=fmt),
+ 'inferred': self.fmt(self.inferred, fmt=fmt)}
+ else:
+ raise ValueError("bad format")
class ErrorBadRhymeGenre(ErrorBadRhyme):
def __init__(self, expected, inferred, old_phon=None):
super().__init__(expected, inferred, old_phon)
- self.kind = "Bad rhyme genre"
-
- def fmt(self, l):
- result = ' or '.join(sorted(list(l)))
- if result == '':
- result = "?"
- return "\"" + result + "\""
+ self.kind_human = "Bad rhyme genre"
+ self.kind = "rhyme_genre"
+
+ def fmt(self, l, fmt="text"):
+ if fmt == "text":
+ result = ' or '.join(sorted(list(l)))
+ if result == '':
+ result = "?"
+ return "\"" + result + "\""
+ elif fmt == "json":
+ return sorted(list(l))
+ else:
+ raise ValueError("bad format")
def get_id(self, pattern):
return pattern.feminine_id
@@ -123,35 +165,59 @@ class ErrorBadRhymeSound(ErrorBadRhymeObject):
def __init__(self, expected, inferred, old_phon=None):
super().__init__(expected, inferred, old_phon)
- self.kind = "Bad rhyme sound"
-
- def fmt(self, l):
- return '/'.join("\"" + common.to_xsampa(x) + "\"" for x in
- sorted(list(l.sufficient_phon())))
+ self.kind_human = "Bad rhyme sound"
+ self.kind = "rhyme_sound"
+
+ def fmt(self, l, fmt="text"):
+ if fmt == "text":
+ return ('/'.join("\"" + common.to_xsampa(x) + "\""
+ for x in sorted(list(l.sufficient_phon()))))
+ elif fmt == "json":
+ return (sorted(common.to_xsampa(x)
+ for x in list(l.sufficient_phon())))
+ else:
+ raise ValueError("bad format")
class ErrorBadRhymeEye(ErrorBadRhymeObject):
def __init__(self, expected, inferred, old_phon=None):
super().__init__(expected, inferred, old_phon)
- self.kind = "Bad rhyme ending"
-
- def fmt(self, l):
- return "\"-" + l.sufficient_eye(self.old_phon) + "\""
+ self.kind_human = "Bad rhyme ending"
+ self.kind = "rhyme_ending"
+
+ def fmt(self, l, fmt="text"):
+ if fmt == "text":
+ return "\"-" + l.sufficient_eye(self.old_phon) + "\""
+ elif fmt == "json":
+ return (l.sufficient_eye(self.old_phon))
+ else:
+ raise ValueError("bad format")
class ErrorBadMetric(ReportableError):
- def report(self, pattern):
- plural_hemistiche = '' if len(pattern.hemistiches) == 1 else 's'
- plural_syllable = '' if pattern.length == 1 else 's'
- if len(pattern.hemistiches) == 0:
- hemistiche_string = ""
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ plural_hemistiche = '' if len(pattern.hemistiches) == 1 else 's'
+ plural_syllable = '' if pattern.length == 1 else 's'
+ if len(pattern.hemistiches) == 0:
+ hemistiche_string = ""
+ else:
+ hemistiche_positions = (','.join(str(a)
+ for a in pattern.hemistiches))
+ hemistiche_string = ((" with hemistiche%s at "
+ % plural_hemistiche) + hemistiche_positions)
+ return ("Illegal metric: expected %d syllable%s%s" %
+ (pattern.length, plural_syllable, hemistiche_string))
+ elif fmt == "json":
+ return {
+ 'error': "metric", 'error_kind': "metric_error",
+ 'expected_syllables': pattern.length,
+ 'expected_hemistiches': pattern.hemistiches
+ }
else:
- hemistiche_positions = ','.join(str(a) for a in pattern.hemistiches)
- hemistiche_string = (" with hemistiche%s at " % plural_hemistiche) + hemistiche_positions
- return ("Illegal metric: expected %d syllable%s%s" %
- (pattern.length, plural_syllable, hemistiche_string))
+ raise ValueError("bad format")
class ErrorMultipleWordOccurrence(ReportableError):
@@ -160,20 +226,47 @@ class ErrorMultipleWordOccurrence(ReportableError):
self.word = word
self.occurrences = occurrences
- def report(self, pattern):
- return "Too many occurrences of word \"%s\" for rhyme %s" % (self.word, pattern.my_id)
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ return ("Too many occurrences of word \"%s\" for rhyme %s"
+ % (self.word, pattern.my_id))
+ elif fmt == "json":
+ return {
+ 'error': "rhyme_occurrences", 'error_kind': "rhyme_error",
+ 'pattern_rhyme_type': pattern.my_id,
+ 'word': self.word
+ }
+ else:
+ raise ValueError("bad format")
class ErrorIncompleteTemplate(ReportableError):
- def report(self, pattern):
- return "Poem is not complete"
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ return "Poem is not complete"
+ elif fmt == "json":
+ return {
+ 'error': "incomplete_poem",
+ 'error_kind': "global_error"
+ }
+ else:
+ raise ValueError("bad format")
+
class ErrorOverflowedTemplate(ReportableError):
- def report(self, pattern):
- return "Verse is beyond end of poem"
+ def report(self, pattern, fmt="text"):
+ if fmt == "text":
+ return "Verse is beyond end of poem"
+ elif fmt == "json":
+ return {
+ 'error': "verse_beyond_end_of_poem",
+ 'error_kind': "global_error"
+ }
+ else:
+ raise ValueError("bad format")
class TemplateLoadError(BaseException):
diff --git a/plint/template.py b/plint/template.py
@@ -248,13 +248,13 @@ class Template:
if normalize(line) == '' and not last:
return None
- errors, pattern, verse = self.match(line, output_file, last=last, n_syllables=n_syllables, offset=offset)
- if len(errors) > 0:
- if self.reject_errors:
- self.back()
- self.line_no -= 1
- return error.ErrorCollection(self.line_no, line, pattern, verse, errors)
- return None
+ errors, pattern, verse = self.match(line, output_file,
+ last=last, n_syllables=n_syllables, offset=offset)
+ if len(errors) > 0 and self.reject_errors:
+ self.back()
+ self.line_no -= 1
+ return error.ErrorCollection(self.line_no,
+ line, pattern, verse, errors)
def str2bool(x):
diff --git a/plint/verse.py b/plint/verse.py
@@ -66,13 +66,13 @@ class Verse:
# where "final" is the offset-th chunk with a weight from the end
self.chunks.print_n_syllables(n_syllables, offset, output_file)
- def align(self):
+ def align(self, fmt="text"):
keys = ['original', 'error']
- if len(self.possible) == 0:
+ if self.possible is not None and len(self.possible) == 0:
keys.append('weights')
if len(self.pattern.hemistiches) > 0:
keys.append('hemis')
- return self.chunks.align_from_keys(keys)
+ return self.chunks.align_from_keys(keys, fmt=fmt)
def print_possible(self, output_file):
if not output_file: