plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit d3ee60085c5eb8647c2e6c23f1014acc973a0cdf
parent a2dc431e2d39f50220fc9d6687c6a231ebacd0bd
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  5 Jan 2014 14:42:31 +0100

fix problem with apostrophes and rhyme

Diffstat:
common.py | 11 +++++++----
verse.py  |  2 +-
2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/common.py b/common.py
@@ -43,13 +43,14 @@ def norm_spaces(text):
   """Remove multiple consecutive whitespace"""
   return re.sub("\s+-*\s*", ' ', text)
 
-def rm_punct(text, rm_all=False, rm_apostrophe=False):
+def rm_punct(text, rm_all=False, rm_apostrophe=False, rm_apostrophe_end=True):
   """Remove punctuation from text"""
   text = re.sub("[" + apostrophes + "]", "'", text) # no weird apostrophes
   text = re.sub("' *", "'", text) # space after apostrophes
   if rm_apostrophe:
     text = re.sub("'", "", text)
-  text = re.sub("'*$", "", text) # apostrophes at end of line
+  if rm_apostrophe_end:
+    text = re.sub("'*$", "", text) # apostrophes at end of line
   text = re.sub("[‒–—―⁓⸺⸻]", " ", text) # no weird dashes
 
   #TODO rather: keep only good chars
@@ -83,10 +84,12 @@ def is_consonants(chunk):
       return False
   return True
 
-def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False, strip=True):
+def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False,
+    rm_apostrophe_end=True, strip=True):
   """Normalize text, ie. lowercase, no useless punctuation or whitespace"""
   res = norm_spaces(rm_punct(text.lower() if downcase else text,
-    rm_all=rm_all, rm_apostrophe=rm_apostrophe))
+    rm_all=rm_all, rm_apostrophe=rm_apostrophe,
+    rm_apostrophe_end=rm_apostrophe_end))
   if strip:
     return res.rstrip().lstrip()
   else:
diff --git a/verse.py b/verse.py
@@ -71,7 +71,7 @@ class Verse:
 
   @property
   def normalized(self):
-    return ''.join(normalize(x['original'], strip=False)
+    return ''.join(normalize(x['original'], strip=False, rm_apostrophe_end=False)
       if 'text_pron' not in x.keys() else x['text']
       for x in self.chunks).lstrip().rstrip()
 