fix problem with apostrophes and rhyme - plint - French poetry validator (local mirror of https://gitlab.com/a3nm/plint)

commit d3ee60085c5eb8647c2e6c23f1014acc973a0cdf
parent a2dc431e2d39f50220fc9d6687c6a231ebacd0bd
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  5 Jan 2014 14:42:31 +0100

fix problem with apostrophes and rhyme

Diffstat:
common.py | 11 +++++++----
verse.py  |  2 +-

2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/common.py b/common.py
@@ -43,13 +43,14 @@ def norm_spaces(text):
   """Remove multiple consecutive whitespace"""
   return re.sub("\s+-*\s*", ' ', text)
 
-def rm_punct(text, rm_all=False, rm_apostrophe=False):
+def rm_punct(text, rm_all=False, rm_apostrophe=False, rm_apostrophe_end=True):
   """Remove punctuation from text"""
   text = re.sub("[" + apostrophes + "]", "'", text) # no weird apostrophes
   text = re.sub("' *", "'", text) # space after apostrophes
   if rm_apostrophe:
     text = re.sub("'", "", text)
-  text = re.sub("'*$", "", text) # apostrophes at end of line
+  if rm_apostrophe_end:
+    text = re.sub("'*$", "", text) # apostrophes at end of line
   text = re.sub("[‒–—―⁓⸺⸻]", " ", text) # no weird dashes
 
   #TODO rather: keep only good chars
@@ -83,10 +84,12 @@ def is_consonants(chunk):
       return False
   return True
 
-def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False, strip=True):
+def normalize(text, downcase=True, rm_all=False, rm_apostrophe=False,
+    rm_apostrophe_end=True, strip=True):
   """Normalize text, ie. lowercase, no useless punctuation or whitespace"""
   res = norm_spaces(rm_punct(text.lower() if downcase else text,
-    rm_all=rm_all, rm_apostrophe=rm_apostrophe))
+    rm_all=rm_all, rm_apostrophe=rm_apostrophe,
+    rm_apostrophe_end=rm_apostrophe_end))
   if strip:
     return res.rstrip().lstrip()
   else:
diff --git a/verse.py b/verse.py
@@ -71,7 +71,7 @@ class Verse:
 
   @property
   def normalized(self):
-    return ''.join(normalize(x['original'], strip=False)
+    return ''.join(normalize(x['original'], strip=False, rm_apostrophe_end=False)
             if 'text_pron' not in x.keys() else x['text']
             for x in self.chunks).lstrip().rstrip()

	plint French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
	git clone https://a3nm.net/git/plint/
	Log | Files | Refs | README