plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit da866fcbd6d798f5ba5f73bfb224b8f95dde1857
parent 3e6eb41e7c0b2a85b5fd4230cc3801e5d92278b9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 21 Sep 2013 11:08:16 +0200

fix problem with apostrophes followed by spaces

Diffstat:
common.py |  5 +++--
verse.py  | 15 ++++++++++++++-
2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/common.py b/common.py
@@ -6,6 +6,7 @@ import re
 
 vowels = 'aeiouyœæ'
 consonants = "bcçdfghjklmnpqrstvwxzñ'"
+apostrophes = "'’"
 legal = vowels + consonants + ' -'
 
 # a variant of x-sampa such that all french phonemes are one-character
@@ -44,8 +45,8 @@ def norm_spaces(text):
 
 def rm_punct(text, rm_all=False, rm_apostrophe=False):
   """Remove punctuation from text"""
-  text = re.sub("’", "'", text) # no weird apostrophes
-  text = re.sub("' ", "'", text) # space after apostrophes
+  text = re.sub("[" + apostrophes + "]", "'", text) # no weird apostrophes
+  text = re.sub("' *", "'", text) # space after apostrophes
   if rm_apostrophe:
     text = re.sub("'", "", text)
   text = re.sub("'*$", "", text) # apostrophes at end of line
diff --git a/verse.py b/verse.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 
 import common
-from common import consonants, normalize, is_consonants, is_vowels, sure_end_fem, strip_accents_one
+from common import apostrophes, consonants, normalize, is_consonants, is_vowels, sure_end_fem, strip_accents_one
 import re
 import vowels
 import haspirater
@@ -66,6 +66,19 @@ class Verse:
     self.chunks = [[{'original': y, 'text': normalize(y, rm_apostrophe=True)}
       for y in x] for x in pre_chunks]
 
+    # collapse apostrophes
+    self.chunks2 = []
+    acc = []
+    for w in self.chunks:
+      if re.search("[" + apostrophes + "]\s*$", w[-1]['original']):
+        acc += w
+      else:
+        self.chunks2.append(acc + w)
+        acc = []
+    if len(acc) > 0:
+      self.chunks2.append(acc)
+    self.chunks = self.chunks2
+
     # check forbidden characters
     for w in self.chunks:
       for y in w: