commit 82139a3b897260dbd9775801b613e3406042a2d2
parent d3ee60085c5eb8647c2e6c23f1014acc973a0cdf
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 5 Jan 2014 14:52:59 +0100
fix apostrophes again
Diffstat:
1 file changed, 2 insertions(+), 0 deletions(-)
diff --git a/common.py b/common.py
@@ -60,6 +60,8 @@ def rm_punct(text, rm_all=False, rm_apostrophe=False, rm_apostrophe_end=True):
else:
pattern = re.compile("[^\w]", re.UNICODE)
text2 = pattern.sub('', text)
+ text2 = re.sub("\s'*$", " ", text2) # no lonely apostrophes
+ text2 = re.sub("^'*$", "", text2) # not only apostrophes
return text2
def is_vowels(chunk, with_h=False, with_y=True, with_crap=False):