commit bfbd5434223c00b39e0f66a922111abd57aeaf25
parent f06859446c3fc1f99ef4f3b13e9c60dd42747114
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Wed, 26 Dec 2012 11:28:14 +0100
replace weird Unicode dashes with spaces
Diffstat:
1 file changed, 1 insertion(+), 0 deletions(-)
diff --git a/common.py b/common.py
@@ -47,6 +47,7 @@ def rm_punct(text):
text = re.sub("’", "'", text) # no weird apostrophes
text = re.sub("' ", "'", text) # space after apostrophes
text = re.sub("'*$", "", text) # apostrophes at end of line
+ text = re.sub("[‒–—―⁓⸺⸻]", " ", text) # no weird dashes
#TODO rather: keep only good chars
pattern = re.compile("[^'\w -]", re.UNICODE)