commit 322ee37350f05fcd7edda86a31ad555ddd64dbfd
parent 467322a8972cef28ab6790e2a3044269d7e3efb0
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 27 Aug 2015 02:16:54 +0200

    only accept lexique words with correct split

Diffstat:
 only3.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/only3.py b/only3.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 
-from common import vowels
+from common import vowels, fem
 import re
 import sys
 
@@ -8,8 +8,13 @@ for l in sys.stdin.readlines():
     f = l.split('\t')
     vowels_regexp = re.compile('([' + vowels + '])')
     f[-1] = f[-1].strip()
-    if len(f) <= 3:
-        f.append("")
+    if len(f) < 3:
+        # f.append("")
+        # only take words with complete syllabification in lexique
+        continue
+    if len(f[2].split('-')) != (4 if fem(f[0]) else 3):
+        # garbled syllabification from lexique
+        continue
     chunks = f[1].split('-')
     # words cannot start with a vowel
     # as last consonant before last vowel will be kept