commit 72894113536386140a81a875bf6ed39df308f3be
parent 54867108e2fe53802cce69c5594eb4c503068aef
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Mon, 12 Aug 2019 00:08:04 +0200
all sorts of annoying 3-syl vowel clusters
Diffstat:
1 file changed, 25 insertions(+), 0 deletions(-)
diff --git a/vowels.py b/vowels.py
@@ -104,12 +104,37 @@ def possible_weights_seed(chunk):
if (chunk['text'][-1] == 'ï' and len(chunk['text']) >= 3 and not
chunk['text'][-3:-1] == 'ou'):
return [3]
+ # ostéoarthrite
+ if "éoa" in chunk['text']:
+ return [3]
# antiaérien; but let's play it safe
if "iaé" in chunk['text']:
return [2, 3]
# giaour, miaou, niaouli
if "iaou" in chunk['text']:
return [2, 3]
+ # bioélectrique
+ if "ioé" in chunk['text']:
+ return [2, 3]
+ # méiose, nucléion, etc.
+ if "éio" in chunk['text']:
+ return [2, 3]
+ # radioactif, radioamateur, etc.
+ if "ioa" in chunk['text']:
+ return [2, 3]
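+ # pléiade
+ # NOTE(review): "pléiade" contains "éia", not "éio"; the test below repeats
+ # the earlier "éio" check and can never be reached -- presumably "éia" was meant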
+ if "éio" in chunk['text']:
+ return [2, 3]
+ # pompéien, tarpéien...
+ # in theory the "-ie" should give a diaeresis, so 3 syllables
+ # let's keep the benefit of the doubt...
+ # => this also gives 3 as a possibility for "obéie"...
+ if "éie" in chunk['text']:
+ return [2, 3]
+ # tolstoïen
+ # same remark
+ if "oïe" in chunk['text']:
+ return [2, 3]
if chunk['text'] in ['ai', 'ou', 'eu', 'ei', 'eau', 'au', 'oi']:
return [1]
# we can't tell