plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit c1a68b108950f1281d4b8d2d6748035ccf92dde6
parent eeae0d2afab46d6ae86169cb34803bff4f8dbd29
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 12 Aug 2019 01:26:00 +0200

code to count syllables

Diffstat:
lexique_comparison/count_syllables_lexique.py | 25 +++++++++++++++++++++++++
lexique_comparison/count_syllables_plint.py | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/lexique_comparison/count_syllables_lexique.py b/lexique_comparison/count_syllables_lexique.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python3
+
+# count the number of syllables of words according to lexique
+
+import sys
+
+vowels = "ae$E2@#u)9ioO(y"
+consonants = "dpgmtRwszlbkZjknfvSNJx8"
+
+for l in sys.stdin.readlines():
+    f = l.strip().split("\t")
+    nsyl = 0
+    for a in f[1]:
+        if a in vowels:
+            nsyl += 1
+        elif a in consonants:
+            pass
+        else:
+            print("unknown phoneme %s" % a, file=sys.stderr)
+            sys.exit(1)
+    # workaround bug in lexique
+    if f[1].endswith("@") and f[0] != "afin de":
+        nsyl -= 1
+    print("%s\t%d" % (f[0], nsyl))
+
diff --git a/lexique_comparison/count_syllables_plint.py b/lexique_comparison/count_syllables_plint.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python3
+
+import os
+import sys
+
+# modules are in the parent folder
+sys.path.insert(1, os.path.join(sys.path[0], '..'))
+
+import verse
+import rhyme
+import template
+import diaeresis
+from pprint import pprint
+
+diaeresis.load_diaeresis("diaeresis.json")
+
+templateobj = template.Template()
+patternobj = template.Pattern("12")
+
+for l in sys.stdin.readlines():
+    w = (l.strip().split("\t"))[0]
+    v = verse.Verse(w, templateobj, patternobj)
+    rhymeobj = rhyme.Rhyme(v.normalized,
+            patternobj.constraint, templateobj.mergers, templateobj.options)
+    v.phon = rhymeobj.phon
+    v.annotate()
+    mx = 0
+    mn = 0
+    for c in v.chunks:
+        if 'weights' in c.keys():
+            mn += min(c['weights'])
+            mx += max(c['weights'])
+    print("%s\t%d\t%d" % (w, mn, mx))
+