plint_extra

various extra tools around plint
git clone https://a3nm.net/git/plint_extra/
Log | Files | Refs | README

commit 02763d08d110e47567aa3ad5dc84ed4b5cb2fd24
parent 055c35f57ee9789a9e717c29e9286450a0496908
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 18 Aug 2019 14:41:34 +0200

import experimental things from gitlab.com/a3nm/plint

Diffstat:
README | 5+++++
lexique_comparison/count_syllables_lexique.py | 25+++++++++++++++++++++++++
lexique_comparison/count_syllables_plint.py | 31+++++++++++++++++++++++++++++++
littre/.gitignore | 9+++++++++
littre/compare_plint.py | 28++++++++++++++++++++++++++++
littre/littre_syll.sh | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
littre/raw.tpl | 2++
7 files changed, 155 insertions(+), 0 deletions(-)

diff --git a/README b/README @@ -20,3 +20,8 @@ something along the lines of: poem2html/make_poem.sh poem_file > www/poem.html done +- littre: comparison of the number of syllables counted by plint to what is + indicated in littre + +- lexique_comparison: comparison of the number of counted + syllables to lexique diff --git a/lexique_comparison/count_syllables_lexique.py b/lexique_comparison/count_syllables_lexique.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 + +# count the number of syllables of words according to lexique + +import sys + +vowels = "ae$E2@#u)9ioO(y" +consonants = "dpgmtRwszlbkZjknfvSNJx8" + +for l in sys.stdin.readlines(): + f = l.strip().split("\t") + nsyl = 0 + for a in f[1]: + if a in vowels: + nsyl += 1 + elif a in consonants: + pass + else: + print("unknown phoneme %s" % a, file=sys.stderr) + sys.exit(1) + # workaround bug in lexique + if f[1].endswith("@") and f[0] != "afin de": + nsyl -= 1 + print("%s\t%d" % (f[0], nsyl)) + diff --git a/lexique_comparison/count_syllables_plint.py b/lexique_comparison/count_syllables_plint.py @@ -0,0 +1,31 @@ +#!/usr/bin/python3 + +import os +import sys + +# modules are in the parent folder +import plint.pattern +from plint.rhyme import Rhyme +from plint.template import Template +from plint.verse import Verse + +sys.path.insert(1, os.path.join(sys.path[0], '..')) + +template = Template() +pattern = plint.pattern.Pattern("12") + +for l in sys.stdin.readlines(): + w = (l.strip().split("\t"))[0] + verse = Verse(w, template, pattern) + rhyme = Rhyme(verse.normalized, + pattern.constraint, template.mergers, template.options) + verse.phon = rhyme.phon + verse.annotate() + mx = 0 + mn = 0 + for c in verse.chunks: + if 'weights' in c.keys(): + mn += min(c['weights']) + mx += max(c['weights']) + print("%s\t%d\t%d" % (w, mn, mx)) + diff --git a/littre/.gitignore b/littre/.gitignore @@ -0,0 +1,9 @@ +conflicts +plint_num +plint_raw_nums +prons +prons_normal +prons_num +prons_poesie +prons_special +prons_vers diff 
--git a/littre/compare_plint.py b/littre/compare_plint.py @@ -0,0 +1,28 @@ +#!/usr/bin/python3 + +"""compare file from littre and file from plint for disagreements""" + +import sys + +plint = open(sys.argv[1]) +littre = open(sys.argv[2]) + +while True: + l_plint = plint.readline() + if not l_plint: + break + l_littre = littre.readline() + w_plint, p_plint = l_plint.split('%') + w_littre, p_littre = l_littre.split('%') + p_littre = int(p_littre) + assert(w_plint == w_littre) + w = w_plint + if '-' in p_plint: + lo, hi = p_plint.split('-') + lo = int(lo) + hi = int(hi) + else: + lo = int(p_plint) + hi = lo + if not (lo <= p_littre <= hi): + print ("%s : %d vs %d-%d" % (w, p_littre, lo, hi)) diff --git a/littre/littre_syll.sh b/littre/littre_syll.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# extract prononciation from xmllittre +# https://bitbucket.org/Mytskine/xmlittre-data.git + +xmlstarlet sel -t -m "//entree" -v "@terme" -v "\"%\"" \ + -v "entete/prononciation" -n "$1"/*.xml > prons +cat prons | grep -E "(syllabes en poésie|en poésie,? de)" > prons_poesie +cat prons | grep -E "(en vers,? 
de|syllabes en vers)" > prons_vers +cat additions_poesie additions_vers prons_poesie prons_vers | + awk 'BEGIN {FS = "%";} !a[$1]++;' | + while read l; do + echo "$l" | cut -d '%' -f 1 | cut -d ' ' -f 1 | tr -d '\n' + echo -n '%' + echo "$l" | cut -d '%' -f 2- | tr ' ' '\n' | + sed ' + s/^une$/1/; + s/^deux$/2/; + s/^trois$/3/; + s/^quatre$/4/; + s/^cinq$/5/; + s/^cinç$/5/; + s/^six$/6/; + s/^sept$/7/; + s/^disylla.*$/2/; + s/^trisylla.*$/3/; + ' | grep '[0-9]' | head -1 + done > prons_special + +pv prons | + grep -v '%$' | + grep -v ' .*%' | + awk 'BEGIN {FS = "%";} !a[$1]++;' | + while read l; do + echo "$l" | cut -d '%' -f 1 | cut -d ' ' -f 1 | tr -d '\n' + echo -n '%' + echo "$l" | cut -d '%' -f 2- | sed 's/ *- */-/g' | cut -d ' ' -f 1 | tr -d ',' | + sed "s/-[^aâàeéêèiîoôuùûäëïöü-]*'//" | tr '-' '\n' | wc -l + done > prons_normal + +pv prons_special prons_normal | + awk 'BEGIN {FS = "%";} !a[$1]++;' | + tr -d ',' | sort | grep -v '^%' | sed 's/.*/\L&/' > prons_num + +pv prons_num | cut -d '%' -f1 | + ../plint.py raw.tpl 2>&1 | + grep 'total:' | cut -d ':' -f4 | + cut -d ')' -f1 > plint_raw_nums + +paste <(cat prons_num| cut -d'%' -f1) plint_raw_nums | + tr '\t' '%' | sed 's/ *% */%/' \ + > plint_num + +./compare_plint.py plint_num prons_num > conflicts + diff --git a/littre/raw.tpl b/littre/raw.tpl @@ -0,0 +1,2 @@ +! +12 A X