littre_syll.sh (1663B)
#!/bin/bash

# Extract pronunciation data from XMLittré and compare the syllable counts it
# gives with the counts computed by plint.
# Data source: https://bitbucket.org/Mytskine/xmlittre-data.git
#
# Usage: littre_syll.sh XMLITTRE_DIR
#   XMLITTRE_DIR  directory whose *.xml files are read
#
# Reads from the current directory: additions_poesie, additions_vers, raw.tpl
# Runs: ../plint.py, ./compare_plint.py
# Writes to the current directory: prons, prons_poesie, prons_vers,
#   prons_special, prons_normal, prons_num, plint_raw_nums, plint_num,
#   conflicts
#
# Records in the intermediate files have the form "term%value".
#
# Note: 'set -e' / 'pipefail' are deliberately not enabled, because several
# grep stages legitimately exit 1 when nothing matches.

die() {
  printf '%s\n' "$*" >&2
  exit 2
}

# Stream the given files to stdout, with a progress meter when pv is
# installed and silently via cat otherwise.
show_progress() {
  if command -v pv > /dev/null 2>&1; then
    pv "$@"
  else
    cat -- "$@"
  fi
}

# Without this check an empty $1 would make the glob below expand to /*.xml.
[[ $# -ge 1 && -d $1 ]] || die "usage: ${0##*/} XMLITTRE_DIR"

# One "term%pronunciation" line per <entree> element.
xmlstarlet sel -t -m "//entree" -v "@terme" -v "\"%\"" \
  -v "entete/prononciation" -n "$1"/*.xml > prons

# Entries whose pronunciation note states a syllable count explicitly.
grep -E "(syllabes en poésie|en poésie,? de)" prons > prons_poesie
grep -E "(en vers,? de|syllabes en vers)" prons > prons_vers

# Explicit counts: keep the first record seen for each term (the additions_*
# files take precedence), then turn the first number word or digit-bearing
# word of the note into the count.
cat additions_poesie additions_vers prons_poesie prons_vers |
  awk 'BEGIN {FS = "%";} !a[$1]++;' |
  while read -r line; do
    # First space-separated word of the part before the first '%'.
    term=${line%%\%*}
    term=${term%% *}
    count=$(
      printf '%s\n' "${line#*\%}" | tr ' ' '\n' |
        sed '
          s/^une$/1/;
          s/^deux$/2/;
          s/^trois$/3/;
          s/^quatre$/4/;
          s/^cinq$/5/;
          s/^cinç$/5/;
          s/^six$/6/;
          s/^sept$/7/;
          s/^disylla.*$/2/;
          s/^trisylla.*$/3/;
        ' | grep '[0-9]' | head -n 1
    )
    # Skip entries with no recognisable count. Emitting "term%" without a
    # newline (as happened before) glued the next record onto the same line.
    if [[ -n $count ]]; then
      printf '%s%%%s\n' "$term" "$count"
    fi
  done > prons_special

# Implicit counts: for entries that have a pronunciation and no space in the
# term, count the hyphen-separated chunks of the first word of the
# pronunciation.
show_progress prons |
  grep -v '%$' |
  grep -v ' .*%' |
  awk 'BEGIN {FS = "%";} !a[$1]++;' |
  while read -r line; do
    term=${line%%\%*}
    printf '%s%%' "${term%% *}"
    # The second sed removes a hyphen followed by non-vowel characters up to
    # an apostrophe, so that chunk is not counted (presumably an elided,
    # vowel-less chunk -- TODO confirm against the data).
    printf '%s\n' "${line#*\%}" |
      sed 's/ *- */-/g' | cut -d ' ' -f 1 | tr -d ',' |
      sed "s/-[^aâàeéêèiîoôuùûäëïöü-]*'//" | tr '-' '\n' | wc -l
  done > prons_normal

# Merge both sources (explicit counts win), drop commas and records with an
# empty term, and lowercase everything (\L is a GNU sed extension).
show_progress prons_special prons_normal |
  awk 'BEGIN {FS = "%";} !a[$1]++;' |
  tr -d ',' | sort | grep -v '^%' | sed 's/.*/\L&/' > prons_num

# Run plint on every term and pull the number out of its "total:" lines.
show_progress prons_num | cut -d '%' -f1 |
  ../plint.py raw.tpl 2>&1 |
  grep 'total:' | cut -d ':' -f4 |
  cut -d ')' -f1 > plint_raw_nums

# Pair each term with plint's count, in the same "term%count" format.
paste <(cut -d '%' -f1 prons_num) plint_raw_nums |
  tr '\t' '%' | sed 's/ *% */%/' \
  > plint_num

./compare_plint.py plint_num prons_num > conflicts