plint_extra

various extra tools around plint
git clone https://a3nm.net/git/plint_extra/
Log | Files | Refs | README

littre_syll.sh (1663B)


      1 #!/bin/bash
      2 
      3 # extract pronunciation from xmlittre
      4 # https://bitbucket.org/Mytskine/xmlittre-data.git
      5 
      # Stage 1: dump the raw pronunciation table.
      #
      # $1 is the directory holding the xmlittre-data *.xml files. Abort early
      # when it is missing or empty: otherwise the glob below silently becomes
      # /*.xml and the script goes on to build its tables from nothing.
      : "${1:?usage: ${0##*/} XMLITTRE_DATA_DIR}"

      # One line per <entree> element: the @terme attribute, a literal "%", then
      # the text of entete/prononciation. "%" is the field separator that every
      # later stage splits on.
      xmlstarlet sel -t -m "//entree" -v "@terme" -v "\"%\"" \
        -v "entete/prononciation" -n "$1"/*.xml > prons

      # Entries whose pronunciation note mentions a syllable count "en poésie"
      # or "en vers"; these are turned into numbers by the next stage.
      grep -E "(syllabes en poésie|en poésie,? de)" prons > prons_poesie
      grep -E "(en vers,? de|syllabes en vers)" prons > prons_vers
     #######################################
     # Turn "TERM%note" lines into "WORD%COUNT" lines.
     # WORD is the first space-separated word of TERM. COUNT is the first
     # space-separated token of the note that contains a digit once the French
     # number words une..sept (plus the spelling "cinç") and tokens starting
     # with "disylla"/"trisylla" have been replaced by digits.
     # Entries without any such token are skipped: printing a bare "WORD%"
     # with no newline would glue the record onto the next entry.
     # Arguments: none
     # Inputs:    "TERM%note" lines on stdin
     # Outputs:   "WORD%COUNT" lines on stdout
     #######################################
     littre_special_counts() {
       local line word note count
       # -r keeps backslashes literal; default IFS still trims outer blanks.
       while read -r line; do
         word=${line%%'%'*}   # everything before the first "%"
         word=${word%% *}     # first word only
         note=${line#*'%'}    # everything after the first "%"
         count=$(
           printf '%s\n' "$note" | tr ' ' '\n' |
             sed '
               s/^une$/1/;
               s/^deux$/2/;
               s/^trois$/3/;
               s/^quatre$/4/;
               s/^cinq$/5/;
               s/^cinç$/5/;
               s/^six$/6/;
               s/^sept$/7/;
               s/^disylla.*$/2/;
               s/^trisylla.*$/3/;
               ' | grep '[0-9]' | head -1
         )
         if [[ -n "$count" ]]; then
           printf '%s%%%s\n' "$word" "$count"
         fi
       done
     }

     # The additions_* files come first, so their entries win the
     # first-occurrence de-duplication on the term field.
     cat additions_poesie additions_vers prons_poesie prons_vers |
       awk 'BEGIN {FS = "%";} !a[$1]++;' |
       littre_special_counts > prons_special
     29 
     #######################################
     # Turn "TERM%pronunciation" lines into "WORD%COUNT" lines by counting the
     # hyphen-separated chunks of the pronunciation.
     # Only the first space-separated word of the pronunciation is used, after
     # blanks around hyphens and commas are removed. The first chunk made only
     # of characters outside the listed vowels and followed by an apostrophe
     # is dropped before counting.
     # Arguments: none
     # Inputs:    "TERM%pronunciation" lines on stdin
     # Outputs:   "WORD%COUNT" lines on stdout (COUNT as printed by wc -l)
     #######################################
     littre_normal_counts() {
       local line word pron
       # -r keeps backslashes literal; default IFS still trims outer blanks.
       while read -r line; do
         word=${line%%'%'*}   # everything before the first "%"
         word=${word%% *}     # first word only
         pron=${line#*'%'}    # everything after the first "%"
         printf '%s%%' "$word"
         printf '%s\n' "$pron" | sed 's/ *- */-/g' | cut -d ' ' -f 1 | tr -d ',' |
           sed "s/-[^aâàeéêèiîoôuùûäëïöü-]*'//" | tr '-' '\n' | wc -l
       done
     }

     # Keep only entries that have a pronunciation (no trailing "%") and whose
     # term has no space before the "%", then de-duplicate on the term field
     # (first occurrence wins). pv only displays progress.
     pv prons |
       grep -v '%$' |
       grep -v ' .*%' |
       awk 'BEGIN {FS = "%";} !a[$1]++;' |
       littre_normal_counts > prons_normal
     40 
     # Merge both tables into prons_num. prons_special is read first, so its
     # entry is the one kept when a term appears in both files. Commas are
     # stripped, the lines are sorted, lines with an empty term (leading "%")
     # are dropped, and everything is lowercased (\L is a GNU sed extension).
     pv prons_special prons_normal \
       | awk -F '%' '!seen[$1]++' \
       | tr -d ',' \
       | sort \
       | grep -v '^%' \
       | sed 's/.*/\L&/' \
       > prons_num
     44 
     # Feed the term column of prons_num to plint with the raw.tpl template
     # and, from its combined stdout/stderr, keep the lines containing
     # "total:". The value stored in plint_raw_nums is the 4th ":"-separated
     # field of each such line, cut at the first ")".
     # NOTE(review): presumably plint reports exactly one "total:" line per
     # input term; the following paste step relies on that -- confirm.
     pv prons_num \
       | cut -d '%' -f 1 \
       | ../plint.py raw.tpl 2>&1 \
       | grep 'total:' \
       | cut -d ':' -f 4 \
       | cut -d ')' -f 1 \
       > plint_raw_nums
     49 
     # Pair each term of prons_num with the number on the same line of
     # plint_raw_nums, join the two with "%" and trim the blanks around the
     # first "%" so that plint_num has the same TERM%COUNT layout as prons_num.
     cut -d '%' -f 1 prons_num \
       | paste - plint_raw_nums \
       | tr '\t' '%' \
       | sed 's/ *% */%/' \
       > plint_num

     # Write whatever compare_plint.py reports for the two tables to conflicts.
     ./compare_plint.py plint_num prons_num > conflicts
     55