duire

French missing verbs with prefix forms
git clone https://a3nm.net/git/duire/
Log | Files | Refs

commit 44207705bfa2f35be5f7186d6b3984a4db9efcad
parent eff4c5182c2df5278376238cfc2364fdaf53f7f9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  4 Oct 2015 21:44:02 +0200

split lefff reading code

Diffstat:
duire.sh      | 16 ++++++++--------
read_lefff.sh |  8 ++++++++
2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/duire.sh b/duire.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 
-# uses Lefff, http://alpage.inria.fr/~sagot/lefff-en.html
-# files prefixes.txt and missing.txt were generated from lefff
+# run read_lefff LEFFF or read_words WORDS first
 
-grep 'cat=v,@W' lefff-ext-3.2-utf.txt | cut -f 1 | sort | uniq > infinitives.txt
-cat infinitives.txt | ./calcpref.py | tail -30 > prefixes.txt
-./duire.py infinitives.txt prefixes.txt | head -300 > missing.txt
+VERBS="infinitives.txt"
+NOUNS="nouns.txt"
 
-grep -E 'cat=nc.*@[fm]?s' ~/useful/lefff-ext-3.2-utf.txt | cut -f 1 | sort | uniq > nouns.txt
-cat nouns.txt | ./calcpref.py > prefixes_nouns.txt
-./duire.py nouns.txt <(tail -60 prefixes_nouns.txt | grep -v '^. ') |
+cat "$VERBS" | ./calcpref.py | tail -30 > prefixes.txt
+./duire.py "$VERBS" prefixes.txt | head -300 > missing.txt
+
+cat "$NOUNS" | ./calcpref.py > prefixes_nouns.txt
+./duire.py "$NOUNS" <(tail -60 prefixes_nouns.txt | grep -v '^. ') |
   grep -vE '^.?.?.?:' | head -300 > missing_nouns.txt
 
diff --git a/read_lefff.sh b/read_lefff.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# uses Lefff, http://alpage.inria.fr/~sagot/lefff-en.html
+
+LEFFF="$1"
+grep 'cat=v,@W' "$LEFFF" | cut -f 1 | sort | uniq > infinitives.txt
+grep -E 'cat=nc.*@[fm]?s' "$LEFFF" | cut -f 1 | sort | uniq > nouns.txt
+