commit 44207705bfa2f35be5f7186d6b3984a4db9efcad
parent eff4c5182c2df5278376238cfc2364fdaf53f7f9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 4 Oct 2015 21:44:02 +0200
Split Lefff reading code out of duire.sh into read_lefff.sh
Diffstat:
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/duire.sh b/duire.sh
@@ -1,14 +1,14 @@
#!/bin/bash
-# uses Lefff, http://alpage.inria.fr/~sagot/lefff-en.html
-# files prefixes.txt and missing.txt were generated from lefff
+# Prerequisite: run `./read_lefff.sh LEFFF` (or read_words WORDS) first; this script reads infinitives.txt and nouns.txt
-grep 'cat=v,@W' lefff-ext-3.2-utf.txt | cut -f 1 | sort | uniq > infinitives.txt
-cat infinitives.txt | ./calcpref.py | tail -30 > prefixes.txt
-./duire.py infinitives.txt prefixes.txt | head -300 > missing.txt
+VERBS="infinitives.txt"
+NOUNS="nouns.txt"
-grep -E 'cat=nc.*@[fm]?s' ~/useful/lefff-ext-3.2-utf.txt | cut -f 1 | sort | uniq > nouns.txt
-cat nouns.txt | ./calcpref.py > prefixes_nouns.txt
-./duire.py nouns.txt <(tail -60 prefixes_nouns.txt | grep -v '^. ') |
+cat "$VERBS" | ./calcpref.py | tail -30 > prefixes.txt
+./duire.py "$VERBS" prefixes.txt | head -300 > missing.txt
+
+cat "$NOUNS" | ./calcpref.py > prefixes_nouns.txt
+./duire.py "$NOUNS" <(tail -60 prefixes_nouns.txt | grep -v '^. ') |
grep -vE '^.?.?.?:' | head -300 > missing_nouns.txt
diff --git a/read_lefff.sh b/read_lefff.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# uses Lefff, http://alpage.inria.fr/~sagot/lefff-en.html
+
+LEFFF="$1"
+grep 'cat=v,@W' "$LEFFF" | cut -f 1 | sort | uniq > infinitives.txt
+grep -E 'cat=nc.*@[fm]?s' "$LEFFF" | cut -f 1 | sort | uniq > nouns.txt
+