squelette

find exceptional genders for a noun ending
git clone https://a3nm.net/git/squelette/
Log | Files | Refs

commit 512101976a19ea408f97a4a2d69afb6f440161e7
parent 879fad7117a15a897c4f6bb64f7a09207b715df2
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat,  5 Sep 2015 20:07:32 +0200

rename

Diffstat:
prepare.sh | 44--------------------------------------------
squelette.sh | 44++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/prepare.sh b/prepare.sh
@@ -1,44 +0,0 @@
-#!/bin/bash
-# expects lexique as input
-# produces list as output
-
-cut -f1,4,8,5,14 - | grep '1$' | cut -f1,2,3,4 |
-  grep NOM | grep -v "[ '-]" | grep -v '\.\s' | cut -f 1,3,4 |
-  awk 'BEGIN {OFS = "\t"}
-  {
-    if ($2 == "f" || $2 == "m") {
-      print $0;
-    } else {
-      print $1, "f", $2; print $1, "m", $2;
-    };
-  }' | sort -k3,3n |
-  grep -vE `cat forbidden | grep -v '#' | tr '\n' '|' | sed 's/|$//'` \
-  > noms_all
-cat noms_all | awk '$3 > 0' > noms
-
-curl 'http://a3nm.net/blog/french_gender_learning/leaves.txt' | cut -f 2 |
-  tr -d ' ' | sed 's/./& /g' | cut -d ' ' -f2- | tr -d ' ' | sort |
-  grep -v '^$' | uniq > ambig
-
-rm -f eval_raw
-pv -l ambig | while read l
-do
-  ./eval.sh "$l" 3 >> eval_raw
-done
-
-grep -v '0 0 [0-9]* [0-9]*$' eval_raw | awk '$5 > 2 && $7>20' |
-  awk '{printf "%s %s %s %.5f\n", $1, $2, $3, $9* ($6/$5)*$4/($4+$5)}' |
-  sort -k4,4rn | grep -v '^[ 0-9.bcdfghjklmnpqrstvwxzç]*$' > enigmes
-
-./filter.py < enigmes | awk '$4 > 0.57' | cat - <(echo) | ./mkenigme.sh |
-  sed '$d' > texte
-
-head -42 enigmes | cut -d ' ' -f1,3 |
-  awk '{
-    printf "%s %s\n",
-      $2 == "f" ? "masculin" : "féminin ", $1
-  }' > enigmes2
-
-cat enigmes2 |
-  awk '{ printf "trouver un mot %s en -%s\n", $1, $2 }' > liste
-
diff --git a/squelette.sh b/squelette.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# expects lexique as input
+# produces list as output
+
+cut -f1,4,8,5,14 - | grep '1$' | cut -f1,2,3,4 |
+  grep NOM | grep -v "[ '-]" | grep -v '\.\s' | cut -f 1,3,4 |
+  awk 'BEGIN {OFS = "\t"}
+  {
+    if ($2 == "f" || $2 == "m") {
+      print $0;
+    } else {
+      print $1, "f", $2; print $1, "m", $2;
+    };
+  }' | sort -k3,3n |
+  grep -vE `cat forbidden | grep -v '#' | tr '\n' '|' | sed 's/|$//'` \
+  > noms_all
+cat noms_all | awk '$3 > 0' > noms
+
+curl 'http://a3nm.net/blog/french_gender_learning/leaves.txt' | cut -f 2 |
+  tr -d ' ' | sed 's/./& /g' | cut -d ' ' -f2- | tr -d ' ' | sort |
+  grep -v '^$' | uniq > ambig
+
+rm -f eval_raw
+pv -l ambig | while read l
+do
+  ./eval.sh "$l" 3 >> eval_raw
+done
+
+grep -v '0 0 [0-9]* [0-9]*$' eval_raw | awk '$5 > 2 && $7>20' |
+  awk '{printf "%s %s %s %.5f\n", $1, $2, $3, $9* ($6/$5)*$4/($4+$5)}' |
+  sort -k4,4rn | grep -v '^[ 0-9.bcdfghjklmnpqrstvwxzç]*$' > enigmes
+
+./filter.py < enigmes | awk '$4 > 0.57' | cat - <(echo) | ./mkenigme.sh |
+  sed '$d' > texte
+
+head -42 enigmes | cut -d ' ' -f1,3 |
+  awk '{
+    printf "%s %s\n",
+      $2 == "f" ? "masculin" : "féminin ", $1
+  }' > enigmes2
+
+cat enigmes2 |
+  awk '{ printf "trouver un mot %s en -%s\n", $1, $2 }' > liste
+