squelette

find exceptional genders for a noun ending
git clone https://a3nm.net/git/squelette/
Log | Files | Refs

commit 53eb129ddee7eb128a160d01445a9b368b78d123
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat,  5 Sep 2015 18:59:28 +0200

start

Diffstat:
.gitignore | 7+++++++
eval.sh | 23+++++++++++++++++++++++
filter.py | 21+++++++++++++++++++++
forbidden | 1+
mkenigme.sh | 20++++++++++++++++++++
prepare.sh | 35+++++++++++++++++++++++++++++++++++
6 files changed, 107 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+noms
+noms_all
+ambig
+old/*
+eval_raw
+enigmes
+texte
diff --git a/eval.sh b/eval.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# usage: ./eval.sh ENDING N -- reads ./noms and ./noms_all, prints one stats line
+F=$(grep "$1\s[fm]" noms | grep '\sf\s' | awk 'BEGIN {s = 0} {s+=$3} END {print s}')
+M=$(grep "$1\s[fm]" noms | grep '\sm\s' | awk 'BEGIN { s = 0} {s+=$3} END {print s}')
+BF=$(grep ".$1\s[fm]" noms | grep '\sf\s' | sort -k3,3n | tail -n "$2" | awk 'BEGIN {s = 0} {s+=$3} END {print s}')
+BM=$(grep ".$1\s[fm]" noms | grep '\sm\s' | sort -k3,3n | tail -n "$2" | awk 'BEGIN {s = 0} {s+=$3} END {print s}')
+NF=$(grep "$1\s[fm]" noms | grep '\sf\s' | wc -l)
+NM=$(grep "$1\s[fm]" noms | grep '\sm\s' | wc -l)
+LLF=$(grep "$1\s[fm]" noms_all | grep '\sf\s' |
+  awk '{print length($1)}' | sort -n | head -n 1)
+LLM=$(grep "$1\s[fm]" noms_all | grep '\sm\s' |
+  awk '{print length($1)}' | sort -n | head -n 1)
+# caution: $LANG should be UTF (the lengths below are meant to count characters)
+LF=$(($LLF - ${#1}))
+LM=$(($LLM - ${#1}))
+V=$(bc <<< "$F > $M")
+if [ "$V" -eq 1 ]
+then
+  echo "$1 $2 f $F $M $BM $NF $NM $LF $LM"
+else
+  echo "$1 $2 m $M $F $BF $NM $NF $LM $LF"
+fi
+
diff --git a/filter.py b/filter.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python3
+
+import sys
+
+ws = set()
+
+# Print each stdin line unless its first space-separated field is a suffix
+# of (or has as a suffix) the first field of an already printed line;
+# the first fields of printed lines are remembered in ws.
+for l in sys.stdin:
+    l = l.rstrip()
+    f = l.split(' ')
+    ok = True
+    for w in ws:
+        if f[0].endswith(w) or w.endswith(f[0]):
+            ok = False
+            break
+    if ok:
+        print(l)
+        ws.add(f[0])
+
diff --git a/forbidden b/forbidden
@@ -0,0 +1 @@
+leitmotive
diff --git a/mkenigme.sh b/mkenigme.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# stdin: space-separated lines; field 1 is a word ending, field 3 a gender (m/f)
+PREV="zzz"
+PREVG="zzz"
+while read -r l
+do
+  if [ "$PREV" != zzz ]
+  then
+    ANS=$(grep "$PREV\s[fm]" noms_all | grep "\s$PREVGO\s" | sort -k3,3rn | head -n 3 | cut -f 1 | tr '\n' ' ' | sed 's/^ *//;s/ *$//')
+    echo "Réponse(s) possible(s) pour un mot $PREVGG en -$PREV: $ANS !"
+  else
+    echo "Bonjour à tous ! Jouons ensemble à un jeu divertissant. :)"
+  fi
+  PREV=$(cut -d ' ' -f1 <<< "$l")
+  PREVG=$(cut -d ' ' -f3 <<< "$l")
+  PREVGO=$(sed 's/m/x/;s/f/m/;s/x/f/' <<< "$PREVG")  # swap m <-> f
+  PREVGG=$(sed 's/m/masculin/;s/f/féminin/' <<< "$PREVGO")
+  echo "Y a-t-il un mot $PREVGG de la langue française se terminant en -$PREV ?"
+done
+
diff --git a/prepare.sh b/prepare.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# expects lexique as input
+# produces list as output
+
+cut -f1,4,8,5,14 - | grep '1$' | cut -f1,2,3,4 |
+  grep NOM | grep -v "[ '-]" | grep -v '\.\s' | cut -f 1,3,4 |
+  awk 'BEGIN {OFS = "\t"}
+  {
+    if ($2 == "f" || $2 == "m") {
+      print $0;
+    } else {
+      print $1, "f", $2; print $1, "m", $2;
+    };
+  }' | sort -k3,3n |
+  grep -vE "$(grep -v '#' forbidden | tr '\n' '|' | sed 's/|$//')" \
+  > noms_all
+cat noms_all | awk '$3 > 0' > noms
+
+curl 'http://a3nm.net/blog/french_gender_learning/leaves.txt' | cut -f 2 |
+  tr -d ' ' | sed 's/./& /g' | cut -d ' ' -f2- | tr -d ' ' | sort |
+  grep -v '^$' | uniq > ambig
+
+rm -f eval_raw
+pv -l ambig | while read -r l
+do
+  ./eval.sh "$l" 3 >> eval_raw
+done
+
+grep -v '0 0 [0-9]* [0-9]*$' eval_raw | awk '$5 > 2 && $7>20' |
+  awk '{printf "%s %s %s %.5f\n", $1, $2, $3, ($6/$5)*$4/($4+$5)}' |
+  sort -k4,4rn | grep -v '^[ 0-9.bcdfghjklmnpqrstvwxzç]*$' > enigmes
+
+./filter.py < enigmes | awk '$4 > 0.57' | cat - <(echo) | ./mkenigme.sh |
+  sed '$d' > texte
+