commit 53eb129ddee7eb128a160d01445a9b368b78d123
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 5 Sep 2015 18:59:28 +0200
start
Diffstat:
6 files changed, 107 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+noms
+noms_all
+ambig
+old/*
+eval_raw
+enigmes
+texte
diff --git a/eval.sh b/eval.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Usage: eval.sh SUFFIX N -- gender statistics for the nouns ending in SUFFIX.
+# Reads tab-separated "word gender freq" rows from ./noms and ./noms_all and
+# prints: SUFFIX N MAJ sum(MAJ) sum(MIN) topN(MIN) count(MAJ) count(MIN)
+# slack(MAJ) slack(MIN), MAJ being f when the f frequency sum is larger, else m.
+rows() { grep -e "$1\s[fm]" "$2" | grep "\s$3\s"; } # rows of $2 with gender $3
+total() { awk 'BEGIN {s = 0} {s+=$3} END {print s}'; } # sum of column 3
+shortest() { awk '{print length($1)}' | sort -n | head -n 1; } # min word length
+F=$(rows "$1" noms f | total)
+M=$(rows "$1" noms m | total)
+# B*: sum of the N largest frequencies; the leading "." skips SUFFIX itself
+BF=$(rows ".$1" noms f | sort -k3,3n | tail -n "$2" | total)
+BM=$(rows ".$1" noms m | sort -k3,3n | tail -n "$2" | total)
+NF=$(rows "$1" noms f | wc -l)
+NM=$(rows "$1" noms m | wc -l)
+# slack: extra letters of the shortest match; ${#1} needs a UTF-8 $LANG
+LF=$(( $(rows "$1" noms_all f | shortest) - ${#1} ))
+LM=$(( $(rows "$1" noms_all m | shortest) - ${#1} ))
+if [ "$(bc <<< "$F > $M")" -eq 1 ]; then
+  echo "$1 $2 f $F $M $BM $NF $NM $LF $LM"
+else
+  echo "$1 $2 m $M $F $BF $NM $NF $LM $LF"
+fi
diff --git a/filter.py b/filter.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python3
+"""Drop lines whose first field is suffix-related to an already kept line.
+
+Reads lines from stdin. A line is echoed to stdout unless its first
+space-separated field ends with, or is the ending of, the first field of a
+line printed earlier; earlier lines therefore take priority.
+"""
+
+import sys
+
+kept = set()  # first fields of the lines printed so far
+
+for raw in sys.stdin:
+    line = raw.rstrip()
+    key = line.split(' ')[0]
+    # a clash in either direction rejects the line
+    clash = any(key.endswith(k) or k.endswith(key) for k in kept)
+    if not clash:
+        print(line)
+        kept.add(key)
+
diff --git a/forbidden b/forbidden
@@ -0,0 +1 @@
+leitmotive
diff --git a/mkenigme.sh b/mkenigme.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Reads "SUFFIX N GENDER ..." lines on stdin and prints a French quiz: each
+# question asks for a word of the opposite gender ending in SUFFIX, and is
+# answered (top 3 rows of ./noms_all by column 3) before the next question.
+PREV="zzz" # sentinel: no question has been asked yet
+while read -r l; do
+  if [ "$PREV" != zzz ]; then
+    ANS=$(grep -e "$PREV\s[fm]" noms_all | grep "\s$PREVGO\s" | sort -k3,3rn |
+      head -n 3 | cut -f 1 | tr '\n' ' ' | sed 's/^ *//;s/ *$//')
+    echo "Réponse(s) possible(s) pour un mot $PREVGG en -$PREV: $ANS !"
+  else
+    echo "Bonjour à tous ! Jouons ensemble à un jeu divertissant. :)"
+  fi
+  PREV=$(cut -d ' ' -f1 <<< "$l")
+  PREVG=$(cut -d ' ' -f3 <<< "$l")
+  # PREVGO: the gender opposite to PREVG (m <-> f); PREVGG: its French name
+  PREVGO=$(sed 's/m/x/;s/f/m/;s/x/f/' <<< "$PREVG")
+  PREVGG=$(sed 's/m/masculin/;s/f/féminin/' <<< "$PREVGO")
+  echo "Y a-t-il un mot $PREVGG de la langue française se terminant en -$PREV ?"
+done
diff --git a/prepare.sh b/prepare.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# expects lexique as input (tab-separated, on stdin); reads ./forbidden
+# produces noms_all, noms, ambig, eval_raw, enigmes and texte as output
+
+cut -f1,4,5,8,14 - | grep '1$' | cut -f1,2,3,4 |
+  grep NOM | grep -v "[ '-]" | grep -v '\.\s' | cut -f 1,3,4 |
+  awk 'BEGIN {OFS = "\t"}
+  {
+    if ($2 == "f" || $2 == "m") {
+      print $0;
+    } else {
+      # presumably the gender was empty, so $2 is the count: emit both genders
+      print $1, "f", $2; print $1, "m", $2;
+    };
+  }' | sort -k3,3n |
+  grep -vE -f <(grep -v -e '#' -e '^$' forbidden) > noms_all
+awk '$3 > 0' noms_all > noms
+
+# ambig: second field of each leaf, first character dropped, deduplicated
+curl 'http://a3nm.net/blog/french_gender_learning/leaves.txt' | cut -f 2 |
+  tr -d ' ' | sed 's/./& /g' | cut -d ' ' -f2- | tr -d ' ' | sort |
+  grep -v '^$' | uniq > ambig
+
+rm -f eval_raw
+pv -l ambig | while read -r l; do
+  ./eval.sh "$l" 3 >> eval_raw
+done
+
+grep -v '0 0 [0-9]* [0-9]*$' eval_raw | awk '$5 > 2 && $7>20' |
+  awk '{printf "%s %s %s %.5f\n", $1, $2, $3, ($6/$5)*$4/($4+$5)}' |
+  sort -k4,4rn | grep -v '^[ 0-9.bcdfghjklmnpqrstvwxzç]*$' > enigmes
+
+# trailing empty line flushes the last answer; sed drops the dangling question
+./filter.py < enigmes | awk '$4 > 0.57' | cat - <(echo) | ./mkenigme.sh |
+  sed '$d' > texte