duire

french missing verbs with prefix forms
git clone https://a3nm.net/git/duire/
Log | Files | Refs

commit d698e2137fa0ba51ffd8b54dac0252110794b20c
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 23 Mar 2015 00:57:40 +0100

initial

Diffstat:
inf.sh    |  4 ++++
prefix.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+), 0 deletions(-)

#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Find French verb stems that are "missing" but exist in prefixed forms.

Reads one word per line on standard input.  For every word that starts
with one of the known prefixes, the remainder (the stem) is recorded.  A
stem is reported when it is not itself in the word list and at least
``threshold`` distinct input words reduce to it.

The word list is produced by the companion script ``inf.sh``:

    #!/bin/bash
    grep 'cat=v,@W' lefff-ext-3.2-utf.txt | cut -f 1 | sort | uniq

(presumably the infinitive verb entries of the Lefff lexicon -- confirm
against the lexicon's tag documentation).

Usage:

    ./inf.sh | ./prefix.py
"""

import sys
from collections import defaultdict
from typing import Dict, Iterable, Optional, Sequence, Set

# Prefixes that are stripped from the front of a word to obtain a stem.
prefixes = ["dé", "re", "par", "ex", "sous", "sur", "in", "as", "bi", "em",
            "ac", "rec", "di", "su", "en"]

# Minimum number of distinct prefixed words a stem needs to be reported.
threshold = 2


def read_words(lines: Iterable[str]) -> Set[str]:
    """Return the set of non-empty, whitespace-stripped words in *lines*.

    Blank lines are skipped so that the empty string never becomes a
    "known word".
    """
    stripped = (line.strip() for line in lines)
    return {word for word in stripped if word}


def find_missing_stems(
    words: Iterable[str],
    prefixes: Optional[Sequence[str]] = None,
    threshold: Optional[int] = None,
) -> Dict[str, Set[str]]:
    """Map each missing stem to the prefixed words that contain it.

    Args:
        words: the known words.
        prefixes: prefixes to strip; defaults to the module-level list.
        threshold: minimum number of distinct prefixed words required for
            a stem to be kept; defaults to the module-level value.

    Returns:
        A dict, ordered by stem, whose keys are stems that are not in
        *words* and whose values are the sets of words of the form
        ``prefix + stem`` found in *words*.
    """
    if prefixes is None:
        prefixes = globals()["prefixes"]
    if threshold is None:
        threshold = globals()["threshold"]

    known = set(words)
    derived = defaultdict(set)
    for word in known:
        # A word may match several prefixes ("su"/"sur", "re"/"rec");
        # every match contributes its own candidate stem.
        for prefix in prefixes:
            if not word.startswith(prefix):
                continue
            stem = word[len(prefix):]
            if stem:  # a word equal to a bare prefix has no stem
                derived[stem].add(word)

    return {
        stem: derived[stem]
        for stem in sorted(derived)
        if stem not in known and len(derived[stem]) >= threshold
    }


def format_entry(stem: str, prefixed: Iterable[str]) -> str:
    """Render one result line as ``stem {'word1', 'word2'}``.

    The layout mirrors printing a stem followed by a set, but the members
    are sorted so the output is identical from one run to the next.
    """
    members = ", ".join(repr(word) for word in sorted(prefixed))
    return "{} {{{}}}".format(stem, members)


def main() -> None:
    """Read words from stdin and print every missing stem found."""
    known = read_words(sys.stdin)
    for stem, prefixed in find_missing_stems(known).items():
        print(format_entry(stem, prefixed))


if __name__ == "__main__":
    main()

# Disabled experiment, kept for reference: count what precedes each missing
# stem in the word list in order to discover new candidate prefixes.
# NOTE(review): `w2[:len(wi)]` takes the first len(wi) characters of w2, not
# the part in front of the stem; presumably `w2[:-len(wi)]` was intended --
# confirm before re-enabling.
#
# myprefs = {}
# pthresh = 4
# cons = "bcçdfghjklmnpqrstvwxz"
# for wi, ws in interesting.items():
#     if wi in words:
#         continue
#     for w2 in words:
#         if w2.endswith(wi):
#             mypref = w2[:len(wi)]
#             if mypref not in myprefs.keys():
#                 myprefs[mypref] = 0
#             myprefs[mypref] += 1
#
# for mypref, v in myprefs.items():
#     if v >= pthresh:
#         print (mypref, v)