commit 25a879c0f2170ac80c0da862a0a5146b4ed1efa4
parent d698e2137fa0ba51ffd8b54dac0252110794b20c
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Mon, 23 Mar 2015 01:41:21 +0100
continue
Diffstat:
calcpref.py | | | 33 | +++++++++++++++++++++++++++++++++ |
prefix.py | | | 49 | ++++++++++++++++++++++--------------------------- |
2 files changed, 55 insertions(+), 27 deletions(-)
diff --git a/calcpref.py b/calcpref.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python3 -O
+# -*- encoding: utf-8
+
+import operator
+import sys
+
+# Read the word list from stdin, then count, for each way of cutting a word
+# into a non-empty prefix and a suffix that is itself a word, one hit for
+# that prefix.
+words = set()
+
+for l in sys.stdin.readlines():
+    words.add(l.rstrip())
+
+myprefs = {}
+done = 0
+every = 200
+
+for w in words:
+    if done % every == 0:
+        # progress goes to stderr so stdout carries only the results
+        print("done %d" % done, file=sys.stderr)
+    done += 1
+    # cut points 1..len(w)-1 keep both the prefix and the suffix non-empty
+    for i in range(1, len(w)):
+        if w[i:] in words:
+            p = w[:i]
+            myprefs[p] = myprefs.get(p, 0) + 1
+
+# ascending by count, so the most frequent prefixes are printed last
+sorted_x = sorted(myprefs.items(), key=operator.itemgetter(1))
+for (t, v) in sorted_x:
+    print("%s %s" % (t, v))
diff --git a/prefix.py b/prefix.py
@@ -1,45 +1,51 @@
-#!/usr/bin/python3
+#!/usr/bin/python3 -O
 # -*- encoding: utf-8
+# usage: prefix.py WORDS PREFIXES
+# WORDS has one word per line; PREFIXES has one "prefix count" pair per line.
+# For every word that starts with a listed prefix, strip the prefix; when the
+# remainder is not itself a word, group the word under that remainder. Print
+# one "remainder: word word ..." line per group, best-scoring group first.
+import operator
 import sys
-prefixes = ["dé", "re", "par", "ex", "sous", "sur", "in", "as", "bi", "em",
-"ac", "rec", "di", "su", "en"]
+prefixes = {}
 words = set()
 interesting = {}
 threshold = 2
+maxlen = 2
+keep = 30
+exp = 0.2
-for l in sys.stdin.readlines():
+with open(sys.argv[1]) as fwords:
+    wordlines = fwords.readlines()
+for l in wordlines:
     words.add(l.rstrip())
+with open(sys.argv[2]) as fpref:
+    for l in fpref:
+        l = l.rstrip()
+        if not l:
+            # tolerate blank lines in the prefix file
+            continue
+        # split on the last space only: the prefix itself may contain spaces
+        t = l.rsplit(' ', 1)
+        prefixes[t[0]] = int(t[1])
 for w in words:
-    for p in prefixes:
+    for p, v in prefixes.items():
         if w.startswith(p):
             w2 = w[len(p):]
+            if w2 in words:
+                continue
             if w2 not in interesting.keys():
                 interesting[w2] = set()
-            interesting[w2].add(w)
+            interesting[w2].add((w, v))
-for wi, ws in interesting.items():
-    if wi in words:
-        continue
-    if len(ws) >= threshold:
-        print(wi, ws)
+# a group's score is the sum of count**exp over its (word, count) pairs
+sortint = sorted(interesting.items(), key=lambda x: -sum((t[1]**exp for t in x[1]), 0))
-#myprefs = {}
-#pthresh = 4
-#cons = "bcçdfghjklmnpqrstvwxz"
-# for wi, ws in interesting.items():
-# if wi in words:
-# continue
-# for w2 in words:
-# if w2.endswith(wi):
-# mypref = w2[:len(wi)]
-# if mypref not in myprefs.keys():
-# myprefs[mypref] = 0
-# myprefs[mypref] += 1
-#
-# for mypref, v in myprefs.items():
-# if v >= pthresh:
-# print (mypref, v)
+for (wi, ws) in sortint:
+    # within a group, words whose prefix has the highest count come first
+    ws = sorted(ws, key=operator.itemgetter(1), reverse=True)
+    print(wi+":", ' '.join(t[0] for t in ws))