commit 772fae10bb9ee944b12a524a3bc8e8064b51109e
parent 4e22f1673c38be797733823962edfbb389486a4a
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 27 Sep 2011 19:46:24 +0200
renaming
Diffstat:
make_db.py | | | 164 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
manage.py | | | 164 | ------------------------------------------------------------------------------- |
2 files changed, 164 insertions(+), 164 deletions(-)
diff --git a/make_db.py b/make_db.py
@@ -0,0 +1,164 @@
+#!/usr/bin/python3 -O
+
+# TODO frequencies are off
+# TODO bibliographe number of syllables?!
+
+import haspirater
+import metric
+import sys
+
+# NOTE(review): not referenced by any other code visible in this file.
+seen = {}
+
+# Letters recognised when inspecting a word's spelling; Word.elidable tests
+# the first letter against `vowels`, and Word.ending uses both strings.
+vowels = "aàâãeéèêëiîïoôöuùûüy"
+consonants = "bcçdfghjklmnpqrstvwxz"
+
+# Symbols of the phonetic transcription that count as vowels; used by
+# Word.phon_ending and Word.ok.
+# NOTE(review): the transcription alphabet is defined by the input lexicon,
+# which is not visible here -- confirm the symbol set against it.
+phon_vowels = "()$#289aeEioOuy@"
+
+# Spelling endings tested by Word.feminine.
+sure_end_fem = ['e', 'es', 'ent']
+# Phonetic endings Word.feminine consults for words spelled with "-ent".
+# NOTE(review): presumably these symbols denote sounded (nasal) endings --
+# confirm against the lexicon's phonetic alphabet.
+phon_non_end_fem = ['#', ')']
+
+class Word:
+    """One lexicon entry together with the metrical data derived from it.
+
+    Attributes set by the constructor:
+        word, phon, base, kind: strings copied from the input columns.
+        freq: numeric frequency computed by the caller.
+        nsyl: two-item list [min, max] of syllable counts; written to the
+            min_nsyl / max_nsyl columns by render_sql.
+        mult: boolean flag written to the "multiple" column.
+        redundant: always initialised to False; not read by this class.
+    """
+
+    def __init__(self, word, phon, base, kind, freq, nsyl, mult):
+        self.word = word
+        self.phon = phon
+        self.base = base
+        self.kind = kind
+        self.freq = freq
+        # Start with min == max == the supplied count; do_extends() widens
+        # the range with the counts obtained from metric.parse().
+        self.nsyl = [nsyl, nsyl]
+        self.mult = mult
+        self.redundant = False
+        self.do_extends()
+
+    @staticmethod
+    def _sql_string(value):
+        """Return value as a double-quoted SQL literal.
+
+        Embedded double quotes are doubled so they cannot terminate the
+        literal early.  Values without a double quote come out exactly as
+        plain '"' + value + '"'.
+        """
+        return '"' + value.replace('"', '""') + '"'
+
+    @property
+    def elidable(self):
+        """True if the word starts with a vowel, or with an 'h' for which
+        haspirater.lookup() returns a false value.
+
+        NOTE(review): haspirater.lookup presumably reports whether the
+        initial 'h' is aspirated -- confirm against that module.
+        An empty word is reported as not elidable.
+        """
+        if not self.word:
+            return False
+        first = self.word[0]
+        if first in vowels:
+            return True
+        return first == 'h' and not haspirater.lookup(self.word)
+
+    @property
+    def phon_ending(self):
+        """Suffix of the pronunciation starting at its last vowel symbol.
+
+        If the pronunciation contains no symbol from phon_vowels, the whole
+        pronunciation is returned.
+        """
+        for start in range(len(self.phon) - 1, -1, -1):
+            if self.phon[start] in phon_vowels:
+                return self.phon[start:]
+        return self.phon
+
+    @property
+    def ending(self):
+        """Letters of the spelling from the end back to a delimiting vowel.
+
+        Scanning right to left, every character found in `vowels` or
+        `consonants` is kept (other characters are dropped), and the scan
+        stops after the first vowel that is not the very last character of
+        the word.
+        """
+        kept = []
+        for position, char in enumerate(reversed(self.word)):
+            is_vowel = char in vowels
+            if is_vowel or char in consonants:
+                kept.append(char)
+            # position 0 is the last character: a final vowel alone does
+            # not end the scan.
+            if is_vowel and position >= 1:
+                break
+        return ''.join(reversed(kept))
+
+    @property
+    def feminine(self):
+        """True if the word has a feminine ending.
+
+        A word spelled with "-ent" is feminine only when its pronunciation
+        does not end with one of phon_non_end_fem.  Any other word is
+        feminine when its spelling ends with one of sure_end_fem.
+
+        The "-ent" case is examined first: 'ent' is itself listed in
+        sure_end_fem, so testing that list first would accept every "-ent"
+        word and the pronunciation would never be consulted.
+        """
+        if self.word.endswith('ent'):
+            return not any(self.phon.endswith(end)
+                           for end in phon_non_end_fem)
+        return any(self.word.endswith(end) for end in sure_end_fem)
+
+    @property
+    def render_sql(self):
+        """Return the INSERT statement for this word.
+
+        Values appear in the column order of the words table: word, phon,
+        base, kind, freq, min_nsyl, max_nsyl, word_end, phon_end, multiple,
+        elidable, feminine.  Booleans are rendered as 0/1.
+        """
+        values = [
+            self._sql_string(self.word),
+            self._sql_string(self.phon),
+            self._sql_string(self.base),
+            self._sql_string(self.kind),
+            str(self.freq),
+            str(self.nsyl[0]),
+            str(self.nsyl[1]),
+            self._sql_string(self.ending),
+            self._sql_string(self.phon_ending),
+            str(int(self.mult)),
+            str(int(self.elidable)),
+            str(int(self.feminine)),
+        ]
+        return 'INSERT INTO words VALUES(' + ', '.join(values) + ');'
+
+    @property
+    def ok(self):
+        """True if the phonetic ending contains at least one vowel symbol."""
+        ending = self.phon_ending
+        return any(symbol in ending for symbol in phon_vowels)
+
+    def align_sum(self, align):
+        """Sum the second item of every tuple in align.
+
+        Non-tuple elements are ignored.
+        NOTE(review): the tuples presumably pair a chunk of the word with
+        its syllable count -- confirm against metric.parse.
+        """
+        return sum(item[1] for item in align if isinstance(item, tuple))
+
+    def do_extends(self):
+        """Widen nsyl with the total of every result of metric.parse()."""
+        for align in metric.parse(self.word, 999):
+            self.extend(self.align_sum(align[0]))
+
+    def extend(self, item):
+        """Widen the [min, max] range in nsyl so that it includes item."""
+        self.nsyl = [min(self.nsyl[0], item),
+                     max(self.nsyl[1], item)]
+
+# Maps each word to the list of base forms recorded for it by the input loop.
+bases = {} #TODO transitive to the topmost base
+
+
+def derives(a, b, table=None):
+    """Return True if b can be reached from a by following base links.
+
+    a reaches itself.  Otherwise the bases listed for a are followed
+    transitively.  Links that form a cycle (a -> x -> a) are visited only
+    once, so the search always terminates.
+
+    table is the word -> list-of-bases mapping to search; it defaults to
+    the module-level `bases`.
+    """
+    if table is None:
+        table = bases
+    visited = set()
+    pending = [a]
+    while pending:
+        current = pending.pop()
+        if current == b:
+            return True
+        if current in visited:
+            continue
+        visited.add(current)
+        pending.extend(table.get(current, ()))
+    return False
+
+# Emit the schema first; one INSERT statement per accepted word follows.
+print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base
+varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int,
+word_end varchar(10), phon_end varchar(10), multiple bool, elidable
+bool, feminine bool);""")
+
+# Every input line must carry exactly this many tab-separated columns:
+# word, phon, base, kind, four frequency figures, a syllable count, a
+# syllabified form and a final column tested for a comma.
+EXPECTED_COLUMNS = 11
+
+for lineno, line in enumerate(sys.stdin, 1):
+    columns = line.rstrip().split("\t")
+    # The shebang runs the script with -O, which strips assert statements,
+    # so the column count has to be validated explicitly.
+    if len(columns) != EXPECTED_COLUMNS:
+        raise ValueError("line %d: expected %d tab-separated columns, got %d"
+                         % (lineno, EXPECTED_COLUMNS, len(columns)))
+    (word, phon, base, kind,
+     freq_a, freq_b, freq_c, freq_d,
+     nsyl_count, syllabified, mult_column) = columns
+    # Mean of the first two figures plus 100 times the mean of the last two.
+    freq = ((float(freq_a) + float(freq_b))/2 +
+            100*(float(freq_c) + float(freq_d))/2)
+    # Trust whichever is larger: the stated count, or one more than the
+    # number of space/hyphen separators in the syllabified form.
+    separators = syllabified.count(' ') + syllabified.count('-')
+    nsyl = max(int(nsyl_count), 1 + separators)
+    mult = ',' in mult_column
+    w = Word(word, phon, base, kind, freq, nsyl, mult)
+    bases.setdefault(word, []).append(base)
+    # Skip words whose phonetic ending has no vowel symbol.
+    if w.ok:
+        print(w.render_sql)
+
diff --git a/manage.py b/manage.py
@@ -1,164 +0,0 @@
-#!/usr/bin/python3 -O
-
-# TODO frequencies are off
-# TODO bibliographe number of syllables?!
-
-import haspirater
-import metric
-import sys
-
-seen = {}
-
-vowels = "aàâãeéèêëiîïoôöuùûüy"
-consonants = "bcçdfghjklmnpqrstvwxz"
-
-phon_vowels = "()$#289aeEioOuy@"
-
-sure_end_fem = ['e', 'es', 'ent']
-phon_non_end_fem = ['#', ')']
-
-class Word:
- @property
- def elidable(self):
- return self.word[0] in vowels or (self.word[0] == 'h' and
- not haspirater.lookup(self.word))
-
- @property
- def phon_ending(self):
- l = []
- w = list(self.phon)
- w.reverse()
- for x in w:
- l.append(x)
- if x in phon_vowels:
- break
- l.reverse()
- return ''.join(l)
-
- @property
- def ending(self):
- l = []
- w = list(self.word)
- count = 0
- w.reverse()
- for x in w:
- if x in vowels or x in consonants:
- l.append(x)
- if x in vowels and count >= 1:
- break
- count += 1
- l.reverse()
- return ''.join(l)
-
- @property
- def feminine(self):
- for end in sure_end_fem:
- if self.word.endswith(end):
- return True
- if not self.word.endswith('ent'):
- return False
- for end in phon_non_end_fem:
- if self.phon.endswith(end):
- return False
- return True
-
- @property
- def render_sql(self):
- return ('INSERT INTO words VALUES("' + self.word + '", "'
- + self.phon + '", "'
- + self.base + '", "'
- + self.kind + '", '
- + str(self.freq) + ', '
- + str(self.nsyl[0]) + ', '
- + str(self.nsyl[1]) + ', "'
- + self.ending + '", "'
- + self.phon_ending + '", '
- + str(int(self.mult)) + ', '
- + str(int(self.elidable)) + ', '
- + str(int(self.feminine)) + ');')
-
- @property
- def ok(self):
- for x in phon_vowels:
- if x in self.phon_ending:
- return True
- return False
-
- def __init__(self, word, phon, base, kind, freq, nsyl, mult):
- self.word = word
- self.phon = phon
- self.base = base
- self.kind = kind
- self.freq = freq
- self.nsyl = [nsyl, nsyl]
- self.mult = mult
- self.redundant = False
- self.do_extends()
-
- def align_sum(self, align):
- s = 0
- for a in align:
- #print(a)
- if isinstance(a, tuple):
- s += a[1]
- #print ("DBG for %s: %d" % (self.word, s))
- return s
-
- def do_extends(self):
- for align in metric.parse(self.word, 999):
- self.extend(self.align_sum(align[0]))
-
- def extend(self, item):
- self.nsyl = [min(self.nsyl[0], item),
- max(self.nsyl[1], item)]
-
-bases = {} #TODO transitive to the topmost base
-
-def derives(a, b):
- #print ("SKIP derives %s %s" % (a, b))
- if a == b:
- return True
- if a not in bases.keys():
- return False
- for x in bases[a]:
- #print ("SKIP base is %s" % x)
- if x != a:
- if derives(x, b):
- return True
- return False
-
-print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base
-varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int,
-word_end varchar(10), phon_end varchar(10), multiple bool, elidable
-bool, feminine bool);""")
-
-while True:
- line = sys.stdin.readline()
- if not line:
- break
- l = line.rstrip().split("\t")
- word = l.pop(0)
- phon = l.pop(0)
- base = l.pop(0)
- kind = l.pop(0)
- freq = ((float(l[0]) + float(l[1]))/2 +
- 100*(float(l[2]) + float(l[3]))/2)
- l.pop(0)
- l.pop(0)
- l.pop(0)
- l.pop(0)
- #print ("DBG for %s: %d and %d" % (word, int(l[0]), 1+len([x for x in l[1]
- #if x == ' ' or x == '-'])))
- nsyl = max(int(l[0]), 1+len([x for x in l[1] if x == ' ' or x == '-']))
- l.pop(0)
- l.pop(0)
- mult = ',' in l[0]
- l.pop(0)
- assert(len(l) == 0)
- w = Word(word, phon, base, kind, freq, nsyl, mult)
- if word not in bases.keys():
- bases[word] = []
- bases[word].append(base)
- if w.ok:
- print(w.render_sql)
-