commit 4da377ba38cfb840d3c45f3e3bc50e711e214823
parent 160ed739006ce8f2eac1ef9cce887f0ccd2f5e67
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 29 Sep 2011 15:18:33 +0200
continuing cleanup
Diffstat:
3 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/README b/README
@@ -11,6 +11,10 @@ drime is a French rhyme dictionary engine with advanced features, most
notably rhyme selection based on phonetic or visual similarity,
frequency, syllable count, and rhyme genre.
+== 2. Requirements ==
+
+drime requires the haspirater module <http://gitorious.org/haspirater/>.
+
== 3. Generating the DB ==
The program database isn't shipped, but scripts are provided to build it
diff --git a/make_db.py b/make_db.py
@@ -7,26 +7,26 @@
import haspirater
import metric
+from common import is_vowels, is_consonants, sure_end_fem
import sys
seen = {}
-vowels = "aàâãeéèêëiîïoôöuùûüy"
-consonants = "bcçdfghjklmnpqrstvwxz"
-
+# phonetic vowel sounds
phon_vowels = "()$#289aeEioOuy@"
-
-sure_end_fem = ['e', 'es', 'ent']
+# not a feminine ending, independently of spelling
phon_non_end_fem = ['#', ')']
class Word:
@property
def elidable(self):
- return self.word[0] in vowels or (self.word[0] == 'h' and
+ """Can this word cause elision in the previous word?"""
+ return is_vowels(self.word[0]) or (self.word[0] == 'h' and
not haspirater.lookup(self.word))
@property
def phon_ending(self):
+ """Compute minimal phonetic rhyme"""
l = []
w = list(self.phon)
w.reverse()
@@ -39,14 +39,15 @@ class Word:
@property
def ending(self):
+ """Compute minimal visual rhyme"""
l = []
w = list(self.word)
count = 0
w.reverse()
for x in w:
- if x in vowels or x in consonants:
+ if is_vowels(x) or is_consonants(x):
l.append(x)
- if x in vowels and count >= 1:
+ if is_vowels(x) and count >= 1:
break
count += 1
l.reverse()
@@ -54,11 +55,14 @@ class Word:
@property
def feminine(self):
+ """Would this word be a feminine rhyme?"""
for end in sure_end_fem:
if self.word.endswith(end):
return True
if not self.word.endswith('ent'):
return False
+ # word ends in -ent, it's hard to tell from writing, so look at phon
+ # example: "tient" vs. "lient"
for end in phon_non_end_fem:
if self.phon.endswith(end):
return False
@@ -75,7 +79,6 @@ class Word:
+ str(self.nsyl[1]) + ', "'
+ self.ending + '", "'
+ self.phon_ending + '", '
- + str(int(self.mult)) + ', '
+ str(int(self.elidable)) + ', '
+ str(int(self.feminine)) + ');')
@@ -86,14 +89,13 @@ class Word:
return True
return False
- def __init__(self, word, phon, base, kind, freq, nsyl, mult):
+ def __init__(self, word, phon, base, kind, freq, nsyl):
self.word = word
self.phon = phon
self.base = base
self.kind = kind
self.freq = freq
self.nsyl = [nsyl, nsyl]
- self.mult = mult
self.redundant = False
self.do_extends()
@@ -131,7 +133,7 @@ def derives(a, b):
print ("""CREATE TABLE words(word varchar(100), phon varchar(100), base
varchar(100), kind varchar(10), freq float, min_nsyl int, max_nsyl int,
-word_end varchar(10), phon_end varchar(10), multiple bool, elidable
+word_end varchar(10), phon_end varchar(10), elidable
bool, feminine bool);""")
while True:
@@ -143,21 +145,14 @@ while True:
phon = l.pop(0)
base = l.pop(0)
kind = l.pop(0)
- freq = ((float(l[0]) + float(l[1]))/2 +
- 100*(float(l[2]) + float(l[3]))/2)
- l.pop(0)
- l.pop(0)
- l.pop(0)
- l.pop(0)
+ freq = float(l.pop(0))
#print ("DBG for %s: %d and %d" % (word, int(l[0]), 1+len([x for x in l[1]
#if x == ' ' or x == '-'])))
nsyl = max(int(l[0]), 1+len([x for x in l[1] if x == ' ' or x == '-']))
l.pop(0)
l.pop(0)
- mult = ',' in l[0]
- l.pop(0)
assert(len(l) == 0)
- w = Word(word, phon, base, kind, freq, nsyl, mult)
+ w = Word(word, phon, base, kind, freq, nsyl)
if word not in bases.keys():
bases[word] = []
bases[word].append(base)
diff --git a/make_db.sh b/make_db.sh
@@ -2,6 +2,7 @@
cd "$( dirname "$0" )"
-cat - additions | cut -f 1,2,3,4,7,8,9,10,24,28,29 |
- sort -k1,1 |
- ./make_db.py
+cat - additions | # add custom exceptions
+ cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields
+ awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2, $9, $10}' #| # aggregate frequencies
+ #./make_db.py