drime

French rhyme dictionary with web and CLI interfaces
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit f9d1dff14c55394a180f99e0b179b1a240c86f27
parent bd59f05eddd501b9d6f58ac145f412c693fa7d9e
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Wed,  9 Nov 2011 12:04:42 +0100

change shebang, renamings

Diffstat:
lexique2sql.py | 157+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lexique2sql.sh | 8++++++++
make_db.py | 157-------------------------------------------------------------------------------
make_db.sh | 8--------
prepare.sh | 6++++++
5 files changed, 171 insertions(+), 165 deletions(-)

diff --git a/lexique2sql.py b/lexique2sql.py @@ -0,0 +1,157 @@ +#!/usr/bin/python3 -O + +"""Prepare the rhyme database + +Input should have tab-separated fields: word, pronunciation, base word, +grammatical category, frequency. Output is a """ + +# TODO frequencies are off +# TODO "bibliographe" number of syllables?! + +import haspirater +import metric +from common import is_vowels, is_consonants, sure_end_fem +import sys +import _mysql + +seen = {} + +# phonetic vowel sounds +phon_vowels = "()$#289aeEioOuy@" +# not a feminine ending, independently of spelling +phon_non_end_fem = ['#', ')'] + +# +print ("""CREATE TABLE words( + word varchar(100), -- word + phon varchar(100), -- pronunciation + base varchar(100), -- base word + kind varchar(10), -- grammatical category + freq float, -- frequency + min_nsyl int, -- lower bound on the number of syllabes + max_nsyl int, -- upper bound on the number of syllabes + word_end varchar(10), -- minimal word-level rhyme + phon_end varchar(10), -- minimal phon-level rhyme + elidable bool, -- can cause elision + feminine bool -- genre of the rhyme +);""") + +class Word: + @property + def elidable(self): + """Can this word cause elision in the previous word?""" + return is_vowels(self.word[0]) or (self.word[0] == 'h' and + not haspirater.lookup(self.word)) + + @property + def phon_ending(self): + """Compute minimal phonetic rhyme""" + l = [] + w = list(self.phon) + w.reverse() + for x in w: + l.append(x) + if x in phon_vowels: + break + l.reverse() + return ''.join(l) + + @property + def ending(self): + """Compute minimal visual rhyme""" + l = [] + w = list(self.word) + count = 0 + w.reverse() + for x in w: + if is_vowels(x) or is_consonants(x): + l.append(x) + if is_vowels(x) and count >= 1: + break + count += 1 + l.reverse() + return ''.join(l) + + @property + def feminine(self): + """Would this word be a feminine rhyme?""" + for end in sure_end_fem: + if self.word.endswith(end): + return True + if not self.word.endswith('ent'): 
+ return False + # word ends in -ent, it's hard to tell from writing, so look at phon + # example: "tient" vs. "lient" + for end in phon_non_end_fem: + if self.phon.endswith(end): + return False + return True + + @property + def render_sql(self): + return ('INSERT INTO words VALUES("' + + _mysql.escape_string(self.word) + '", "' + + _mysql.escape_string(self.phon) + '", "' + + _mysql.escape_string(self.base) + '", "' + + _mysql.escape_string(self.kind) + '", ' + + _mysql.escape_string(str(self.freq)) + ', ' + + _mysql.escape_string(str(self.nsyl[0])) + ', ' + + _mysql.escape_string(str(self.nsyl[1])) + ', "' + + _mysql.escape_string(self.ending) + '", "' + + _mysql.escape_string(self.phon_ending) + '", ' + + _mysql.escape_string(str(int(self.elidable))) + ', ' + + _mysql.escape_string(str(int(self.feminine))) + ');' + ) + + @property + def ok(self): + # Remove words with no vowels + for x in phon_vowels: + if x in self.phon_ending: + return True + return False + + def __init__(self, word, phon, base, kind, freq): + self.word = word + self.phon = phon + self.base = base + self.kind = kind + self.freq = freq + self.nsyl = None + self.redundant = False + self.do_extends() + + def align_sum(self, align): + s = 0 + for a in align: + #print(a) + if isinstance(a, tuple): + s += a[1] + #print ("DBG for %s: %d" % (self.word, s)) + return s + + def do_extends(self): + for align in metric.parse(self.word, 999): + self.extend(self.align_sum(align[0])) + + def extend(self, item): + if self.nsyl == None: + self.nsyl = [item, item] + else: + self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)] + +while True: + line = sys.stdin.readline() + if not line: + break + l = line.rstrip().split("\t") + word = l.pop(0) + phon = l.pop(0) + base = l.pop(0) + kind = l.pop(0) + freq = float(l.pop(0)) + assert(len(l) == 0) + w = Word(word, phon, base, kind, freq) + if w.ok: + print(w.render_sql) + diff --git a/lexique2sql.sh b/lexique2sql.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +cd "$( 
dirname "$0" )" + +cat - additions | # add custom exceptions + cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields + awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' | # aggregate frequencies + ./make_db.py diff --git a/make_db.py b/make_db.py @@ -1,157 +0,0 @@ -#!/usr/bin/python - -"""Prepare the rhyme database - -Input should have tab-separated fields: word, pronunciation, base word, -grammatical category, frequency. Output is a """ - -# TODO frequencies are off -# TODO "bibliographe" number of syllables?! - -import haspirater -import metric -from common import is_vowels, is_consonants, sure_end_fem -import sys -import _mysql - -seen = {} - -# phonetic vowel sounds -phon_vowels = "()$#289aeEioOuy@" -# not a feminine ending, independently of spelling -phon_non_end_fem = ['#', ')'] - -# -print ("""CREATE TABLE words( - word varchar(100), -- word - phon varchar(100), -- pronunciation - base varchar(100), -- base word - kind varchar(10), -- grammatical category - freq float, -- frequency - min_nsyl int, -- lower bound on the number of syllabes - max_nsyl int, -- upper bound on the number of syllabes - word_end varchar(10), -- minimal word-level rhyme - phon_end varchar(10), -- minimal phon-level rhyme - elidable bool, -- can cause elision - feminine bool -- genre of the rhyme -);""") - -class Word: - @property - def elidable(self): - """Can this word cause elision in the previous word?""" - return is_vowels(self.word[0]) or (self.word[0] == 'h' and - not haspirater.lookup(self.word)) - - @property - def phon_ending(self): - """Compute minimal phonetic rhyme""" - l = [] - w = list(self.phon) - w.reverse() - for x in w: - l.append(x) - if x in phon_vowels: - break - l.reverse() - return ''.join(l) - - @property - def ending(self): - """Compute minimal visual rhyme""" - l = [] - w = list(self.word) - count = 0 - w.reverse() - for x in w: - if is_vowels(x) or is_consonants(x): - l.append(x) - if is_vowels(x) and count >= 1: - break - count += 
1 - l.reverse() - return ''.join(l) - - @property - def feminine(self): - """Would this word be a feminine rhyme?""" - for end in sure_end_fem: - if self.word.endswith(end): - return True - if not self.word.endswith('ent'): - return False - # word ends in -ent, it's hard to tell from writing, so look at phon - # example: "tient" vs. "lient" - for end in phon_non_end_fem: - if self.phon.endswith(end): - return False - return True - - @property - def render_sql(self): - return ('INSERT INTO words VALUES("' - + _mysql.escape_string(self.word) + '", "' - + _mysql.escape_string(self.phon) + '", "' - + _mysql.escape_string(self.base) + '", "' - + _mysql.escape_string(self.kind) + '", ' - + _mysql.escape_string(str(self.freq)) + ', ' - + _mysql.escape_string(str(self.nsyl[0])) + ', ' - + _mysql.escape_string(str(self.nsyl[1])) + ', "' - + _mysql.escape_string(self.ending) + '", "' - + _mysql.escape_string(self.phon_ending) + '", ' - + _mysql.escape_string(str(int(self.elidable))) + ', ' - + _mysql.escape_string(str(int(self.feminine))) + ');' - ) - - @property - def ok(self): - # Remove words with no vowels - for x in phon_vowels: - if x in self.phon_ending: - return True - return False - - def __init__(self, word, phon, base, kind, freq): - self.word = word - self.phon = phon - self.base = base - self.kind = kind - self.freq = freq - self.nsyl = None - self.redundant = False - self.do_extends() - - def align_sum(self, align): - s = 0 - for a in align: - #print(a) - if isinstance(a, tuple): - s += a[1] - #print ("DBG for %s: %d" % (self.word, s)) - return s - - def do_extends(self): - for align in metric.parse(self.word, 999): - self.extend(self.align_sum(align[0])) - - def extend(self, item): - if self.nsyl == None: - self.nsyl = [item, item] - else: - self.nsyl = [min(self.nsyl[0], item), max(self.nsyl[1], item)] - -while True: - line = sys.stdin.readline() - if not line: - break - l = line.rstrip().split("\t") - word = l.pop(0) - phon = l.pop(0) - base = l.pop(0) - 
kind = l.pop(0) - freq = float(l.pop(0)) - assert(len(l) == 0) - w = Word(word, phon, base, kind, freq) - if w.ok: - print(w.render_sql) - diff --git a/make_db.sh b/make_db.sh @@ -1,8 +0,0 @@ -#!/bin/bash - -cd "$( dirname "$0" )" - -cat - additions | # add custom exceptions - cut -f 1,2,3,4,7,8,9,10,24,28 | # select relevant fields - awk '{FS=" "; OFS=" "; print $1, $2, $3, $4, ($5+$6)/2 + 100*($7+$8)/2}' | # aggregate frequencies - ./make_db.py diff --git a/prepare.sh b/prepare.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +cd "$( dirname "$0" )" + +./lexique2sql | sqlite db.sqlite +