drime

French rhyme dictionary with web and CLI interface
git clone https://a3nm.net/git/drime/
Log | Files | Refs | README

commit c1acbbd1530fd004a8141deea2705d0c0136a653
parent 4a585843c14b255bc1f16dd80b08964b3fd231d5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 24 Dec 2011 02:07:56 +0100

continue

Diffstat:
README | 21 ++++++++++++++++++---
db_mysql.py | 8 ++++----
query.py | 17 +++++++++++------
static/main.css | 44 +++++++++++++++++++++++++++++++++++++++++---
templates/about.html | 17 ++++++++++++-----
templates/disambig.html | 23 ++++++++++-------------
templates/page.html | 9 +++++----
templates/results.html | 7 ++++---
8 files changed, 105 insertions(+), 41 deletions(-)

diff --git a/README b/README @@ -1,5 +1,6 @@ WARNING -- this code does *not* work yet! TODO: dos2unix +TODO: placement in rhymes for enlacement? drime - by Antoine Amarilli A French rhyme dictionary @@ -9,14 +10,14 @@ Licence: GPL version 3 == 1. Features == drime is a French rhyme dictionary engine with advanced features, most -notably rime selection based on phonetic or visual similarity, -frequency, syllable count, and rhyme genre. +notably rhyme selection based on phonetic or visual similarity, +frequency, syllable count, and rhyme gender. == 2. Requirements == drime requires the haspirater module <http://gitorious.org/haspirater/>. -== 2. Generating the DB == +== 3. Generating the DB == The program database isn't shipped, but scripts are provided to build it from the Lexique3 database <http://lexique.org/>: @@ -27,5 +28,19 @@ from the Lexique3 database <http://lexique.org/>: - lexique2sql.sh takes the tweaked version of Lexique on stdin and produces the SQL database on stdout. +To import the output of lexique2sql.sh in a sqlite3 database, run: + + cat output.sql | sqlite3 db.sqlite + +This can take some time: you can monitor progress using the pv utility: + + pv -l output.sql | sqlite3 db.sqlite + +To import the output of lexique2sql.sh in a MySQL database (on localhost, +database 'drime', as user 'drime', interactive password authentication), run: + + cat output.sql | mysql --default-character-set=utf8 -D drime -u drime -p + +== 4. 
Using the DB == diff --git a/db_mysql.py b/db_mysql.py @@ -6,10 +6,10 @@ from db_mysql_config import config def run_query(r, v): db = MySQLdb.connect( - host=config.host, - user=config.user, - passwd=config.passwd, - db=config.db, + host=config['host'], + user=config['user'], + passwd=config['passwd'], + db=config['db'], cursorclass=MySQLdb.cursors.DictCursor, use_unicode=True) cursor = db.cursor() diff --git a/query.py b/query.py @@ -3,6 +3,7 @@ import sys import operator from db_mysql import run_query +from common import from_xsampa, to_xsampa PAGESIZE=50 @@ -24,7 +25,7 @@ def query(q, nsyl='', gender=True, page=0): word = q.strip().split(' ') nsyl = nsyl.strip() if word[-1].startswith('[') and word[-1].endswith(']'): - phon = word[-1][1:-1] + phon = from_xsampa(word[-1][1:-1]) word = word[:-1] else: phon = None @@ -60,6 +61,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): SELECT t1.freq AS t1_freq, t1.word AS t1_word, t1.phon AS t1_phon, + t1.base AS t1_base, t1.feminine AS t1_feminine, t2.word AS t2_word, t2.phon AS t2_phon, @@ -73,11 +75,12 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): FROM words AS t1 INNER JOIN words AS t2 ON (t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end) WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?) - AND (? OR t2.max_nsyl >= ?) - AND (? OR t2.min_nsyl <= ? OR (t2.elidable AND t2.min_nsyl - 1 <= ?)) + AND ((? OR t2.max_nsyl >= ?) + AND (? OR t2.min_nsyl <= ? + OR (t2.elidable AND t2.min_nsyl - 1 <= ? 
AND ?))) ORDER BY t1.freq, t1.phon, t1.word ''', (word, word == None, phon, phon == None, - minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll,)) + minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,)) result = {} cache = {} seen = {} @@ -85,7 +88,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): if x['t1_feminine'] != x['t2_feminine'] and gender: continue key = (x['t1_word'], x['t1_phon'], - x['t1_word'] + ' [' + x['t1_phon'] + ']') + x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']') if key not in result.keys(): result[key] = [] cache[key] = [] @@ -110,7 +113,9 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size): ' (' + row['base'] + ')' if row['base'] != row['word'] else '') - if row['word'] == x['t1_word'] and row['phon'] == x['t1_phon']: + row['phon'] = to_xsampa(row['phon']) + if (row['word'] in [x['t1_word'], x['t1_base']] + and row['phon'] == to_xsampa(x['t1_phon'])): cache[key].append(row) else: result[key].append(row) diff --git a/static/main.css b/static/main.css @@ -2,8 +2,8 @@ h1 { background: #0c0; float: left; margin: 0; - margin-right: 1em; - font-size: 150%; + margin-right: 0.3em; + font-size: 140%; } #body { @@ -42,10 +42,48 @@ label { text-align: center; } -table { +table, .faketable { width: 100%; } .odd { background: #efe; } + +.help { + vertical-align: super; +} + +#nsyl { + width: 6em; +} + +#query { + width: 20em; +} + +#gender_label { + display: inline-block; + vertical-align: bottom; + position: relative; + top: -1px; + *overflow: hidden; +} + +.ellipsis { + text-overflow: ellipsis; +} + +.faketable { + display: table; +} + +#disambig a { + text-decoration: none; + color: black; +} + +#disambig li { + padding: 0.3em; +} + diff --git a/templates/about.html b/templates/about.html @@ -16,10 +16,10 @@ use proper nouns or rare words. 
You can also provide a pronunciation written between square brackets using <a href="#pron">the right convention</a> to disambiguate if multiple pronunciations are possible. Example: <a href="/?query=fils%20[fis]">fils [fis]</a>.</p> -<p>In the <strong>n_syllabes</strong> field, you can specify a number of -syllabes to limit on. You can either specify an exact number or a range (eg. -"1-3"). You can suffix a "+" to indicate that you can accept one syllabe more if -the word could cause an elision in the previous word. TODO ref. The syllabe +<p>In the <strong>n_syllables</strong> field, you can specify a number of +syllables to limit on. You can either specify an exact number or a range (eg. +"1-3"). You can suffix a "+" to indicate that you can accept one syllable more if +the word could cause an elision in the previous word. TODO ref. The syllable counts for words are approximate: the system will always overapproximate your query.</p> <p>The checkbox limits to rhymes that respect rhyme gender. If you're unsure @@ -36,6 +36,13 @@ common letters, and the frequency of the word.</p> TODO more info about missing/redundant results <h2 id="pron">How is pronunciation written?</h2> -<p>TODO</p> +<p>It is <a href="http://en.wikipedia.org/wiki/X-SAMPA">X-SAMPA</a>. Information +about French pronunciation can be found on the <a + href="https://fr.wiktionary.org/wiki/Annexe:Prononciation/fran%C3%A7ais">French + Wiktionary</a>. Be aware that because of Lexique limitations, the +pronunciation indicated is a colloquial one and not the one that you would use to +versify (for instance, "placement" is reported as "plasmA~" but would be read as +"plas@mA~"). 
Heuristics are used to work around this when filtering on the +number of syllables.</p> {% endblock %} diff --git a/templates/disambig.html b/templates/disambig.html @@ -2,22 +2,19 @@ {% block body %} <p>Did you mean:</p> -TODO include indications -TODO keep the other GET params -<table> +<ul id="disambig"> {% for k in keys %} +<li class="{{ loop.cycle('odd', 'even') }}"> <a href="?query={{ k[-1] | escape }}&nsyl={{ nsyl }}&gender={{ gender }}"> - <tr class="{% loop.cycle('odd', 'even') %}"> - <td>{{ k[0] }}</td> - <td>{{ k[1] }}</td> - <td> - {% for v in result[k][:5] %} - {{ v[0] }} - {% endfor %} - </td> - </tr> + {{ k[0] }} [{{ k[1] }}], rhyming with + {% for v in example[k][:5] %} + {{ v.word }} + {% endfor %} + ... + </a> + </li> {% endfor %} -</table> +</ul> {% endblock %} diff --git a/templates/page.html b/templates/page.html @@ -15,17 +15,18 @@ <input id="query" name="query" placeholder="word" value="{{ q }}"/> - <label class="redundant" for="nsyl">Number of syllabes</label> + <label class="redundant" for="nsyl">Number of syllables</label> <input id="nsyl" name="nsyl" - placeholder="n_syllabes" + placeholder="n_syllables" value="{{ nsyl }}"/> + <label id="gender_label"> <input type="checkbox" id="gender" name="gender" {% if gender %} checked="{{ gender }}" {% endif %} /> - <label for="gender">Respect gender?</label> - <input type="submit" /> + Respect gender?</label> + <input type="submit" value="Search" /> </form> </header> <div id="body"> diff --git a/templates/results.html b/templates/results.html @@ -1,22 +1,23 @@ {% extends "page.html" %} {% block body %} +<p>Displaying results for: <strong>{{ keys[0][-1] }}</strong></p> <table> <tr> <th>word</th> - <th>pron<a href="about/#pron" class="help">?</a></th> + <th>pron<a href="about#pron" class="help">?</a></th> <th>phon</th> <th>eye</th> - <th>derivation</th> <th>freq</th> + <th>derivation</th> {% for r in result %} <tr class="{{ loop.cycle('odd', 'even') }}"> <td>{{ r.word }}</td> <td>{{ r.phon }}</td> 
<td class="num">{{ r.phon_rhyme }}</td> <td class="num">{{ r.word_rhyme }}</td> - <td>{{ r.derivation }}</td> <td class="num">{{ r.freq }}</td> + <td>{{ r.derivation }}</td> </tr> {% endfor %} </table>