commit c1acbbd1530fd004a8141deea2705d0c0136a653
parent 4a585843c14b255bc1f16dd80b08964b3fd231d5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 24 Dec 2011 02:07:56 +0100
continue
Diffstat:
8 files changed, 105 insertions(+), 41 deletions(-)
diff --git a/README b/README
@@ -1,5 +1,6 @@
WARNING -- this code does *not* work yet!
TODO: dos2unix
+TODO: placement in rhymes for enlacement?
drime - by Antoine Amarilli
A French rhyme dictionary
@@ -9,14 +10,14 @@ Licence: GPL version 3
== 1. Features ==
drime is a French rhyme dictionary engine with advanced features, most
-notably rime selection based on phonetic or visual similarity,
-frequency, syllable count, and rhyme genre.
+notably rhyme selection based on phonetic or visual similarity,
+frequency, syllable count, and rhyme gender.
== 2. Requirements ==
drime requires the haspirater module <http://gitorious.org/haspirater/>.
-== 2. Generating the DB ==
+== 3. Generating the DB ==
The program database isn't shipped, but scripts are provided to build it
from the Lexique3 database <http://lexique.org/>:
@@ -27,5 +28,19 @@ from the Lexique3 database <http://lexique.org/>:
- lexique2sql.sh takes the tweaked version of Lexique on stdin and produces
the SQL database on stdout.
+To import the output of lexique2sql.sh into a sqlite3 database, run:
+
+ cat output.sql | sqlite3 db.sqlite
+
+This can take some time; you can monitor progress using the pv utility:
+
+ pv -l output.sql | sqlite3 db.sqlite
+
+To import the output of lexique2sql.sh into a MySQL database (on localhost,
+database 'drime', as user 'drime', interactive password authentication), run:
+
+ cat output.sql | mysql --default-character-set=utf8 -D drime -u drime -p
+
+== 4. Using the DB ==
diff --git a/db_mysql.py b/db_mysql.py
@@ -6,10 +6,10 @@ from db_mysql_config import config
def run_query(r, v):
db = MySQLdb.connect(
- host=config.host,
- user=config.user,
- passwd=config.passwd,
- db=config.db,
+ host=config['host'],
+ user=config['user'],
+ passwd=config['passwd'],
+ db=config['db'],
cursorclass=MySQLdb.cursors.DictCursor,
use_unicode=True)
cursor = db.cursor()
diff --git a/query.py b/query.py
@@ -3,6 +3,7 @@
import sys
import operator
from db_mysql import run_query
+from common import from_xsampa, to_xsampa
PAGESIZE=50
@@ -24,7 +25,7 @@ def query(q, nsyl='', gender=True, page=0):
word = q.strip().split(' ')
nsyl = nsyl.strip()
if word[-1].startswith('[') and word[-1].endswith(']'):
- phon = word[-1][1:-1]
+ phon = from_xsampa(word[-1][1:-1])
word = word[:-1]
else:
phon = None
@@ -60,6 +61,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
SELECT t1.freq AS t1_freq,
t1.word AS t1_word,
t1.phon AS t1_phon,
+ t1.base AS t1_base,
t1.feminine AS t1_feminine,
t2.word AS t2_word,
t2.phon AS t2_phon,
@@ -73,11 +75,12 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
FROM words AS t1 INNER JOIN words AS t2 ON
(t1.phon_end = t2.phon_end OR t1.word_end = t2.word_end)
WHERE (t1.word = ? OR ?) AND (t1.phon = ? OR ?)
- AND (? OR t2.max_nsyl >= ?)
- AND (? OR t2.min_nsyl <= ? OR (t2.elidable AND t2.min_nsyl - 1 <= ?))
+ AND ((? OR t2.max_nsyl >= ?)
+ AND (? OR t2.min_nsyl <= ?
+ OR (t2.elidable AND t2.min_nsyl - 1 <= ? AND ?)))
ORDER BY t1.freq, t1.phon, t1.word
''', (word, word == None, phon, phon == None,
- minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll,))
+ minsyll == None, minsyll, maxsyll == None, maxsyll, maxsyll, elide,))
result = {}
cache = {}
seen = {}
@@ -85,7 +88,7 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
if x['t1_feminine'] != x['t2_feminine'] and gender:
continue
key = (x['t1_word'], x['t1_phon'],
- x['t1_word'] + ' [' + x['t1_phon'] + ']')
+ x['t1_word'] + ' [' + to_xsampa(x['t1_phon']) + ']')
if key not in result.keys():
result[key] = []
cache[key] = []
@@ -110,7 +113,9 @@ def do_query(word, phon, minsyll, maxsyll, elide, gender, offset, size):
' (' + row['base'] + ')'
if row['base'] != row['word']
else '')
- if row['word'] == x['t1_word'] and row['phon'] == x['t1_phon']:
+ row['phon'] = to_xsampa(row['phon'])
+ if (row['word'] in [x['t1_word'], x['t1_base']]
+ and row['phon'] == to_xsampa(x['t1_phon'])):
cache[key].append(row)
else:
result[key].append(row)
diff --git a/static/main.css b/static/main.css
@@ -2,8 +2,8 @@ h1 {
background: #0c0;
float: left;
margin: 0;
- margin-right: 1em;
- font-size: 150%;
+ margin-right: 0.3em;
+ font-size: 140%;
}
#body {
@@ -42,10 +42,48 @@ label {
text-align: center;
}
-table {
+table, .faketable {
width: 100%;
}
.odd {
background: #efe;
}
+
+.help {
+ vertical-align: super;
+}
+
+#nsyl {
+ width: 6em;
+}
+
+#query {
+ width: 20em;
+}
+
+#gender_label {
+ display: inline-block;
+ vertical-align: bottom;
+ position: relative;
+ top: -1px;
+ *overflow: hidden;
+}
+
+.ellipsis {
+ text-overflow: ellipsis;
+}
+
+.faketable {
+ display: table;
+}
+
+#disambig a {
+ text-decoration: none;
+ color: black;
+}
+
+#disambig li {
+ padding: 0.3em;
+}
+
diff --git a/templates/about.html b/templates/about.html
@@ -16,10 +16,10 @@ use proper nouns or rare words. You can also provide a pronunciation written
between square brackets using <a href="#pron">the right convention</a> to
disambiguate if multiple pronunciations are possible. Example: <a
href="/?query=fils%20[fis]">fils [fis]</a>.</p>
-<p>In the <strong>n_syllabes</strong> field, you can specify a number of
-syllabes to limit on. You can either specify an exact number or a range (eg.
-"1-3"). You can suffix a "+" to indicate that you can accept one syllabe more if
-the word could cause an elision in the previous word. TODO ref. The syllabe
+<p>In the <strong>n_syllables</strong> field, you can restrict results to a given
+number of syllables. You can either specify an exact number or a range (e.g.
+"1-3"). You can suffix a "+" to indicate that you accept one more syllable if
+the word could cause an elision in the previous word. TODO ref. The syllable
counts for words are approximate: the system will always overapproximate your
query.</p>
<p>The checkbox limits to rhymes that respect rhyme gender. If you're unsure
@@ -36,6 +36,13 @@ common letters, and the frequency of the word.</p>
TODO more info about missing/redundant results
<h2 id="pron">How is pronunciation written?</h2>
-<p>TODO</p>
+<p>Pronunciation is written in <a href="http://en.wikipedia.org/wiki/X-SAMPA">X-SAMPA</a>. Information
+about French pronunciation can be found on the <a
+ href="https://fr.wiktionary.org/wiki/Annexe:Prononciation/fran%C3%A7ais">French
+ Wiktionary</a>. Be aware that because of Lexique limitations, the
+pronunciation indicated is a colloquial one and not the one that you would use to
+versify (for instance, "placement" is reported as "plasmA~" but would be read as
+"plas@mA~"). Heuristics are used to work around this when filtering on the
+number of syllables.</p>
{% endblock %}
diff --git a/templates/disambig.html b/templates/disambig.html
@@ -2,22 +2,19 @@
{% block body %}
<p>Did you mean:</p>
-TODO include indications
-TODO keep the other GET params
-<table>
+<ul id="disambig">
{% for k in keys %}
+<li class="{{ loop.cycle('odd', 'even') }}">
<a href="?query={{ k[-1] | escape }}&nsyl={{ nsyl }}&gender={{ gender }}">
- <tr class="{% loop.cycle('odd', 'even') %}">
- <td>{{ k[0] }}</td>
- <td>{{ k[1] }}</td>
- <td>
- {% for v in result[k][:5] %}
- {{ v[0] }}
- {% endfor %}
- </td>
- </tr>
+ {{ k[0] }} [{{ k[1] }}], rhyming with
+ {% for v in example[k][:5] %}
+ {{ v.word }}
+ {% endfor %}
+ ...
+ </a>
+ </li>
{% endfor %}
-</table>
+</ul>
{% endblock %}
diff --git a/templates/page.html b/templates/page.html
@@ -15,17 +15,18 @@
<input id="query" name="query"
placeholder="word"
value="{{ q }}"/>
- <label class="redundant" for="nsyl">Number of syllabes</label>
+ <label class="redundant" for="nsyl">Number of syllables</label>
<input id="nsyl" name="nsyl"
- placeholder="n_syllabes"
+ placeholder="n_syllables"
value="{{ nsyl }}"/>
+ <label id="gender_label">
<input type="checkbox" id="gender" name="gender"
{% if gender %}
checked="{{ gender }}"
{% endif %}
/>
- <label for="gender">Respect gender?</label>
- <input type="submit" />
+ Respect gender?</label>
+ <input type="submit" value="Search" />
</form>
</header>
<div id="body">
diff --git a/templates/results.html b/templates/results.html
@@ -1,22 +1,23 @@
{% extends "page.html" %}
{% block body %}
+<p>Displaying results for: <strong>{{ keys[0][-1] }}</strong></p>
<table>
<tr>
<th>word</th>
- <th>pron<a href="about/#pron" class="help">?</a></th>
+ <th>pron<a href="about#pron" class="help">?</a></th>
<th>phon</th>
<th>eye</th>
- <th>derivation</th>
<th>freq</th>
+ <th>derivation</th>
{% for r in result %}
<tr class="{{ loop.cycle('odd', 'even') }}">
<td>{{ r.word }}</td>
<td>{{ r.phon }}</td>
<td class="num">{{ r.phon_rhyme }}</td>
<td class="num">{{ r.word_rhyme }}</td>
- <td>{{ r.derivation }}</td>
<td class="num">{{ r.freq }}</td>
+ <td>{{ r.derivation }}</td>
</tr>
{% endfor %}
</table>