limit the number of occurrences of words in rhymes - plint - French poetry validator (local mirror of https://gitlab.com/a3nm/plint)

commit 2c2d4e1bfa8e22343ee71bcf6833c00e02a63261
parent c5ac88484bb02de828d2e2b7c126e5705cb72d08
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 30 Apr 2012 18:26:50 +0200

limit the number of occurrences of words in rhymes

Diffstat:
README  | 2 ++
TODO  | 3 ---
common.py  | 5 +++--
error.py  | 12 ++++++++++++
lexique_occurrences_retrieve.sh  | 14 ++++++++++++++
template.py  | 15 +++++++++++++++
views/about.html  | 7 +++++--

7 files changed, 51 insertions(+), 7 deletions(-)
diff --git a/README b/README
@@ -27,6 +27,8 @@ It depends on haspirater and frhyme, and the web interface requires CherryPy.
 
 Place the haspirater.json and haspirater.py from haspirater in the main
 directory. Place the generated frhyme.json and frhyme.py in the main directory.
+Generate the occurrences file as the standard output of
+lexique_occurrences_retrieve.sh.
 
 == 3. Usage ==
 
diff --git a/TODO b/TODO
@@ -1,6 +1,3 @@
-check that there are no more repetitions of the same word for a rhyme than there
-are different functions for it
-
 larger label for radios
 
 no diérèse on 'uei'?
diff --git a/common.py b/common.py
@@ -76,9 +76,10 @@ def is_consonants(chunk):
       return False
   return True
 
-def normalize(text, with_apostrophe=False):
+def normalize(text, with_apostrophe=False, downcase=True):
   """Normalize text, ie. lowercase, no useless punctuation or whitespace"""
-  return norm_spaces(rm_punct(text.lower(), with_apostrophe)).rstrip().lstrip()
+  return norm_spaces(rm_punct(text.lower() if downcase else text,
+      with_apostrophe)).rstrip().lstrip()
 
 def subst(string, subs):
   if len(subs) == 0:
diff --git a/error.py b/error.py
@@ -164,3 +164,15 @@ class ErrorBadMetric(Error):
         + (["... other options omitted ..."] if truncated else [])
         )
 
+class ErrorMultipleWordOccurrence(Error):
+  def __init__(self, word, occurrences):
+    self.word = word
+    self.occurrences = occurrences
+
+  def get_id(self):
+    return self.pattern.myid
+
+  def report(self):
+    return Error.report(self, "%d occurrences of word %s for rhyme %s"
+        % (self.occurrences, self.word, self.get_id()))
+
diff --git a/lexique_occurrences_retrieve.sh b/lexique_occurrences_retrieve.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+ZIP="Lexique371b.zip"
+URL="http://www.lexique.org/public/$ZIP"
+FILE="Lexique371/Bases+Scripts/Lexique3.txt"
+
+cd "$( dirname "$0" )"
+
+wget $URL
+unzip -qq $ZIP $FILE
+cat $FILE | ./frhyme/lexique/lexique_fix.sh | cut -f1 |
+  rev | cut -d' ' -f1 | rev | sort | uniq -c |
+  awk '{print $2, $1}'
+
diff --git a/template.py b/template.py
@@ -4,6 +4,7 @@ from hemistiches import check_hemistiches
 import copy
 import rhyme
 from common import normalize, legal, strip_accents_one
+from nature import nature_count
 
 class Pattern:
   def __init__(self, metric, myid, femid, constraint):
@@ -34,6 +35,7 @@ class Template:
     self.position = 0
     self.env = {}
     self.femenv = {}
+    self.occenv = {}
     self.reject_errors = False
 
   def load(self, s):
@@ -64,6 +66,7 @@ class Template:
   def match(self, line):
     """Check a line against current pattern, return errors"""
 
+    line_with_case = normalize(line, downcase=False)
     line = normalize(line)
     pattern = self.get()
     # compute alignments, check hemistiches, sort by score
@@ -107,6 +110,17 @@ class Template:
         self.env[pattern.myid].eye = old_e
         errors.append(error.ErrorBadRhymeSound(self.env[pattern.myid], None))
 
+    # occurrences
+    if pattern.myid not in self.occenv.keys():
+      self.occenv[pattern.myid] = {}
+    last_word = line_with_case.split(' ')[-1]
+    if last_word not in self.occenv[pattern.myid].keys():
+      self.occenv[pattern.myid][last_word] = 0
+    self.occenv[pattern.myid][last_word] += 1
+    if self.occenv[pattern.myid][last_word] > nature_count(last_word):
+      errors.append(error.ErrorMultipleWordOccurrence(last_word,
+        self.occenv[pattern.myid][last_word]))
+
     # rhyme genres
     # inequality constraint
     # TODO this is simplistic and order-dependent
@@ -162,6 +176,7 @@ class Template:
     self.position = 0
     self.env = self.reset_conditional(self.env)
     self.femenv = self.reset_conditional(self.femenv)
+    self.occenv = {} # always reset
 
   def get(self):
     """Get next state, resetting if needed"""
diff --git a/views/about.html b/views/about.html
@@ -52,7 +52,9 @@ exacte, se reporter au code source.</p>
   ("mettre" et "maître") et [a] et [ɑ] ("patte" et "pâte") mais pas [ɛ̃] et [œ̃]
   ("brin" et "brun") ou [ɔ] et [o] ("cotte" et "côte"). Plint comprend des
   options pour autoriser les rimes normandes (lettres communes à la fin) ou les
-  rimes par assonance (dernier son vocalique commun).</dd>
+  rimes par assonance (dernier son vocalique commun). On ne peut pas faire rimer
+  un mot avec lui-même (mais on peut faire rimer un mot avec des homographes de
+  nature grammaticale différente).</dd>
   <dt>Genre des rimes.</dt>
   <dd>En poésie classique, les rimes doivent être <em>féminines</em> ou
   <em>masculines</em>. Une rime est féminine si elle se termine par un "e", "es"
@@ -205,7 +207,8 @@ source code.</p>
   [ɑ] ("patte" and "pâte") but distinguishes [ɛ̃] and [œ̃] ("brin" and "brun") and
   [ɔ] and [o] ("cotte" and "côte"). plint includes options for rhymes <em>pour
     l'œil</em> (common letter endings) and <em>assonances</em> (common ending
-  vocalic phonemes).</dd>
+  vocalic phonemes). A word cannot rhyme with itself, but rhymes between
+  homographs of a different grammatical nature are allowed.</dd>
   <dt>Rhyme genre.</dt>
   <dd>In classical verse, rhymes must be made between feminine verse endings, or
   masculine verse endings. A verse ending is feminine if it ends with a silent

	plint French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
	git clone https://a3nm.net/git/plint/
	Log | Files | Refs | README

README  | 2 ++
TODO  | 3 ---
common.py  | 5 +++--
error.py  | 12 ++++++++++++
lexique_occurrences_retrieve.sh  | 14 ++++++++++++++
template.py  | 15 +++++++++++++++
views/about.html  | 7 +++++--