refactoring - plint - French poetry validator (local mirror of https://gitlab.com/a3nm/plint)

commit 42bde95087c10d81e146f4a560727f94be824407
parent 8d59d3e20eeee614e80fec970c3500efc05d0f82
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sat, 25 Jun 2011 12:12:31 -0400

refactoring

Diffstat:
common.py  | 43 +++++++++++++++++++++++++++++++++++++++++++
poetlint.py  | 98 +++++++------------------------------------------------------------------------
vowels.py  | 44 ++++++++++++++++++++++++++++++++++++++++++++

3 files changed, 95 insertions(+), 90 deletions(-)
diff --git a/common.py b/common.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import unicodedata
+import re
+
+vowels = 'aeiouyœæ'
+
+# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
+def strip_accents_one(s, with_except):
+  r = []
+  for x in s:
+    if with_except and x in ['è', 'é']:
+      r.append(x)
+    else:
+      r += unicodedata.normalize('NFD', x)
+  return r
+
+def strip_accents(s, with_except=False):
+  return ''.join(
+      (c for c in strip_accents_one(s, with_except)
+      if unicodedata.category(c) != 'Mn'))
+
+def norm_spaces(text):
+  return re.sub("\s+-*\s*", ' ', text)
+
+def rm_punct(text):
+  text = re.sub("'", '', text)
+  #TODO rather: keep only good chars
+  pattern = re.compile('[^\w -]', re.UNICODE)
+  return pattern.sub(' ', text)
+
+def is_vowels(chunk, with_h = False, with_y = True):
+  if not with_y and chunk == 'y':
+    return False
+  for char in strip_accents(chunk):
+    if char not in vowels:
+      if char != 'h' or not with_h:
+        return False
+  return True
+
+def normalize(text):
+  return norm_spaces(rm_punct(text.lower())).rstrip().lstrip()
+
diff --git a/poetlint.py b/poetlint.py
@@ -5,87 +5,18 @@ import sys
 import unicodedata
 import haspirater
 import rhyme
-#import cProfile
 from pprint import pprint
+from vowels import possible_weights
+from common import strip_accents, normalize, is_vowels
 
 #TODO no clear femid env for implicit repeat
 #TODO femid pattern groups (not all the same)
 
-
 consonants = "[bcçdfghjklmnpqrstvwxz*-]"
-vowels = 'aeiouyœæ'
 
-# TODO -ment at hemistiche
+# Forbidden at the end of a hemistiche. "-ent" would also be forbidden
+# in some cases but not others...
 sure_end_fem = ['es', 'e']
-end_fem = sure_end_fem + ['ent']
-
-hemistiche_pos = 6
-num_verse = 12
-
-def contains_trema(chunk):
-  for x in ['ä', 'ï', 'ö', 'ü', 'ÿ']:
-    if x in chunk:
-      return True
-  return False
-
-def possible_weights(chunk):
-  if len(chunk) == 1:
-    return [1]
-  # old spelling and weird exceptions
-  if chunk in ['ouï']:
-    return [2]
-  if chunk in ['eüi', 'aoû']:
-    return [1]
-  if contains_trema(chunk):
-    return [2]
-  chunk = strip_accents(chunk, True)
-  # TODO 'ée' ? ('déesse')
-  if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi',
-      'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo',
-      'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii',
-      'yeu', 'ye']:
-    return [1]
-  for x in ['oa', 'ea', 'eua', 'ao', 'euo', 'ua', 'uo', 'yo', 'yau']:
-    if x in chunk:
-      return [2]
-  if chunk == 'ée':
-    return [1, 2]
-  if chunk[0] == 'i':
-    return [1, 2]
-  if chunk[0] == 'u' and (strip_accents(chunk[1]) in ['i', 'e']):
-    return [1, 2]
-  if chunk[0] == 'o' and chunk[1] == 'u' and len(chunk) >= 3 and strip_accents(chunk[2]) in ['i', 'e']:
-    return [1, 2]
-  if 'é' in chunk or 'è' in chunk:
-    return [2]
-  # only non-accented left
-  
-  # TODO hmm
-  return [99]
-
-# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
-def strip_accents_one(s, with_except):
-  r = []
-  for x in s:
-    if with_except and x in ['è', 'é']:
-      r.append(x)
-    else:
-      r += unicodedata.normalize('NFD', x)
-  return r
-
-def strip_accents(s, with_except=False):
-  return ''.join(
-      (c for c in strip_accents_one(s, with_except)
-      if unicodedata.category(c) != 'Mn'))
-
-def norm_spaces(text):
-  return re.sub("\s+-*\s*", ' ', text)
-
-def rm_punct(text):
-  text = re.sub("'", '', text)
-  #TODO rather: keep only good chars
-  pattern = re.compile('[^\w -]', re.UNICODE)
-  return pattern.sub(' ', text)
 
 def annotate_aspirated(word):
   if word[0] != 'h':
@@ -95,18 +26,6 @@ def annotate_aspirated(word):
   else:
     return word
 
-def is_vowels(chunk, with_h = False, with_y = True):
-  if not with_y and chunk == 'y':
-    return False
-  for char in strip_accents(chunk):
-    if char not in vowels:
-      if char != 'h' or not with_h:
-        return False
-  return True
-
-def count_vowel_chunks(word):
-  return sum([1 for chunk in word if is_vowels(chunk)])
-
 def check_spaces(align, pos):
   if pos >= len(align):
     return "bad"
@@ -223,9 +142,6 @@ def prepend(l, ls):
     r.append(l + x)
   return r
 
-def normalize(text):
-  return norm_spaces(rm_punct(text.lower())).rstrip().lstrip()
-
 def parse(text, bound):
   original_text = normalize(text)
   text = re.sub("qu", 'q', original_text)
@@ -256,7 +172,7 @@ def parse(text, bound):
             nwords.append('y')
     words[i] = nwords
     if i > 0:
-      if count_vowel_chunks(words[i-1]) > 1:
+      if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
         if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
           words[i-1].pop(-1)
           words[i-1][-1] = words[i-1][-1]+"'"
@@ -428,6 +344,7 @@ class Template:
           pattern.rhyme)
       #print("nVALUE")
       #pprint(self.env[pattern.myid])
+      #pprint(self.env[pattern.myid])
     else:
       self.env[pattern.myid] = rhyme.check_rhyme(self.env[pattern.myid],
           (normalize(line), pattern.rhyme))
@@ -453,13 +370,14 @@ class Template:
     if len(self.femenv[pattern.femid]) == 0:
       errors.append(ErrorBadRhymeGenre(old, new))
       #TODO debug
-      errors.append(ErrorBadMetric(possible))
+      #errors.append(ErrorBadMetric(possible))
 
     return errors, pattern
 
   def parse_template(self, l):
     split = l.split(' ')
     metric = split[0]
+    #TODO generate unique ids if need be
     myid = split[1]
     femid = split[2]
     if len(split) >= 4:
diff --git a/vowels.py b/vowels.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+
+from common import strip_accents
+
+def contains_trema(chunk):
+  for x in ['ä', 'ï', 'ö', 'ü', 'ÿ']:
+    if x in chunk:
+      return True
+  return False
+
+def possible_weights(chunk):
+  if len(chunk) == 1:
+    return [1]
+  # old spelling and weird exceptions
+  if chunk in ['ouï']:
+    return [2]
+  if chunk in ['eüi', 'aoû']:
+    return [1]
+  if contains_trema(chunk):
+    return [2]
+  chunk = strip_accents(chunk, True)
+  # TODO 'ée' ? ('déesse')
+  if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi',
+      'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo',
+      'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii',
+      'yeu', 'ye']:
+    return [1]
+  for x in ['oa', 'ea', 'eua', 'ao', 'euo', 'ua', 'uo', 'yo', 'yau']:
+    if x in chunk:
+      return [2]
+  if chunk == 'ée':
+    return [1, 2]
+  if chunk[0] == 'i':
+    return [1, 2]
+  if chunk[0] == 'u' and (strip_accents(chunk[1]) in ['i', 'e']):
+    return [1, 2]
+  if chunk[0] == 'o' and chunk[1] == 'u' and len(chunk) >= 3 and strip_accents(chunk[2]) in ['i', 'e']:
+    return [1, 2]
+  if 'é' in chunk or 'è' in chunk:
+    return [2]
+  # only non-accented left
+  
+  # TODO hmm
+  return [99]

	plint French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
	git clone https://a3nm.net/git/plint/
	Log | Files | Refs | README

common.py	|	43	+++++++++++++++++++++++++++++++++++++++++++
poetlint.py	|	98	+++++++------------------------------------------------------------------------
vowels.py	|	44	++++++++++++++++++++++++++++++++++++++++++++