commit 42bde95087c10d81e146f4a560727f94be824407
parent 8d59d3e20eeee614e80fec970c3500efc05d0f82
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 25 Jun 2011 12:12:31 -0400
refactoring
Diffstat:
common.py | 43 +++++++++++++++++++++++++++++++++++++++++++
poetlint.py | 98 +++++++------------------------------------------------------------------------
vowels.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 95 insertions(+), 90 deletions(-)
diff --git a/common.py b/common.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+import unicodedata
+import re
+
+vowels = 'aeiouyœæ'
+
+# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
+def strip_accents_one(s, with_except):
+ r = []
+ for x in s:
+ if with_except and x in ['è', 'é']:
+ r.append(x)
+ else:
+ r += unicodedata.normalize('NFD', x)
+ return r
+
+def strip_accents(s, with_except=False):
+ return ''.join(
+ (c for c in strip_accents_one(s, with_except)
+ if unicodedata.category(c) != 'Mn'))
+
+def norm_spaces(text):
+ return re.sub("\s+-*\s*", ' ', text)
+
+def rm_punct(text):
+ text = re.sub("'", '', text)
+ #TODO rather: keep only good chars
+ pattern = re.compile('[^\w -]', re.UNICODE)
+ return pattern.sub(' ', text)
+
+def is_vowels(chunk, with_h = False, with_y = True):
+ if not with_y and chunk == 'y':
+ return False
+ for char in strip_accents(chunk):
+ if char not in vowels:
+ if char != 'h' or not with_h:
+ return False
+ return True
+
+def normalize(text):
+ return norm_spaces(rm_punct(text.lower())).rstrip().lstrip()
+
diff --git a/poetlint.py b/poetlint.py
@@ -5,87 +5,18 @@ import sys
import unicodedata
import haspirater
import rhyme
-#import cProfile
from pprint import pprint
+from vowels import possible_weights
+from common import strip_accents, normalize, is_vowels
#TODO no clear femid env for implicit repeat
#TODO femid pattern groups (not all the same)
-
consonants = "[bcçdfghjklmnpqrstvwxz*-]"
-vowels = 'aeiouyœæ'
-# TODO -ment at hemistiche
+# Forbidden at the end of a hemistiche. "-ent" would also be forbidden
+# in some cases but not others...
sure_end_fem = ['es', 'e']
-end_fem = sure_end_fem + ['ent']
-
-hemistiche_pos = 6
-num_verse = 12
-
-def contains_trema(chunk):
- for x in ['ä', 'ï', 'ö', 'ü', 'ÿ']:
- if x in chunk:
- return True
- return False
-
-def possible_weights(chunk):
- if len(chunk) == 1:
- return [1]
- # old spelling and weird exceptions
- if chunk in ['ouï']:
- return [2]
- if chunk in ['eüi', 'aoû']:
- return [1]
- if contains_trema(chunk):
- return [2]
- chunk = strip_accents(chunk, True)
- # TODO 'ée' ? ('déesse')
- if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi',
- 'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo',
- 'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii',
- 'yeu', 'ye']:
- return [1]
- for x in ['oa', 'ea', 'eua', 'ao', 'euo', 'ua', 'uo', 'yo', 'yau']:
- if x in chunk:
- return [2]
- if chunk == 'ée':
- return [1, 2]
- if chunk[0] == 'i':
- return [1, 2]
- if chunk[0] == 'u' and (strip_accents(chunk[1]) in ['i', 'e']):
- return [1, 2]
- if chunk[0] == 'o' and chunk[1] == 'u' and len(chunk) >= 3 and strip_accents(chunk[2]) in ['i', 'e']:
- return [1, 2]
- if 'é' in chunk or 'è' in chunk:
- return [2]
- # only non-accented left
-
- # TODO hmm
- return [99]
-
-# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
-def strip_accents_one(s, with_except):
- r = []
- for x in s:
- if with_except and x in ['è', 'é']:
- r.append(x)
- else:
- r += unicodedata.normalize('NFD', x)
- return r
-
-def strip_accents(s, with_except=False):
- return ''.join(
- (c for c in strip_accents_one(s, with_except)
- if unicodedata.category(c) != 'Mn'))
-
-def norm_spaces(text):
- return re.sub("\s+-*\s*", ' ', text)
-
-def rm_punct(text):
- text = re.sub("'", '', text)
- #TODO rather: keep only good chars
- pattern = re.compile('[^\w -]', re.UNICODE)
- return pattern.sub(' ', text)
def annotate_aspirated(word):
if word[0] != 'h':
@@ -95,18 +26,6 @@ def annotate_aspirated(word):
else:
return word
-def is_vowels(chunk, with_h = False, with_y = True):
- if not with_y and chunk == 'y':
- return False
- for char in strip_accents(chunk):
- if char not in vowels:
- if char != 'h' or not with_h:
- return False
- return True
-
-def count_vowel_chunks(word):
- return sum([1 for chunk in word if is_vowels(chunk)])
-
def check_spaces(align, pos):
if pos >= len(align):
return "bad"
@@ -223,9 +142,6 @@ def prepend(l, ls):
r.append(l + x)
return r
-def normalize(text):
- return norm_spaces(rm_punct(text.lower())).rstrip().lstrip()
-
def parse(text, bound):
original_text = normalize(text)
text = re.sub("qu", 'q', original_text)
@@ -256,7 +172,7 @@ def parse(text, bound):
nwords.append('y')
words[i] = nwords
if i > 0:
- if count_vowel_chunks(words[i-1]) > 1:
+ if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
words[i-1].pop(-1)
words[i-1][-1] = words[i-1][-1]+"'"
@@ -428,6 +344,7 @@ class Template:
pattern.rhyme)
#print("nVALUE")
#pprint(self.env[pattern.myid])
+ #pprint(self.env[pattern.myid])
else:
self.env[pattern.myid] = rhyme.check_rhyme(self.env[pattern.myid],
(normalize(line), pattern.rhyme))
@@ -453,13 +370,14 @@ class Template:
if len(self.femenv[pattern.femid]) == 0:
errors.append(ErrorBadRhymeGenre(old, new))
#TODO debug
- errors.append(ErrorBadMetric(possible))
+ #errors.append(ErrorBadMetric(possible))
return errors, pattern
def parse_template(self, l):
split = l.split(' ')
metric = split[0]
+ #TODO generate unique ids if need be
myid = split[1]
femid = split[2]
if len(split) >= 4:
diff --git a/vowels.py b/vowels.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+
+from common import strip_accents
+
+def contains_trema(chunk):
+ for x in ['ä', 'ï', 'ö', 'ü', 'ÿ']:
+ if x in chunk:
+ return True
+ return False
+
+def possible_weights(chunk):
+ if len(chunk) == 1:
+ return [1]
+ # old spelling and weird exceptions
+ if chunk in ['ouï']:
+ return [2]
+ if chunk in ['eüi', 'aoû']:
+ return [1]
+ if contains_trema(chunk):
+ return [2]
+ chunk = strip_accents(chunk, True)
+ # TODO 'ée' ? ('déesse')
+ if chunk in ['ai', 'ou', 'eu', 'ei', 'eau', 'eoi', 'eui', 'au', 'oi',
+ 'oie', 'œi', 'œu', 'eaie', 'aie', 'oei', 'oeu', 'ea', 'ae', 'eo',
+ 'eoie', 'oe', 'eai', 'eue', 'aa', 'oo', 'ee', 'ii', 'aii',
+ 'yeu', 'ye']:
+ return [1]
+ for x in ['oa', 'ea', 'eua', 'ao', 'euo', 'ua', 'uo', 'yo', 'yau']:
+ if x in chunk:
+ return [2]
+ if chunk == 'ée':
+ return [1, 2]
+ if chunk[0] == 'i':
+ return [1, 2]
+ if chunk[0] == 'u' and (strip_accents(chunk[1]) in ['i', 'e']):
+ return [1, 2]
+ if chunk[0] == 'o' and chunk[1] == 'u' and len(chunk) >= 3 and strip_accents(chunk[2]) in ['i', 'e']:
+ return [1, 2]
+ if 'é' in chunk or 'è' in chunk:
+ return [2]
+ # only non-accented left
+
+ # TODO hmm
+ return [99]