add missing file metric.py - drime - French rhyme dictionary with web and CLI interface

commit 8fcced6ac814fd9091d265755682a12e2d17f54d
parent 8c82ee9e591bb8dcbda275a32e964d197e66c2d8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 01:13:18 +0100

add missing file metric.py

Diffstat:
metric.py  | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 125 insertions(+), 0 deletions(-)
diff --git a/metric.py b/metric.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+#coding: utf-8
+
+# This file is pretty generic because it is part of a larger project I haven't
+# released yet; I should clean this up someday.
+
+import re
+from common import normalize, is_vowels, consonants, sure_end_fem
+from vowels import possible_weights
+import haspirater
+
+def annotate_aspirated(word):
+  """Annotate aspirated 'h'"""
+  if word[0] != 'h':
+    return word
+  if haspirater.lookup(word):
+    return '*'+word
+  else:
+    return word
+
+def fit(chunks, pos, left):
+  """Brute-force exploration of all possible vowel cluster weightings,
+  within a maximum total weight of left"""
+  if pos >= len(chunks):
+    return [[]] # the only possibility is the empty list
+  if left < 0:
+    return [] # no possibilities
+  # skip consonants
+  if (not is_vowels(chunks[pos])):
+    return [[chunks[pos]] + x for x in fit(chunks, pos+1, left)]
+  else:
+    if (pos >= len(chunks) - 2 and chunks[pos] == 'e'):
+      # special case for verse endings, which can get elided (or not)
+      if pos == len(chunks) - 1:
+        weights = [0] # ending 'e' is elided
+      elif chunks[pos+1] == 's':
+        weights = [0] # ending 'es' is elided
+      elif chunks[pos+1] == 'nt':
+        # ending 'ent' is sometimes elided
+        # actually, this will have an influence on the rhyme's gender
+        weights = [0, 1]
+      else:
+        weights = possible_weights(chunks[pos])
+    else:
+      weights = possible_weights(chunks[pos])
+    result = []
+    for weight in weights:
+      # combine all possibilities
+      result += [[(chunks[pos], weight)] + x for x in fit(chunks, pos+1,
+        left - weight)]
+    return result
+
+def feminine(align, verse):
+  for a in sure_end_fem:
+    if verse.endswith(a):
+      return ['F']
+  if not verse.endswith('ent'):
+    return ['M']
+  # verse ends with 'ent'
+  if align[-2][1] == 0:
+    return ['F'] # mute -ent
+  if align[-2][1] > 0 and align[-2][0] == 'e':
+    return ['M'] # non-mute "-ent" by the choice of metric
+  # otherwise we cannot decide: "tient" vs. "lient", for instance
+  # TODO check pronunciation? :-/
+  return ['M', 'F']
+
+def parse(text, bound):
+  """Return possible aligns for text; bound is an upper bound on the
+  align length, to limit running time"""
+
+  original_text = normalize(text)
+
+  # avoid some vowel problems
+  text = re.sub("qu", 'q', original_text)
+  text = re.sub("gue", 'ge', text)
+  text = re.sub("gué", 'gé', text)
+  text = re.sub("guè", 'gè', text)
+  text = re.sub("gua", 'ga', text)
+
+  # split in words
+  words = text.split(' ')
+  words = [annotate_aspirated(word) for word in words if word != '']
+
+  pattern = re.compile('(['+consonants+'*-]*)', re.UNICODE)
+
+  # cut each word into chunks of vowels and consonants, with some specific
+  # kludges
+  for i in range(len(words)):
+    words[i] = re.split(pattern, words[i])
+    words[i] = [chunk for chunk in words[i] if chunk != '']
+    nwords = []
+    # the case of 'y' is special
+    for chunk in words[i]:
+      if 'y' not in chunk or len(chunk) == 1 or chunk[0] == 'y':
+        nwords.append(chunk)
+      else:
+        a = chunk.split('y')
+        nwords.append(a[0])
+        nwords.append('Y')
+        if a[1] != '':
+          nwords.append(a[1])
+        else:
+          # the case of "pays" is very special :-(
+          if words[i] == ['p', 'ay', 's']:
+            nwords.append('y')
+    words[i] = nwords
+    # remove mute 'e'
+    if i > 0:
+      if sum([1 for chunk in words[i-1] if is_vowels(chunk)]) > 1:
+        if words[i-1][-1] == 'e' and is_vowels(words[i][0], True):
+          words[i-1].pop(-1)
+          words[i-1][-1] = words[i-1][-1]+"'"
+
+  # group back words
+  for word in words:
+    word.append(' ')
+  chunks = sum(words, [])[:-1]
+ 
+  # return all possibilities to weigh the vowel clusters, annotated by
+  # the femininity of the align (depending on both the align and the
+  # original text)
+  return list(map((lambda x : (x, feminine(x, original_text))),
+    fit(chunks, 0, bound)))
+

	drime French rhyme dictionary with web and CLI interface
	git clone https://a3nm.net/git/drime/
	Log | Files | Refs | README