woops - drime - French rhyme dictionary with web and CLI interface

commit 03034013d9d6738e8a76a26151214c5e65174344
parent 072bb59ffc91f72c56b2380488f584f03809c9d1
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Tue, 27 Dec 2011 01:14:32 +0100

woops

Diffstat:
common.py  | 2 --
common.py  | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/common.py b/common.py
@@ -1 +0,0 @@
-/home/a3_nm/DOCUMENTS/poetlint/common.py-
\ No newline at end of file
diff --git a/common.py b/common.py
@@ -0,0 +1,89 @@
+#!/usr/bin/python3
+#coding: utf-8
+
+import unicodedata
+import re
+
+# Letters accepted by is_vowels and is_consonants respectively (both strip
+# accents from their input before looking letters up here)
+vowels = 'aeiouyœæ'
+consonants = "bcçdfghjklmnpqrstvwxz"
+
+# A variant of X-SAMPA in which every French phoneme is a single character.
+# Each pair is (one-character code, X-SAMPA sequence it stands for); see
+# to_xsampa and from_xsampa.
+SUBSTS = [
+  ('#', 'A~'),
+  ('$', 'O~'),
+  (')', 'E~'),
+  ('(', '9~'),
+    ]
+
+# Endings forbidden at the end of a hemistich. "-ent" would also be forbidden
+# in some cases but not others...
+sure_end_fem = ['es', 'e', 'ë']
+
+# http://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-string
+def strip_accents_one(s, with_except=False):
+  """Decompose the characters of a string so that accents can be stripped
+
+  Returns a list of characters in which every accented letter of s is
+  replaced by its NFD decomposition (base letter followed by its combining
+  marks). The marks themselves are not removed here.
+
+  with_except keeps specifically 'é' and 'è' in precomposed form, even
+  when s spells them as 'e' followed by a combining accent"""
+  if with_except:
+    # Compose first, so that an 'é' or 'è' given in decomposed form is
+    # still recognized as one of the exceptions below
+    s = unicodedata.normalize('NFC', s)
+  r = []
+  for x in s:
+    if with_except and x in ('è', 'é'):
+      r.append(x)
+    else:
+      # extending a list with a string adds its characters one by one
+      r += unicodedata.normalize('NFD', x)
+  return r
+
+def strip_accents(s, with_except=False):
+  """Return s with its accents removed
+
+  The characters produced by strip_accents_one are joined back together,
+  leaving out the nonspacing marks (Unicode category 'Mn'); with_except is
+  passed through unchanged"""
+  kept = []
+  for c in strip_accents_one(s, with_except):
+    if unicodedata.category(c) == 'Mn':
+      continue
+    kept.append(c)
+  return ''.join(kept)
+
+def norm_spaces(text):
+  """Collapse runs of whitespace into a single space
+
+  Dashes directly following a whitespace run (and any whitespace after
+  them) are swallowed too, so "a - b" becomes "a b"; a dash with no
+  whitespace before it, as in "a-b", is kept"""
+  # raw string: "\s" in a plain literal is an invalid escape sequence
+  return re.sub(r"\s+-*\s*", ' ', text)
+
+def rm_punct(text, with_apostrophe = False):
+  """Remove punctuation from text
+
+  Every character other than a word character, a space, "-" or "'" is
+  replaced by a space. Unless with_apostrophe is set, apostrophes are
+  deleted beforehand (deleted, not replaced by a space)"""
+  if not with_apostrophe:
+    # a plain replace is enough here, no regex needed
+    text = text.replace("'", '')
+  #TODO rather: keep only good chars
+  # raw string: "\w" in a plain literal is an invalid escape sequence
+  pattern = re.compile(r"[^'\w -]", re.UNICODE)
+  return pattern.sub(' ', text)
+
+def is_vowels(chunk, with_h=False, with_y=True):
+  """Tell whether a chunk consists only of vowels
+
+  Accents are stripped before the letters are checked. with_h makes 'h'
+  count as a vowel; when with_y is unset, the chunk 'y' on its own is
+  rejected (a 'y' inside a longer chunk is still accepted)"""
+
+  if chunk == 'y' and not with_y:
+    return False
+  # 'h' only joins the accepted letters on request
+  accepted = (vowels + 'h') if with_h else vowels
+  return all(letter in accepted for letter in strip_accents(chunk))
+
+def is_consonants(chunk):
+  """Tell whether a chunk consists only of consonants
+
+  Accents are stripped before the letters are checked; an empty chunk is
+  accepted"""
+
+  return all(letter in consonants for letter in strip_accents(chunk))
+
+def normalize(text, with_apostrophe=False):
+  """Normalize text: lowercase it, remove punctuation (see rm_punct),
+  collapse whitespace (see norm_spaces) and trim both ends"""
+  lowered = text.lower()
+  cleaned = norm_spaces(rm_punct(lowered, with_apostrophe))
+  return cleaned.strip()
+
+def subst(string, subs):
+  """Apply a sequence of substitutions to a string
+
+  subs is an iterable of (old, new) pairs; they are applied one after the
+  other with str.replace, in order, so a later pair sees the output of the
+  earlier ones. An empty subs returns string unchanged"""
+  # a plain loop: no recursion depth limit, no list slicing at each step
+  for old, new in subs:
+    string = string.replace(old, new)
+  return string
+
+def to_xsampa(s):
+  """Convert our modified format to X-SAMPA, expanding each one-character
+  code of SUBSTS into the sequence it stands for"""
+  expanded = subst(s, SUBSTS)
+  return expanded
+
+def from_xsampa(s):
+  """Convert X-SAMPA to our modified format, contracting each sequence
+  listed in SUBSTS into its one-character code"""
+  reversed_pairs = [(pair[1], pair[0]) for pair in SUBSTS]
+  return subst(s, reversed_pairs)
+

	drime French rhyme dictionary with web and CLI interface
	git clone https://a3nm.net/git/drime/
	Log \| Files \| Refs \| README

common.py	\|	2	--
common.py	\|	89	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++