plint

French poetry validator
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit 51bb1f5b98f0752965f654fc0bf2f878f0c5c448
parent 309f7af597d5319c1cc63246d5883b5cbd60ca28
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 27 Jul 2014 11:12:16 +0200

fix bug with hemistiche and splithyph

Diffstat:
verse.py | 35+++++++++++++++++++++++------------
1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/verse.py b/verse.py
@@ -93,11 +93,17 @@ class Verse:
     words = remove_trivial(words, (lambda w: re.match("^\s*$", w) or
       len(normalize(w, rm_all=True)) == 0))
     words2 = sum([self.splithyph(w) for w in words], [])
-    pre_chunks = [re.split(consonants_regexp, word) for word in words2]
-    pre_chunks = [remove_trivial(x, (lambda w: re.match("^\s*$", w) or
-      len(normalize(w, rm_all=True)) == 0)) for x in pre_chunks]
-    self.chunks = [[{'original': y, 'text': normalize(y, rm_apostrophe=True)}
-      for y in x] for x in pre_chunks]
+    pre_chunks = [(b, re.split(consonants_regexp, word)) for (b, word) in words2]
+    pre_chunks = [(b, remove_trivial(x, (lambda w: re.match("^\s*$", w) or
+      len(normalize(w, rm_all=True)) == 0))) for (b, x) in pre_chunks]
+    self.chunks = []
+    for (b, chunk) in pre_chunks:
+      self.chunks.append([{'original': y, 'text': normalize(y, rm_apostrophe=True)}
+        for y in chunk])
+      if not b:
+        # word end is a fake word end
+        for y in self.chunks[-1]:
+          y['hemis'] = 'cut'
 
     # collapse apostrophes
     self.chunks2 = []
@@ -182,6 +188,9 @@ class Verse:
           # instruct that we must use text for the pronunciation
           new_word.append({'original': part, 'text': x, 'text_pron': True,
             'elision': [False, True], 'no_hiatus': True})
+          # propagate information from splithyph
+          if 'hemis' in w[0].keys():
+            new_word[-1]['hemis'] = w[0]['hemis']
         self.chunks[i] = new_word
         # the last one is also elidable
         if self.chunks[i][-1]['text'] == 'e':
@@ -264,7 +273,8 @@ class Verse:
 
   def splithyph(self, word):
     """split hyphen-delimited word parts into separate words if they are only
-    consonants, so that the sigle code later can deal with them (e.g. "k-way")"""
+    consonants, so that the sigle code later can deal with them (e.g. "k-way")
+    annotates parts with boolean indicating if there is a word end afterward"""
     pre_chunks2 = []
     cs = re.split(self.hyphen_regexp, word)
 
@@ -272,23 +282,23 @@ class Verse:
     for i in range(len(cs)):
       if re.match("^-*$", cs[i]):
         if len(pre_chunks2) > 0:
-          pre_chunks2[-1] += cs[i]
+          pre_chunks2[-1] = (pre_chunks2[-1][0], pre_chunks2[-1][1] + cs[i])
           continue
         else:
           miss = cs[i]
           continue
       if is_consonants(normalize(cs[i])):
-        pre_chunks2.append(miss + cs[i])
+        pre_chunks2.append((False if i < len(cs) - 1 else True, miss + cs[i]))
         miss = ""
       else:
-        pre_chunks2.append(miss + "".join(cs[i:]))
+        pre_chunks2.append((True, miss + "".join(cs[i:])))
         miss = ""
         break
     if miss != "":
       if len(pre_chunks2) > 0:
-        pre_chunks2[-1] += miss
+        pre_chunks2[-1] = (pre_chunks2[-1][0], pre_chunks2[-1][1] + miss)
       else:
-        pre_chunks2 = [miss]
+        pre_chunks2 = [(True, miss)]
     return pre_chunks2
 
   def annotate(self):
@@ -299,7 +309,8 @@ class Verse:
       # for the case of "pays" and related words
       if 'weights' not in self.chunks[i].keys():
         self.chunks[i]['weights'] = self.possible_weights_context(i)
-      self.chunks[i]['hemis'] = self.hemistiche(i)
+      if 'hemis' not in self.chunks[i].keys():
+        self.chunks[i]['hemis'] = self.hemistiche(i)
     self.text = self.align2str(self.chunks)
 
   def parse(self):