Merge gitlab.com:a3nm/frhyme - frhyme - guess the last phonemes of a French word (local mirror of https://gitlab.com/a3nm/frhyme)

commit 6e9af935a279923df039026980c92b979e26947a
parent 5f623dafafdd06a94b2165224cd5a0eba4cc1451
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri, 16 Aug 2019 00:03:44 +0200

Merge gitlab.com:a3nm/frhyme

Diffstat:
.gitignore  | 3 +++
LICENSE  | 18 ++++++++++++++++++
README  | 4 +++-
additions  | 109 -------------------------------------------------------------------------------
buildtrie.py  | 45 ---------------------------------------------
compresstrie.py  | 43 -------------------------------------------
frhyme.py  | 68 --------------------------------------------------------------------
frhyme/__init__.py  | 1 +
frhyme/buildtrie.py  | 45 +++++++++++++++++++++++++++++++++++++++++++++
frhyme/compresstrie.py  | 43 +++++++++++++++++++++++++++++++++++++++++++
frhyme/frhyme.py  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lexique/lexique_fix.sh  | 8 --------
lexique/lexique_prepare.sh  | 6 ------
lexique/lexique_retrieve.sh  | 12 ------------
lexique/subst.pl  | 38 --------------------------------------
make.sh  | 9 ---------
scripts/additions  | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
scripts/install.sh  | 5 +++++
scripts/lexique/lexique_fix.sh  | 8 ++++++++
scripts/lexique/lexique_prepare.sh  | 6 ++++++
scripts/lexique/lexique_retrieve.sh  | 12 ++++++++++++
scripts/lexique/subst.pl  | 38 ++++++++++++++++++++++++++++++++++++++
scripts/make.sh  | 9 +++++++++
scripts/truncate.sh  | 5 +++++
setup.py  | 20 ++++++++++++++++++++
truncate.sh  | 5 -----

26 files changed, 393 insertions(+), 344 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
 frhyme.json
 lexique.txt
 lexique/Lexique*
+build/
+dist/
+frhyme.egg-info/
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,18 @@
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README b/README
@@ -55,8 +55,10 @@ should be trained from a pronunciation database. The recommended way to do so is
 to use a tweaked Lexique <http://lexique.org> along with a provided bugfix file,
 as follows:
 
+  cd scripts
   lexique/lexique_retrieve.sh > lexique.txt
-  ./make.sh NPHON lexique.txt additions > frhyme.json
+  ./make.sh NPHON lexique.txt additions > ../frhyme/frhyme.json
+  cd ..
 
 where NPHON is the number of trailing phonemes to keep (suggested value: 4).
 Beware, this may take up several hundred megabytes of RAM. The resulting file
diff --git a/additions b/additions
@@ -1,109 +0,0 @@
-almanach	almana
-dompte	d$t
-domptent	d$t
-dompterai	d$tRE
-dompterait	d$tRE
-dompter	d$te
-dompteur	d$t9R
-dompteurs	d$t9R
-dompteuse	d$t2z
-dompteuses	d$t2z
-domptez	d$te
-tabis	tabi
-libye	libi
-est	E
-bœuf	b9f
-bœufs	b2
-dis-je	diZ
-employ	#plwa
-amusemens	amyzm#
-parens	paR#
-peur	p9R
-vapeur	vap9R
-moeurs	m9R
-mœurs	m9R
-tous	tu
-Achille	aSil
-Achilles	aSil
-ignora	iJORa
-ignorai	iJORE
-ignoraient	iJORE
-ignorais	iJORE
-ignorait	iJORE
-ignorance	iJOR#s
-ignorances	iJOR#s
-ignorant	iJOR#
-ignorante	iJOR#t
-ignorantes	iJOR#t
-ignorantins	iJOR#t)
-ignorants	iJOR#
-ignorassent	iJORas
-ignore	iJOR
-ignorent	iJOR
-ignorer	iJORe
-donc	d$
-pattern	patERn
-est-ce	Es
-Rouen	Rw#
-c'est	sE
-l'est	lE
--il	il
-die	di
-'en	#
-étais-je	etEZ
-lords	lOR
-post-scriptum	pOstskRipt9m
-Arras	aRas
-arras	aRas
-laissez-les	lEselE
-ruz	Ry
-c'est	sE
-l'est	lE
-m'en	m#
-Soize	swaz
-Cianán	kajnan
-inuit	inwit
-inuits	inwit
-mindel	m)dEl
-mindels	m)dEl
-citroën	sitROEn
-Citroën	sitROEn
-inlay	inlE
-inlays	inlE
-ber	bER
-bers	bER
-ehud	eud
-Ehud	eud
-rubén	Ruben
-Rubén	Ruben
-Jefferson	ZEfERsOn
-ruolz	RwOls
-ruolz	RyOls
-maremme	maREm
-maremmes	maREm
-jackpot	dZakpOt
-jackpots	dZakpOt
-poële	pwal
-poëles	pwal
-poëlon	pwal$
-poëlées	pwale
-Terese	teReze
-pôvre	povR
-pôvres	povR
-Jocelyn	Zos2l)
-saburre	sabyR
-Sylla	sila
-m'sieur	msj2
-corner	kORnER
-bostryche	bOstRiS
-bostryches	bOstRiS
-abrivent	abRiv#
-abrivents	abRiv#
-apocyn	apOs)
-apocyns	apOs)
-Rostand	ROst#
-Zürich	zyRik
-Dresde	dREzd
-zooment	zum
-n'es	nE
-Créuse	kReyz
diff --git a/buildtrie.py b/buildtrie.py
@@ -1,45 +0,0 @@
-#!/usr/bin/python3 -O
-
-"""From a list of values (arbitrary) and keys (words), create a trie
-representing this mapping"""
-
-import json
-import sys
-
-# first item is a dictionnary from values to an int indicating the
-# number of occurrences with this prefix having this value
-# second item is a dictionnary from letters to descendent nodes
-def empty_node():
-  return [{}, {}]
-
-trie = empty_node()
-
-def insert(trie, key, val):
-  """Insert val for key in trie"""
-  values, children = trie
-  # create a new value, if needed
-  if len(key) == 0:
-    if val not in values.keys():
-      values[val] = 0
-    # increment count for val
-    values[val] += 1
-  if len(key) > 0:
-    # create a new node if needed
-    if key[0] not in children.keys():
-      children[key[0]] = empty_node()
-    # recurse
-    return insert(children[key[0]], key[1:], val)
-
-while True:
-  line = sys.stdin.readline()
-  if not line:
-    break
-  line = line.strip().split('\t')
-  # a trailing space is used to mark termination of the word
-  # this is useful in cases where a prefix of a word is a complete,
-  # different word with a different value
-  # two spaces because some data words have multiple spaces
-  insert(trie, line[0]+'  ', line[1])
-
-print(json.dumps(trie))
-
diff --git a/compresstrie.py b/compresstrie.py
@@ -1,43 +0,0 @@
-#!/usr/bin/env python3
-
-"""Read json trie in stdin, trim unneeded branches and output json dump
-to stdout"""
-
-import json
-import sys
-
-trie = json.load(sys.stdin)
-
-def compress(trie):
-  """Compress the trie"""
-  ref = None
-  num = 0
-  ok = True
-  if trie[0] != {}:
-    if len(trie[0].keys()) > 1:
-      return None
-    ref = list(trie[0].keys())[0]
-    num = trie[0][ref]
-  for child in trie[1].values():
-    x = compress(child)
-    if not ok or x == None:
-      ok = False
-      continue
-    r, n = x
-    if ref == None:
-      ref = r
-    if ref != r:
-      ok = False
-    num += n
-  if not ok:
-    return None
-  trie[0] = {}
-  trie[0][ref] = num
-  trie[1] = {}
-  #print(ref, file=sys.stderr)
-  return ref, num
-
-compress(trie)
-
-print(json.dumps(trie))
-
diff --git a/frhyme.py b/frhyme.py
@@ -1,68 +0,0 @@
-#!/usr/bin/python3 -O
-
-"""Try to guess the last few phonemes of a French word, by a lookup in a
-precompiled trie"""
-
-import os
-import json
-import sys
-from pprint import pprint
-
-DEFAULT_NBEST=5
-
-f = open(os.path.join(os.path.dirname(
-  os.path.realpath(__file__)), 'frhyme.json'))
-trie = json.load(f)
-f.close()
-
-def to_list(d, rev=True):
-  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
-
-def trie2list(trie):
-  v, c = trie
-  if c == {}:
-    return to_list(v)
-  else:
-    d = {}
-    for child in c.keys():
-      l = trie2list(c[child])
-      for x in l:
-        if x[1] not in d.keys():
-          d[x[1]] = 0
-        d[x[1]] += x[0]
-    return to_list(d, False)
-
-def add_dict(a, b):
-  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
-
-def do_lookup(trie, key):
-  if len(key) == 0 or key[0] not in trie[1].keys():
-    return trie2list(trie)
-  return do_lookup(trie[1][key[0]], key[1:])
-
-def nbest(l, t):
-  l = sorted(l)[-t:]
-  l.reverse()
-  return l
-
-def lookup(key, n=DEFAULT_NBEST):
-  """Return n top pronunciations for key"""
-  return nbest(do_lookup(trie, key[::-1] + '  '), n)
-
-def wrap_lookup(line, n):
-  pprint(lookup(line.lower().strip(), n))
-
-if __name__ == '__main__':
-  n = DEFAULT_NBEST
-  if len(sys.argv) >= 2:
-    n = int(sys.argv[1])
-  if len(sys.argv) > 2:
-    for arg in sys.argv[2:]:
-      wrap_lookup(arg, n)
-  else:
-    while True:
-      line = sys.stdin.readline()
-      if not line:
-        break
-      wrap_lookup(line, n)
-
diff --git a/frhyme/__init__.py b/frhyme/__init__.py
@@ -0,0 +1 @@
+from .frhyme import *
diff --git a/frhyme/buildtrie.py b/frhyme/buildtrie.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python3 -O
+
+"""From a list of values (arbitrary) and keys (words), create a trie
+representing this mapping"""
+
+import json
+import sys
+
+# first item is a dictionnary from values to an int indicating the
+# number of occurrences with this prefix having this value
+# second item is a dictionnary from letters to descendent nodes
+def empty_node():
+  return [{}, {}]
+
+trie = empty_node()
+
+def insert(trie, key, val):
+  """Insert val for key in trie"""
+  values, children = trie
+  # create a new value, if needed
+  if len(key) == 0:
+    if val not in values.keys():
+      values[val] = 0
+    # increment count for val
+    values[val] += 1
+  if len(key) > 0:
+    # create a new node if needed
+    if key[0] not in children.keys():
+      children[key[0]] = empty_node()
+    # recurse
+    return insert(children[key[0]], key[1:], val)
+
+while True:
+  line = sys.stdin.readline()
+  if not line:
+    break
+  line = line.strip().split('\t')
+  # a trailing space is used to mark termination of the word
+  # this is useful in cases where a prefix of a word is a complete,
+  # different word with a different value
+  # two spaces because some data words have multiple spaces
+  insert(trie, line[0]+'  ', line[1])
+
+print(json.dumps(trie))
+
diff --git a/frhyme/compresstrie.py b/frhyme/compresstrie.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+"""Read json trie in stdin, trim unneeded branches and output json dump
+to stdout"""
+
+import json
+import sys
+
+trie = json.load(sys.stdin)
+
+def compress(trie):
+  """Compress the trie"""
+  ref = None
+  num = 0
+  ok = True
+  if trie[0] != {}:
+    if len(trie[0].keys()) > 1:
+      return None
+    ref = list(trie[0].keys())[0]
+    num = trie[0][ref]
+  for child in trie[1].values():
+    x = compress(child)
+    if not ok or x == None:
+      ok = False
+      continue
+    r, n = x
+    if ref == None:
+      ref = r
+    if ref != r:
+      ok = False
+    num += n
+  if not ok:
+    return None
+  trie[0] = {}
+  trie[0][ref] = num
+  trie[1] = {}
+  #print(ref, file=sys.stderr)
+  return ref, num
+
+compress(trie)
+
+print(json.dumps(trie))
+
diff --git a/frhyme/frhyme.py b/frhyme/frhyme.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python3 -O
+
+"""Try to guess the last few phonemes of a French word, by a lookup in a
+precompiled trie"""
+
+import os
+import json
+import sys
+from pprint import pprint
+
+DEFAULT_NBEST=5
+
+f = open(os.path.join(os.path.dirname(
+  os.path.realpath(__file__)), 'frhyme.json'))
+trie = json.load(f)
+f.close()
+
+def to_list(d, rev=True):
+  return [(d[a], a[::-1] if rev else a) for a in d.keys()]
+
+def trie2list(trie):
+  v, c = trie
+  if c == {}:
+    return to_list(v)
+  else:
+    d = {}
+    for child in c.keys():
+      l = trie2list(c[child])
+      for x in l:
+        if x[1] not in d.keys():
+          d[x[1]] = 0
+        d[x[1]] += x[0]
+    return to_list(d, False)
+
+def add_dict(a, b):
+  return dict( [ (n, a.get(n, 0)+b.get(n, 0)) for n in set(a)|set(b) ] )
+
+def do_lookup(trie, key):
+  if len(key) == 0 or key[0] not in trie[1].keys():
+    return trie2list(trie)
+  return do_lookup(trie[1][key[0]], key[1:])
+
+def nbest(l, t):
+  l = sorted(l)[-t:]
+  l.reverse()
+  return l
+
+def lookup(key, n=DEFAULT_NBEST):
+  """Return n top pronunciations for key"""
+  return nbest(do_lookup(trie, key[::-1] + '  '), n)
+
+def wrap_lookup(line, n):
+  pprint(lookup(line.lower().strip(), n))
+
+if __name__ == '__main__':
+  n = DEFAULT_NBEST
+  if len(sys.argv) >= 2:
+    n = int(sys.argv[1])
+  if len(sys.argv) > 2:
+    for arg in sys.argv[2:]:
+      wrap_lookup(arg, n)
+  else:
+    while True:
+      line = sys.stdin.readline()
+      if not line:
+        break
+      wrap_lookup(line, n)
+
diff --git a/lexique/lexique_fix.sh b/lexique/lexique_fix.sh
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-# General fixes for lexique
-
-cd "$( dirname "$0" )"
-
-sed 1d | ./subst.pl
-
diff --git a/lexique/lexique_prepare.sh b/lexique/lexique_prepare.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-# Prepare the Lexique file for use with frhyme
-
-cut -f 1,2 | uniq
-
diff --git a/lexique/lexique_retrieve.sh b/lexique/lexique_retrieve.sh
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-ZIP="Lexique382.zip"
-URL="http://www.lexique.org/databases/Lexique382/$ZIP"
-FILE="Lexique382.tsv"
-
-cd "$( dirname "$0" )"
-
-wget $URL
-unzip -qq $ZIP $FILE
-cat $FILE | ./lexique_fix.sh | ./lexique_prepare.sh
-
diff --git a/lexique/subst.pl b/lexique/subst.pl
@@ -1,38 +0,0 @@
-#!/usr/bin/perl
-
-# This file fixes Lexique's pronunciation info from the home-grown
-# format described in
-# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
-# variation of the X-SAMPA standard
-
-
-sub subst {
-  my $a = shift;
-  # substitutions to apply
-  my @s = (
-    ['§', '$'],
-    ['@', '#'],
-    ['1', '('],
-    ['5', ')'],
-    ['°', '@'],
-    ['3', '@'],
-    ['H', '8'],
-    ['N', 'J'],
-    ['G', 'N'],
-  );
-  foreach my $t (@s) {
-    $a =~ s/${$t}[0]/${$t}[1]/g
-  }
-  return $a;
-}
-
-while (<>) {
-  chop;
-  if (/^([^\t]*)\t([^\t]*)(.*)$/) {
-    my $repl = subst $2;
-    print "$1\t$repl$3\n";
-  } else {
-    die "Cannot process line: $_\n";
-  }
-}
-
diff --git a/make.sh b/make.sh
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-NUM=$1
-shift
-
-cat $* | ./truncate.sh $NUM |
-  rev | awk --field-separator="\t" '{printf "%s\t%s\n", $2, $1}' |
-  ./buildtrie.py | ./compresstrie.py
-
diff --git a/scripts/additions b/scripts/additions
@@ -0,0 +1,109 @@
+almanach	almana
+dompte	d$t
+domptent	d$t
+dompterai	d$tRE
+dompterait	d$tRE
+dompter	d$te
+dompteur	d$t9R
+dompteurs	d$t9R
+dompteuse	d$t2z
+dompteuses	d$t2z
+domptez	d$te
+tabis	tabi
+libye	libi
+est	E
+bœuf	b9f
+bœufs	b2
+dis-je	diZ
+employ	#plwa
+amusemens	amyzm#
+parens	paR#
+peur	p9R
+vapeur	vap9R
+moeurs	m9R
+mœurs	m9R
+tous	tu
+Achille	aSil
+Achilles	aSil
+ignora	iJORa
+ignorai	iJORE
+ignoraient	iJORE
+ignorais	iJORE
+ignorait	iJORE
+ignorance	iJOR#s
+ignorances	iJOR#s
+ignorant	iJOR#
+ignorante	iJOR#t
+ignorantes	iJOR#t
+ignorantins	iJOR#t)
+ignorants	iJOR#
+ignorassent	iJORas
+ignore	iJOR
+ignorent	iJOR
+ignorer	iJORe
+donc	d$
+pattern	patERn
+est-ce	Es
+Rouen	Rw#
+c'est	sE
+l'est	lE
+-il	il
+die	di
+'en	#
+étais-je	etEZ
+lords	lOR
+post-scriptum	pOstskRipt9m
+Arras	aRas
+arras	aRas
+laissez-les	lEselE
+ruz	Ry
+c'est	sE
+l'est	lE
+m'en	m#
+Soize	swaz
+Cianán	kajnan
+inuit	inwit
+inuits	inwit
+mindel	m)dEl
+mindels	m)dEl
+citroën	sitROEn
+Citroën	sitROEn
+inlay	inlE
+inlays	inlE
+ber	bER
+bers	bER
+ehud	eud
+Ehud	eud
+rubén	Ruben
+Rubén	Ruben
+Jefferson	ZEfERsOn
+ruolz	RwOls
+ruolz	RyOls
+maremme	maREm
+maremmes	maREm
+jackpot	dZakpOt
+jackpots	dZakpOt
+poële	pwal
+poëles	pwal
+poëlon	pwal$
+poëlées	pwale
+Terese	teReze
+pôvre	povR
+pôvres	povR
+Jocelyn	Zos2l)
+saburre	sabyR
+Sylla	sila
+m'sieur	msj2
+corner	kORnER
+bostryche	bOstRiS
+bostryches	bOstRiS
+abrivent	abRiv#
+abrivents	abRiv#
+apocyn	apOs)
+apocyns	apOs)
+Rostand	ROst#
+Zürich	zyRik
+Dresde	dREzd
+zooment	zum
+n'es	nE
+Créuse	kReyz
diff --git a/scripts/install.sh b/scripts/install.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+echo "Downloading Lexique"
+lexique/lexique_retrieve.sh > lexique.txt
+./make.sh 4 lexique.txt additions > ../frhyme/frhyme.json
diff --git a/scripts/lexique/lexique_fix.sh b/scripts/lexique/lexique_fix.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# General fixes for lexique
+
+cd "$( dirname "$0" )"
+
+sed 1d | ./subst.pl
+
diff --git a/scripts/lexique/lexique_prepare.sh b/scripts/lexique/lexique_prepare.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Prepare the Lexique file for use with frhyme
+
+cut -f 1,2 | uniq
+
diff --git a/scripts/lexique/lexique_retrieve.sh b/scripts/lexique/lexique_retrieve.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+ZIP="Lexique382.zip"
+URL="http://www.lexique.org/databases/Lexique382/$ZIP"
+FILE="Lexique382.tsv"
+
+cd "$( dirname "$0" )"
+
+wget $URL
+unzip -qq $ZIP $FILE
+cat $FILE | ./lexique_fix.sh | ./lexique_prepare.sh
+
diff --git a/scripts/lexique/subst.pl b/scripts/lexique/subst.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+
+# This file fixes Lexique's pronunciation info from the home-grown
+# format described in
+# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
+# variation of the X-SAMPA standard
+
+
+sub subst {
+  my $a = shift;
+  # substitutions to apply
+  my @s = (
+    ['§', '$'],
+    ['@', '#'],
+    ['1', '('],
+    ['5', ')'],
+    ['°', '@'],
+    ['3', '@'],
+    ['H', '8'],
+    ['N', 'J'],
+    ['G', 'N'],
+  );
+  foreach my $t (@s) {
+    $a =~ s/${$t}[0]/${$t}[1]/g
+  }
+  return $a;
+}
+
+while (<>) {
+  chop;
+  if (/^([^\t]*)\t([^\t]*)(.*)$/) {
+    my $repl = subst $2;
+    print "$1\t$repl$3\n";
+  } else {
+    die "Cannot process line: $_\n";
+  }
+}
+
diff --git a/scripts/make.sh b/scripts/make.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+NUM=$1
+shift
+
+cat $* | ./truncate.sh $NUM |
+  rev | awk --field-separator="\t" '{printf "%s\t%s\n", $2, $1}' |
+  ../frhyme/buildtrie.py | ../frhyme/compresstrie.py
+
diff --git a/scripts/truncate.sh b/scripts/truncate.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+NUM=${1:?Usage: $0 NUM_PHONEMES}
+awk  --field-separator="\t" '{ printf "%s\t%s\n", $1, substr( $2, length($2) - '$NUM' + 1) }'
+
diff --git a/setup.py b/setup.py
@@ -0,0 +1,20 @@
+import setuptools
+
+with open("README", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name='frhyme',
+    version='0.2',
+    author="Antoine Amarilli",
+    author_email="a3nm@a3nm.net",
+    package_data={'frhyme' :['*json']},
+    description="Guess the last phonemes of a French word",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://gitlab.com/a3nm/frhyme",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+    ],
+)
diff --git a/truncate.sh b/truncate.sh
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-NUM=${1:?Usage: $0 NUM_PHONEMES}
-awk  --field-separator="\t" '{ printf "%s\t%s\n", $1, substr( $2, length($2) - '$NUM' + 1) }'
-

	frhyme - guess the last phonemes of a French word (local mirror of https://gitlab.com/a3nm/frhyme)
	git clone https://a3nm.net/git/frhyme/
	Log | Files | Refs | README | LICENSE

.gitignore	|	3	+++
LICENSE	|	18	++++++++++++++++++
README	|	4	+++-
additions	|	109	-------------------------------------------------------------------------------
buildtrie.py	|	45	---------------------------------------------
compresstrie.py	|	43	-------------------------------------------
frhyme.py	|	68	--------------------------------------------------------------------
frhyme/__init__.py	|	1	+
frhyme/buildtrie.py	|	45	+++++++++++++++++++++++++++++++++++++++++++++
frhyme/compresstrie.py	|	43	+++++++++++++++++++++++++++++++++++++++++++
frhyme/frhyme.py	|	68	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lexique/lexique_fix.sh	|	8	--------
lexique/lexique_prepare.sh	|	6	------
lexique/lexique_retrieve.sh	|	12	------------
lexique/subst.pl	|	38	--------------------------------------
make.sh	|	9	---------
scripts/additions	|	109	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
scripts/install.sh	|	5	+++++
scripts/lexique/lexique_fix.sh	|	8	++++++++
scripts/lexique/lexique_prepare.sh	|	6	++++++
scripts/lexique/lexique_retrieve.sh	|	12	++++++++++++
scripts/lexique/subst.pl	|	38	++++++++++++++++++++++++++++++++++++++
scripts/make.sh	|	9	+++++++++
scripts/truncate.sh	|	5	+++++
setup.py	|	20	++++++++++++++++++++
truncate.sh	|	5	-----