commit 2a15dc1fcceec987ee183c3cae0cc8d517b2ebec
parent f635fbce9148d0258bb3195dcfb81f3a7e4637e3
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 29 Sep 2012 18:46:35 +0200
add other missing file
Diffstat:
subst.pl | | | 38 | ++++++++++++++++++++++++++++++++++++++ |
1 file changed, 38 insertions(+), 0 deletions(-)
diff --git a/subst.pl b/subst.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+
+# This script converts Lexique's pronunciation info from the home-grown
+# format described in
+# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
+# variation of the X-SAMPA standard.
+
+
+# Convert one Lexique phonetic transcription to the X-SAMPA-like notation.
+#
+# Takes the transcription string as its only argument and returns the
+# converted copy; the caller's value is left untouched.
+sub subst {
+    # Not named $a: a lexical $a would mask sort's special variable.
+    my $phon = shift;
+    # Substitutions to apply, as [from, to] pairs.  They run one after the
+    # other over the whole string, so order matters: '@' has to become '#'
+    # before '°' and '3' produce new '@'s, and 'N' has to become 'J' before
+    # 'G' produces new 'N's.
+    my @s = (
+        ['§', '$'],
+        ['@', '#'],
+        ['1', '('],
+        ['5', ')'],
+        ['°', '@'],
+        ['3', '@'],
+        ['H', '8'],
+        ['N', 'J'],
+        ['G', 'N'],
+    );
+    foreach my $t (@s) {
+        my ($from, $to) = @{$t};
+        # \Q...\E keeps $from a literal string even if a regex
+        # metacharacter is ever added to the table.
+        $phon =~ s/\Q$from\E/$to/g;
+    }
+    return $phon;
+}
+
+# Read tab-separated lines from the files named on the command line (or from
+# standard input), convert the second field with subst, and print each line
+# with its other fields unchanged.  Dies on a line that contains no tab.
+while (<>) {
+    # chomp removes only a trailing newline; chop would also eat the last
+    # real character of a final line that has no newline.
+    chomp;
+    if (/^([^\t]*)\t([^\t]*)(.*)$/) {
+        # Copy the captures before calling subst, which runs regexes itself.
+        my ($head, $phon, $tail) = ($1, $2, $3);
+        my $repl = subst($phon);
+        print "$head\t$repl$tail\n";
+    } else {
+        die "Cannot process line: $_\n";
+    }
+}
+