commit e2d61441ce7f24377f8bc62e12062f690debed15
parent de0eb48e95a926e432137ec9b6efe43b91c5ffa5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 27 Dec 2011 00:51:04 +0100
add subst.pl
Diffstat:
subst.pl | | | 38 | ++++++++++++++++++++++++++++++++++++++ |
1 file changed, 38 insertions(+), 0 deletions(-)
diff --git a/subst.pl b/subst.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+
+# This script converts Lexique's pronunciation info from the home-grown
+# format described in
+# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
+# variation of the X-SAMPA standard.
+
+
+# Convert one Lexique phonetic transcription to a variation of X-SAMPA.
+# Takes the transcription as its only argument and returns the converted copy.
+sub subst {
+    my ($pron) = @_;
+    # Ordered [from, to] pairs: '@' is rewritten before '°' and '3' produce
+    # '@', and 'N' before 'G' produces 'N', so no output is converted twice.
+    my @rules = (
+        ['§', '$'], ['@', '#'], ['1', '('],
+        ['5', ')'], ['°', '@'], ['3', '@'],
+        ['H', '8'], ['N', 'J'], ['G', 'N'],
+    );
+    foreach my $rule (@rules) {
+        my ($from, $to) = @{$rule};
+        # \Q...\E keeps $from literal even if a rule ever holds a regex
+        # metacharacter; $to is inserted as plain text.
+        $pron =~ s/\Q$from\E/$to/g;
+    }
+    return $pron;
+}
+
+# Rewrite the second tab-separated field of each input line with subst();
+# the first field and everything after the second are printed unchanged.
+while (my $line = <>) {
+    # chomp (not chop) so an unterminated final line keeps its last character.
+    chomp $line;
+    my ($first, $pron, $rest) = $line =~ /^([^\t]*)\t([^\t]*)(.*)$/
+        or die "Cannot process line: $line\n";
+    print "$first\t", subst($pron), "$rest\n";
+}
+