lexique

various versions of the French lexical database at lexique.org
git clone https://a3nm.net/git/lexique/
Log | Files | Refs | README

subst.pl (715B)


      1 #!/usr/bin/perl
      2 
      3 # This file fixes Lexique's pronunciation info from the home-grown
      4 # format described in
      5 # http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
      6 # variation of the X-SAMPA standard
      7 # Author: Antoine Amarilli
      8 # Public domain
      9 
     10 
     # Convert one Lexique phonetic transcription to the X-SAMPA variant.
     #
     # Takes the transcription string as its only argument and returns the
     # converted copy; the caller's value is left untouched.
     sub subst {
       # A named lexical rather than `my $a`: $a and $b are the special
       # variables used by sort blocks and should not be shadowed.
       my ($phon) = @_;
       # Ordered [from, to] pairs, applied one after the other to the whole
       # string. The order matters: '@' has to become '#' before '°' and '3'
       # are turned into '@', and 'N' has to become 'J' before 'G' is turned
       # into 'N'; otherwise freshly written symbols would be rewritten again.
       my @rules = (
         ['§', '$'],
         ['@', '#'],
         ['1', '('],
         ['5', ')'],
         ['°', '@'],
         ['3', '@'],
         ['H', '8'],
         ['N', 'J'],
         ['G', 'N'],
       );
       foreach my $rule (@rules) {
         my ($from, $to) = @{$rule};
         # \Q...\E makes $from match literally, so a rule whose source symbol
         # happens to be a regex metacharacter cannot silently change meaning.
         $phon =~ s/\Q$from\E/$to/g;
       }
       return $phon;
     }
     30 
     # Filter loop: read tab-separated records from the files named on the
     # command line (or from standard input when there are none), convert the
     # second field with subst, and print each record with everything else
     # unchanged. Dies on the first line that has no tab separator.
     while (my $line = <>) {
       # chomp strips only a trailing newline; chop would also eat the last
       # real character of a final line that is not newline-terminated.
       chomp $line;
       if ($line =~ /^([^\t]*)\t([^\t]*)(.*)$/) {
         # Copy the captures into named lexicals before calling subst, which
         # runs regex substitutions of its own.
         my ($head, $phon, $tail) = ($1, $2, $3);
         my $repl = subst($phon);
         print "$head\t$repl$tail\n";
       } else {
         die "Cannot process line: $line\n";
       }
     }
     40