haspirater

detect aspirated 'h' in French words (local mirror of https://gitlab.com/a3nm/haspirater)
git clone https://a3nm.net/git/haspirater/
Log | Files | Refs | README | LICENSE

detect.pl (666B)


      #!/usr/bin/perl

      # From a list of '\n'-separated words, output occurrences of words
      # starting with 'h' when it can be inferred whether the word is aspirated
      # or not. The format is "0 word" for non-aspirated and "1 word" for
      # aspirated. Words are lowercased before being matched and printed.
      #
      # Input is read from the files named on the command line, or from
      # standard input when none are given; results go to standard output.

      use strict;
      use warnings;

      # What the previous word says about the current one:
      #   1 = aspirated, 0 = non-aspirated, -1 = unknown.
      # Starts as unknown: the first line has no preceding word, so an 'h'
      # word there must not be reported (an undefined value would compare
      # as 0 and be printed with an empty prefix).
      my $asp = -1;

      # Return what $word (lowercased, line terminator already removed)
      # indicates about the word that follows it:
      #   0  the next word is non-aspirated,
      #   1  the next word is aspirated,
      #  -1  nothing can be inferred.
      sub hint_for_next_word {
          my ($word) = @_;

          # Elided forms and "cet" mark the next word as non-aspirated.
          return 0 if $word =~ /^[lj]'$/;
          return 0 if $word =~ /^qu'$/;
          return 0 if $word =~ /^cet$/;

          # Their unelided counterparts mark the next word as aspirated.
          return 1 if $word =~ /^que$/;
          return 1 if $word =~ /^ce$/;
          # only meaningful are "je", "de", "le" and "la"
          return 1 if $word =~ /^[jdl][ea]$/;

          return -1;    # default is unknown
      }

      while (my $line = <>) {
          # Remove the line terminator (LF or CRLF) without touching the last
          # character of a final line that has no terminator.
          $line =~ s/\r?\n\z//;
          my $word = lc $line;

          # Report the current word only if the previous one was conclusive.
          print "$asp $word\n" if $word =~ /^h/ && $asp >= 0;

          # we store in asp what the current word indicates about the next word
          $asp = hint_for_next_word($word);
      }