detect.pl (666B)
#!/usr/bin/perl

# From a list of '\n'-separated words, output occurrences of words
# starting with 'h' when it can be inferred from the preceding word whether
# the word is aspirated or not. The format is "0 word" for non-aspirated
# and "1 word" for aspirated.
#
# Usage: detect.pl [FILE...]   (reads standard input when no file is given)

use strict;
use warnings;

# Return what $word indicates about the word that follows it:
#    0  the next word is non-aspirated (after "l'", "j'", "qu'" or "cet"),
#    1  the next word is aspirated (after "que", "ce" or a [jdl][ea] word),
#   -1  nothing can be inferred.
# $word must already be lowercased and stripped of its line terminator.
sub aspiration_hint {
    my ($word) = @_;

    return 0 if $word =~ /\A(?:[lj]'|qu'|cet)\z/;

    # Of the words matched by [jdl][ea], only "je", "de", "le" and "la"
    # are meaningful.
    return 1 if $word =~ /\A(?:que|ce|[jdl][ea])\z/;

    return -1;
}

# Read words from the filehandle $in, one per line, and print to the
# filehandle $out every word starting with 'h' for which the previous word
# gave a hint, formatted as "<hint> <word>\n". Words are lowercased first.
sub run {
    my ($in, $out) = @_;

    # Hint left by the previous word. It starts at -1 (unknown) so that an
    # 'h' word on the very first line is not reported with an empty flag.
    my $asp = -1;

    while (my $line = <$in>) {
        # Strip an LF or CRLF terminator only. Unlike chop, this leaves the
        # last word intact when the input does not end with a newline.
        $line =~ s/\r?\n\z//;
        my $word = lc $line;

        print {$out} "$asp $word\n" if $asp >= 0 && $word =~ /\Ah/;

        # Remember what the current word indicates about the next word.
        $asp = aspiration_hint($word);
    }

    return;
}

# Run as a script: reading from ARGV is what the bare <> operator does
# (files named on the command line, or standard input when there are none).
run(\*ARGV, \*STDOUT) unless caller;