commit a32b4fb181c846616027803c4b69e6bd26842488
parent c869dc9f1cde9aaa8663ecb75a5ba50160bbe7c5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 18 Aug 2019 10:35:02 +0200
rm useless scripts
Diffstat:
3 files changed, 1 insertion(+), 47 deletions(-)
diff --git a/prepare_occurrences/lexique_fix.sh b/prepare_occurrences/lexique_fix.sh
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-# General fixes for lexique
-
-cd "$( dirname "$0" )"
-
-sed 1d | ./subst.pl
-
diff --git a/prepare_occurrences/lexique_occurrences_retrieve.sh b/prepare_occurrences/lexique_occurrences_retrieve.sh
@@ -9,7 +9,7 @@ cd "$DIR"
wget $URL
unzip -qq $ZIP $FILE
-cat $FILE | ./lexique_fix.sh | cut -f1 |
+cat $FILE | sed 1d | cut -f1 |
rev | cut -d' ' -f1 | rev |
cat - additions_occurrences |
sort | uniq -c |
diff --git a/prepare_occurrences/subst.pl b/prepare_occurrences/subst.pl
@@ -1,38 +0,0 @@
-#!/usr/bin/perl
-
-# This file fixes Lexique's pronunciation info from the home-grown
-# format described in
-# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
-# variation of the X-SAMPA standard
-
-
-sub subst {
- my $a = shift;
- # substitutions to apply
- my @s = (
- ['§', '$'],
- ['@', '#'],
- ['1', '('],
- ['5', ')'],
- ['°', '@'],
- ['3', '@'],
- ['H', '8'],
- ['N', 'J'],
- ['G', 'N'],
- );
- foreach my $t (@s) {
- $a =~ s/${$t}[0]/${$t}[1]/g
- }
- return $a;
-}
-
-while (<>) {
- chop;
- if (/^([^\t]*)\t([^\t]*)(.*)$/) {
- my $repl = subst $2;
- print "$1\t$repl$3\n";
- } else {
- die "Cannot process line: $_\n";
- }
-}
-