plint

French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
git clone https://a3nm.net/git/plint/
Log | Files | Refs | README

commit c869dc9f1cde9aaa8663ecb75a5ba50160bbe7c5
parent 53fa7d503793a57c6fbf44ce9febeacc0f85e3f5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 18 Aug 2019 10:34:04 +0200

Move occurrence preparation scripts to their own subfolder

Diffstat:
.gitignore | 3++-
README | 2+-
additions_occurrences | 1-
lexique_fix.sh | 8--------
lexique_occurrences_retrieve.sh | 16----------------
prepare_occurrences/additions_occurrences | 1+
prepare_occurrences/lexique_fix.sh | 8++++++++
prepare_occurrences/lexique_occurrences_retrieve.sh | 17+++++++++++++++++
prepare_occurrences/subst.pl | 38++++++++++++++++++++++++++++++++++++++
subst.pl | 38--------------------------------------
10 files changed, 67 insertions(+), 65 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,6 +1,6 @@ __pycache__/* .idea -Lexique382.tsv +Lexique*.tsv frhyme frhyme/* haspirater @@ -15,6 +15,7 @@ messages.pot plint/res/*.mo diaeresis?.json Lexique*.zip +Lexique*.zip.* haspirater *.pyo poem diff --git a/README b/README @@ -131,7 +131,7 @@ This is how the file data/occurrences is generated Run: - ./lexique_occurrences_retrieve.sh > data/occurrences + ./prepare_occurrences/lexique_occurrences_retrieve.sh > data/occurrences == 6. Updating the localization == diff --git a/additions_occurrences b/additions_occurrences @@ -1 +0,0 @@ -chose diff --git a/lexique_fix.sh b/lexique_fix.sh @@ -1,8 +0,0 @@ -#!/bin/bash - -# General fixes for lexique - -cd "$( dirname "$0" )" - -sed 1d | ./subst.pl - diff --git a/lexique_occurrences_retrieve.sh b/lexique_occurrences_retrieve.sh @@ -1,16 +0,0 @@ -#!/bin/bash - -ZIP="Lexique383.zip" -URL="http://www.lexique.org/databases/Lexique383/$ZIP" -FILE="Lexique383.tsv" - -cd "$( dirname "$0" )" - -wget $URL -unzip -qq $ZIP $FILE -cat $FILE | ./lexique_fix.sh | cut -f1 | - rev | cut -d' ' -f1 | rev | - cat - additions_occurrences | - sort | uniq -c | - awk '{print $2, $1}' - diff --git a/prepare_occurrences/additions_occurrences b/prepare_occurrences/additions_occurrences @@ -0,0 +1 @@ +chose diff --git a/prepare_occurrences/lexique_fix.sh b/prepare_occurrences/lexique_fix.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# General fixes for lexique + +cd "$( dirname "$0" )" + +sed 1d | ./subst.pl + diff --git a/prepare_occurrences/lexique_occurrences_retrieve.sh b/prepare_occurrences/lexique_occurrences_retrieve.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +ZIP="Lexique383.zip" +URL="http://www.lexique.org/databases/Lexique383/$ZIP" +FILE="Lexique383.tsv" + +DIR="$( cd "$( dirname "$0" )" && pwd )" +cd "$DIR" + +wget $URL +unzip -qq $ZIP $FILE +cat $FILE | ./lexique_fix.sh | cut -f1 | + rev | cut -d' ' -f1 | rev | + cat - additions_occurrences | + sort | uniq -c | + awk '{print $2, $1}' + diff --git 
a/prepare_occurrences/subst.pl b/prepare_occurrences/subst.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl + +# This file fixes Lexique's pronunciation info from the home-grown +# format described in +# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a +# variation of the X-SAMPA standard + + +sub subst { + my $a = shift; + # substitutions to apply + my @s = ( + ['§', '$'], + ['@', '#'], + ['1', '('], + ['5', ')'], + ['°', '@'], + ['3', '@'], + ['H', '8'], + ['N', 'J'], + ['G', 'N'], + ); + foreach my $t (@s) { + $a =~ s/${$t}[0]/${$t}[1]/g + } + return $a; +} + +while (<>) { + chop; + if (/^([^\t]*)\t([^\t]*)(.*)$/) { + my $repl = subst $2; + print "$1\t$repl$3\n"; + } else { + die "Cannot process line: $_\n"; + } +} + diff --git a/subst.pl b/subst.pl @@ -1,38 +0,0 @@ -#!/usr/bin/perl - -# This file fixes Lexique's pronunciation info from the home-grown -# format described in -# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a -# variation of the X-SAMPA standard - - -sub subst { - my $a = shift; - # substitutions to apply - my @s = ( - ['§', '$'], - ['@', '#'], - ['1', '('], - ['5', ')'], - ['°', '@'], - ['3', '@'], - ['H', '8'], - ['N', 'J'], - ['G', 'N'], - ); - foreach my $t (@s) { - $a =~ s/${$t}[0]/${$t}[1]/g - } - return $a; -} - -while (<>) { - chop; - if (/^([^\t]*)\t([^\t]*)(.*)$/) { - my $repl = subst $2; - print "$1\t$repl$3\n"; - } else { - die "Cannot process line: $_\n"; - } -} -