mv occurrence preparation script to own subfolder - plint - French poetry validator (local mirror of https://gitlab.com/a3nm/plint)

commit c869dc9f1cde9aaa8663ecb75a5ba50160bbe7c5
parent 53fa7d503793a57c6fbf44ce9febeacc0f85e3f5
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun, 18 Aug 2019 10:34:04 +0200

mv occurrence preparation script to own subfolder

Diffstat:
.gitignore  | 3 ++-
README  | 2 +-
additions_occurrences  | 1 -
lexique_fix.sh  | 8 --------
lexique_occurrences_retrieve.sh  | 16 ----------------
prepare_occurrences/additions_occurrences  | 1 +
prepare_occurrences/lexique_fix.sh  | 8 ++++++++
prepare_occurrences/lexique_occurrences_retrieve.sh  | 17 +++++++++++++++++
prepare_occurrences/subst.pl  | 38 ++++++++++++++++++++++++++++++++++++++
subst.pl  | 38 --------------------------------------

10 files changed, 67 insertions(+), 65 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,6 @@
 __pycache__/*
 .idea
-Lexique382.tsv
+Lexique*.tsv
 frhyme
 frhyme/*
 haspirater
@@ -15,6 +15,7 @@ messages.pot
 plint/res/*.mo
 diaeresis?.json
 Lexique*.zip
+Lexique*.zip.*
 haspirater
 *.pyo
 poem
diff --git a/README b/README
@@ -131,7 +131,7 @@ This is how the file data/occurrences is generated
 
 Run:
 
-  ./lexique_occurrences_retrieve.sh > data/occurrences
+  ./prepare_occurrences/lexique_occurrences_retrieve.sh > data/occurrences
 
 
 == 6. Updating the localization ==
diff --git a/additions_occurrences b/additions_occurrences
@@ -1 +0,0 @@
-chose
diff --git a/lexique_fix.sh b/lexique_fix.sh
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-# General fixes for lexique
-
-cd "$( dirname "$0" )"
-
-sed 1d | ./subst.pl
-
diff --git a/lexique_occurrences_retrieve.sh b/lexique_occurrences_retrieve.sh
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-ZIP="Lexique383.zip"
-URL="http://www.lexique.org/databases/Lexique383/$ZIP"
-FILE="Lexique383.tsv"
-
-cd "$( dirname "$0" )"
-
-wget $URL
-unzip -qq $ZIP $FILE
-cat $FILE | ./lexique_fix.sh | cut -f1 |
-  rev | cut -d' ' -f1 | rev |
-  cat - additions_occurrences |
-  sort | uniq -c |
-  awk '{print $2, $1}'
-
diff --git a/prepare_occurrences/additions_occurrences b/prepare_occurrences/additions_occurrences
@@ -0,0 +1 @@
+chose
diff --git a/prepare_occurrences/lexique_fix.sh b/prepare_occurrences/lexique_fix.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# General fixes for lexique
+
+cd "$( dirname "$0" )"
+
+sed 1d | ./subst.pl
+
diff --git a/prepare_occurrences/lexique_occurrences_retrieve.sh b/prepare_occurrences/lexique_occurrences_retrieve.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+ZIP="Lexique383.zip"
+URL="http://www.lexique.org/databases/Lexique383/$ZIP"
+FILE="Lexique383.tsv"
+
+DIR="$( cd "$( dirname "$0" )" && pwd )"
+cd "$DIR"
+
+wget $URL
+unzip -qq $ZIP $FILE
+cat $FILE | ./lexique_fix.sh | cut -f1 |
+  rev | cut -d' ' -f1 | rev |
+  cat - additions_occurrences |
+  sort | uniq -c |
+  awk '{print $2, $1}'
+
diff --git a/prepare_occurrences/subst.pl b/prepare_occurrences/subst.pl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+
+# This file fixes Lexique's pronunciation info from the home-grown
+# format described in
+# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
+# variation of the X-SAMPA standard
+
+
+sub subst {
+  my $a = shift;
+  # substitutions to apply
+  my @s = (
+    ['§', '$'],
+    ['@', '#'],
+    ['1', '('],
+    ['5', ')'],
+    ['°', '@'],
+    ['3', '@'],
+    ['H', '8'],
+    ['N', 'J'],
+    ['G', 'N'],
+  );
+  foreach my $t (@s) {
+    $a =~ s/${$t}[0]/${$t}[1]/g
+  }
+  return $a;
+}
+
+while (<>) {
+  chop;
+  if (/^([^\t]*)\t([^\t]*)(.*)$/) {
+    my $repl = subst $2;
+    print "$1\t$repl$3\n";
+  } else {
+    die "Cannot process line: $_\n";
+  }
+}
+
diff --git a/subst.pl b/subst.pl
@@ -1,38 +0,0 @@
-#!/usr/bin/perl
-
-# This file fixes Lexique's pronunciation info from the home-grown
-# format described in
-# http://www.lexique.org/outils/Manuel_Lexique.htm#_Toc108519023 to a
-# variation of the X-SAMPA standard
-
-
-sub subst {
-  my $a = shift;
-  # substitutions to apply
-  my @s = (
-    ['§', '$'],
-    ['@', '#'],
-    ['1', '('],
-    ['5', ')'],
-    ['°', '@'],
-    ['3', '@'],
-    ['H', '8'],
-    ['N', 'J'],
-    ['G', 'N'],
-  );
-  foreach my $t (@s) {
-    $a =~ s/${$t}[0]/${$t}[1]/g
-  }
-  return $a;
-}
-
-while (<>) {
-  chop;
-  if (/^([^\t]*)\t([^\t]*)(.*)$/) {
-    my $repl = subst $2;
-    print "$1\t$repl$3\n";
-  } else {
-    die "Cannot process line: $_\n";
-  }
-}
-

	plint French poetry validator (local mirror of https://gitlab.com/a3nm/plint)
	git clone https://a3nm.net/git/plint/
	Log | Files | Refs | README

.gitignore	|	3	++-
README	|	2	+-
additions_occurrences	|	1	-
lexique_fix.sh	|	8	--------
lexique_occurrences_retrieve.sh	|	16	----------------
prepare_occurrences/additions_occurrences	|	1	+
prepare_occurrences/lexique_fix.sh	|	8	++++++++
prepare_occurrences/lexique_occurrences_retrieve.sh	|	17	+++++++++++++++++
prepare_occurrences/subst.pl	|	38	++++++++++++++++++++++++++++++++++++++
subst.pl	|	38	--------------------------------------