nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

commit 0f21ebaa834f97af49ad835df3093af9d843731a
parent c3b66e815db167596142143c5e51401a1def64e3
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  9 Oct 2011 21:06:23 +0200

rename

Diffstat:
nlsplit_read.c | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
nlsplit_test.c | 62--------------------------------------------------------------
2 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/nlsplit_read.c b/nlsplit_read.c @@ -0,0 +1,62 @@ +/* nlsplit_read for nlsplit by a3nm (2011) */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +const char help[] = + "Check and collect together output of nlsplit from stdin to stdout.\n" + "SIZE is the maximal size of a chunk, in bytes.\n"; + +/* maximal size of pieces */ +int size; + +#define E_SYNTAX 1 +#define E_MEMORY 2 + +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MID(l, r, s) ((((l) + ((r) + ((l) > (r) ? (s) : 0)))/2) % (s)) + +void usage(char** argv) { + /* show usage and exit */ + fprintf(stderr, "Usage: %s SIZE\n", argv[0]); + fprintf(stderr, help); + exit(E_SYNTAX); +} + +int main(int argc, char **argv) { + + int last_piece = -1; + int piece, length; + int i; + char *chunk; + float confidence; + + if (argc != 2) usage(argv); + if (!(size = atoi(argv[1]))) usage(argv); + + chunk = malloc(size * sizeof(char)); + if (!chunk) { + perror("malloc"); + return E_MEMORY; + } + + while (scanf("-- piece %d length %d confidence %f", + &piece, &length, &confidence) == 3) { + assert(piece == last_piece + 1); + assert(length > 0); + assert(length <= size); + assert(confidence >= 0); + getchar(); // newline + for (i=0; i<length; i++) + putchar(getchar()); + getchar(); // newline + last_piece = piece; + } + + assert(feof(stdin)); + + return 0; +} + diff --git a/nlsplit_test.c b/nlsplit_test.c @@ -1,62 +0,0 @@ -/* nlsplit_read for nlsplit by a3nm (2011) */ - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> - -const char help[] = - "Check and collect together output of nlsplit from stdin to stdout.\n" - "SIZE is the maximal size of a chunk, in bytes.\n"; - -/* maximal size of pieces */ -int size; - -#define E_SYNTAX 1 -#define E_MEMORY 2 - -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MID(l, r, s) ((((l) + ((r) + ((l) > (r) ? 
(s) : 0)))/2) % (s)) - -void usage(char** argv) { - /* show usage and exit */ - fprintf(stderr, "Usage: %s SIZE\n", argv[0]); - fprintf(stderr, help); - exit(E_SYNTAX); -} - -int main(int argc, char **argv) { - - int last_piece = -1; - int piece, length; - int i; - char *chunk; - float confidence; - - if (argc != 2) usage(argv); - if (!(size = atoi(argv[1]))) usage(argv); - - chunk = malloc(size * sizeof(char)); - if (!chunk) { - perror("malloc"); - return E_MEMORY; - } - - while (scanf("-- piece %d length %d confidence %f", - &piece, &length, &confidence) == 3) { - assert(piece == last_piece + 1); - assert(length > 0); - assert(length <= size); - assert(confidence >= 0); - getchar(); // newline - for (i=0; i<length; i++) - putchar(getchar()); - getchar(); // newline - last_piece = piece; - } - - assert(feof(stdin)); - - return 0; -} -