nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

nlsplit_read.c (1179B)


      1 /* nlsplit_read for nlsplit by a3nm (2011) */
      2 
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
      6 
      7 const char help[] =
      8   "Check and collect together output of nlsplit from stdin to stdout.\n"
      9   "SIZE is the maximal size of a chunk, in bytes.\n";
     10 
     11 /* maximal size of pieces */
     12 int size;
     13 
     14 #define E_SYNTAX 1
     15 #define E_MEMORY 2
     16 
     17 void usage(char** argv) {
     18   /* show usage and exit */
     19   fprintf(stderr, "Usage: %s SIZE\n", argv[0]);
     20   fprintf(stderr, help);
     21   exit(E_SYNTAX);
     22 }
     23 
     24 int main(int argc, char **argv) {
     25 
     26   int last_piece = -1;
     27   int piece, length;
     28   int i;
     29   char *chunk;
     30   float confidence;
     31 
     32   if (argc != 2) usage(argv);
     33   if (!(size = atoi(argv[1]))) usage(argv);
     34   
     35   chunk = malloc(size * sizeof(char));
     36   if (!chunk) {
     37     perror("malloc");
     38     return E_MEMORY;
     39   }
     40 
     41   while (scanf("-- chunk %d length %d confidence %f",
     42         &piece, &length, &confidence) == 3) {
     43     assert(piece == last_piece + 1);
     44     assert(length > 0);
     45     assert(length <= size);
     46     assert(confidence >= 0);
     47     getchar(); // newline
     48     for (i=0; i<length; i++)
     49       putchar(getchar());
     50     getchar(); // newline
     51     last_piece = piece;
     52   }
     53 
     54   assert(feof(stdin));
     55 
     56   return 0;
     57 }
     58