nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

commit 8aa81cfe5e7313c5dc14131cc970504b8f3bcd55
parent 1a9c6fa293478bc0292a7be739390c2ef1a5fd26
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 10 Oct 2011 00:27:22 +0200

remove useless blank lines

Diffstat:
nlsplit.c | 8 --------
1 file changed, 0 insertions(+), 8 deletions(-)

diff --git a/nlsplit.c b/nlsplit.c @@ -180,13 +180,11 @@ int split() { /* do not break when reading EOF, because we must output last chunk */ while (1) { /* read char */ - last = c; c_int = getchar(); c = c_int; /* cut if we have to */ - assert(pos <= size); if (c_int == EOF || pos == size || (hd != tl && min_confidence > 0 && @@ -222,20 +220,15 @@ int split() { } /* break if we must */ - if (c_int == EOF) break; /* add char */ - piece[(offset + (pos++)) % size] = c; /* produce split points */ - current = 0; - if (c == '\n' || c == ' ' || c == '\t') if (last_punct) whitespace_after_punct++; - if (c == '\n') { n_words = 0; l_first_word = 0; @@ -309,7 +302,6 @@ int split() { } /* push point if we have one */ - if (current > 0) { push(points, hd, &tl, current, offset + pos, offset); }