nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

commit 209fe4f6ff1c99fe5476f09581e50fa3bd2f8eae
parent 5e26236a13c2721026a2f8b142ce017de8be56a8
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Sun,  9 Oct 2011 22:07:03 +0200

fix bug

Diffstat:
nlsplit.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nlsplit.c b/nlsplit.c @@ -257,6 +257,7 @@ int split() { } } else { n_newlines = 0; + l_line++; if (c == ' ' || c == '\t') { current += WHITESPACE_SCORE; if (l_first_word == 0) { @@ -295,7 +296,7 @@ int split() { /* first char of the line */ if (!(c >= 'a' && c <= 'z')) push(points, hd, &tl, NEWLINE_WITH_NON_LOWERCASE_SCORE - - NEWLINE_SCORE , offset + pos - l_line, offset); + NEWLINE_SCORE, offset + pos - l_line, offset); if (n_words == 0) l_first_word++; if (c >= 'a' && c <= 'z' && @@ -310,7 +311,6 @@ int split() { || c == '(' || c == ')') last_punct = c; } - l_line++; } /* push point if we have one */