nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

commit 1a9c6fa293478bc0292a7be739390c2ef1a5fd26
parent 5082e7a7351ec600c2c53ee2bd42862632103d1c
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Mon, 10 Oct 2011 00:24:23 +0200

remove debug

Diffstat:
nlsplit.c | 5 -----
1 file changed, 0 insertions(+), 5 deletions(-)

diff --git a/nlsplit.c b/nlsplit.c
@@ -117,7 +117,6 @@ int push(point *points, int hd, int *tl, float confidence, long position,
   /* except in rare cases where we insert at an old position */
   /* which could make us underestimate slightly confidence for some splits */
 
-  //printf("push %f %ld (%d %d)!\n", confidence, position, hd, *tl);
   assert(hd < size);
   assert(*tl < size);
 
@@ -206,7 +205,6 @@ int split() {
       points[hd].confidence = EOF_SCORE;
       points[hd].position = offset + pos;
     }
-    //printf("== %d %d\n", offset, pos);
     printf("-- chunk %d length %ld confidence %f\n", npiece,
         points[hd].position - offset, points[hd].confidence);
     /* output the data */
@@ -217,7 +215,6 @@ int split() {
     pos = (offset + pos) - points[hd].position;
     assert(pos < size);
     offset = points[hd].position;
-    //printf("== %d %d\n", offset, pos);
     /* pop the point */
     hd = (hd + 1) % size;
     /* increment piece counter */
@@ -267,8 +264,6 @@ int split() {
         if (!n_words) {
           /* we have just read the first word */
           int delta = max_l_line - l_last_line - l_first_word - 1;
-          //printf("maxlline %d llastline %d lfirstword %d lline %d offset %d pos %d delta %d\n",
-          //    max_l_line, l_last_line, l_first_word, l_line, offset, pos, delta);
           if (delta > DELTA_THRESHOLD) {
             /* first word of current line would fit on previous line */
             push(points, hd, &tl, delta, offset + pos - l_line - 1, offset);