nlsplit

split natural language text into chunks at reasonable language boundaries
git clone https://a3nm.net/git/nlsplit/
Log | Files | Refs | README

commit 57e893f47c6f8fb992332604abc3fe2994ba8dd6
parent 8490c583570a2120232c986804c9fdb1bbf39f61
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Fri,  7 Oct 2011 20:09:50 +0200

explain mess

Diffstat:
nlsplit.c | 5++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/nlsplit.c b/nlsplit.c @@ -110,11 +110,14 @@ int push(point *points, int hd, int *tl, float confidence, long position, /* insert (confidence, position) in circular buffer (points, hd, tl) */ /* if position is already in buffer, insert sum of confidence values */ /* perform update of tl, return new value of tl */ + /* buffer is sorted by decreasing confidence, and increasing position */ + /* except in rare cases where we insert at an old position */ + /* which could make us underestimate slightly confidence for some splits */ //printf("push %f %ld (%d %d)!\n", confidence, position, hd, *tl); assert(hd < size); assert(*tl < size); - // TODO check order + /* refuse insert of positions before offset */ if (position <= offset) return *tl; int old_pos = get(points, hd, *tl, position);