commit 9476d6a67d3b81063a17dc4265377fa33dc23bf1
parent bdb3c97e5147258bb6e37081b10e9a6bbcf45c3f
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Fri, 6 Sep 2019 18:50:52 +0200
debug for splith
Diffstat:
splith.py | | | 41 | +++++++++++++++++++++++++++++++++++++++-- |
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/splith.py b/splith.py
@@ -29,6 +29,9 @@ parser.add_argument("--distthreshold",
parser.add_argument("--whitethreshold",
help="threshold to detect white space",
type=float, default=1)
+parser.add_argument("--debug",
+ help="write debug image",
+ action='store_true')  # flag takes no value: args.debug is False unless --debug is passed
args = parser.parse_args()
img = imageio.imread(args.filename)
@@ -79,13 +82,15 @@ cut[0] = True
# Step 2: remove content smaller than --mincontentheight by merging cuts
+cut2 = list(cut)
+
last = 0
for r in range(in_h):
if cut[r]:
if r-last < args.mincontentheight:
# forget about the previous non-cut region, it is too small
for rr in range(last, r+1):
- cut[rr] = True
+ cut2[rr] = True
last = r
# Step 3: group contiguous cuts
@@ -95,7 +100,7 @@ contiguous = []
last = 0
for r in range(in_h):
- if not cut[r]:
+ if not cut2[r]:
if last < r-1:
contiguous.append((r-last, last))
last = r
@@ -135,6 +140,38 @@ for i in range(len(contiguous_sort)):
final_cuts = sorted(final_cuts)
+if args.debug:  # --debug: build an annotated copy of the input image and write it out
+ matrix = numpy.full((in_h,in_w,3), 255, dtype=numpy.uint8)  # in_h x in_w canvas, 3 uint8 channels, every value 255
+
+ for r in range(in_h):
+ for c in range(in_w):
+ matrix[r][c] = img[r][c]  # start from the source pixel (assumes img pixels have 3 channels - TODO confirm)
+ if cut[r]:  # row is marked in cut, i.e. already marked before the Step 2 merge (Step 2 only writes to cut2)
+ matrix[r][c][1] = 255  # channel 1 forced to maximum
+ matrix[r][c][2] = 0  # channel 2 cleared; channel 0 keeps the source value
+ elif cut2[r]:  # row is marked only in cut2, i.e. it was added by the Step 2 merge
+ matrix[r][c][1] = 0  # channel 1 cleared
+ matrix[r][c][2] = 255  # channel 2 forced to maximum; channel 0 keeps the source value
+ for i in range(len(final_cuts)-1):  # walk consecutive pairs of entries in the sorted final_cuts
+ pcut = final_cuts[i]
+ ncut = final_cuts[i+1]
+ start = pcut[0] + pcut[1]  # presumably position + length of the previous cut, i.e. first row of the split - verify against where final_cuts is built
+ end = ncut[0]  # first field of the next cut; presumably the row where the split ends - verify
+ for c in range(in_w):  # draw a one-pixel-high line across the full width of row `start`
+ if end-start >= args.minheight:  # split is at least args.minheight rows tall
+ matrix[start][c][0] = 255  # pixel becomes (255, 0, 0): red if channels are in RGB order
+ matrix[start][c][1] = 0
+ matrix[start][c][2] = 0
+ else:  # split is shorter than args.minheight (the Step 5 comment says such splits are discarded)
+ matrix[start][c][0] = 0  # pixel becomes (0, 255, 0): green if channels are in RGB order
+ matrix[start][c][1] = 255
+ matrix[start][c][2] = 0
+
+ outfname = os.path.join(args.output_folder, "debug.png")  # fixed file name inside args.output_folder
+
+ imageio.imwrite(outfname, matrix)
+ sys.exit(0)  # exit with status 0 before Step 5; NOTE(review): presumably nested under `if args.debug:` - indentation is not visible in this view, confirm
+
# Step 5: produce the output files
# just discards splits smaller than minheight
# also trims white space from left and right