commit f5efa107b771473b63f21da81e997224af700e91
parent ad743d318aec6732b8899834355bb926dd1897a9
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 7 Sep 2019 00:24:37 +0200
superficial cleanup
Diffstat:
splitw.py | 69 +++++++++++++++++++++++++++------------------------------------------
1 file changed, 27 insertions(+), 42 deletions(-)
diff --git a/splitw.py b/splitw.py
@@ -19,26 +19,26 @@ parser.add_argument("--minlength",
help="minimum len of a low-height point",
type=int, default=5)
parser.add_argument("--margin",
- help="margin for low height computation",
- type=int, default=10)
+ help="margin for low height computation",
+ type=int, default=10)
parser.add_argument("--heightthreshold",
- help="negligible height difference for low-height points",
- type=int, default=5)
+ help="negligible height difference for low-height points",
+ type=int, default=5)
parser.add_argument("--weightthreshold",
- help="negligible weight difference",
- type=int, default=10)
+ help="negligible weight difference",
+ type=int, default=10)
parser.add_argument("--weightwindow",
- help="window for weight",
- type=int, default=5)
+ help="window for weight",
+ type=int, default=5)
parser.add_argument("--maxbardistance",
- help="maximum bar distance",
- type=int, default=15)
+ help="maximum bar distance",
+ type=int, default=15)
parser.add_argument("--minbarweight",
- help="minimum weight multiplicator for bar",
- type=int, default=4)
+ help="minimum weight multiplicator for bar",
+ type=int, default=4)
parser.add_argument("--outlierquantile",
- help="eliminate this proportion of outlier weights/heights",
- type=float, default=0.03)
+ help="eliminate this proportion of outlier weights/heights",
+ type=float, default=0.03)
parser.add_argument("--whitethreshold",
help="threshold to detect white space",
type=float, default=3)
@@ -61,7 +61,7 @@ if hasattr(type(img[0][0]), '__iter__'):
in_h = len(img)
in_w = len(img[0])
-# Step 1: find places of minimal height
+# Step 1: find columns of low height
top = [None] * in_w
bottom = [None] * in_w
@@ -81,24 +81,22 @@ for c in range(in_w):
height[c] = bottom[c] - top[c]
heights = sorted(height[args.margin:-args.margin])
-mn_height=heights[int(len(heights)*args.outlierquantile)]
-#print(mn_height)
+mn_height = heights[int(len(heights)*args.outlierquantile)]
lowheight = [height[c] < mn_height + args.heightthreshold for c in range(in_w)]
-#print(lowheight)
-# Step 2: eliminate places of minimal height that are too isolated
+# Step 2: eliminate columns of low height that are too isolated
last = 0
for c in range(in_w):
if not lowheight[c]:
if c-last < args.minlength:
- # forget about the previous non-cut region, it is too small
+ # forget about the previous region, it is too small
for cc in range(last, c+1):
lowheight[cc] = False
last = c
-# Step 3: compute the total weights
+# Step 3: compute the total weights and find columns of low weight
cumul = [sum(255-img[r][c] for r in range(in_h)) for c in range(in_w)]
mn_weights=[]
@@ -110,14 +108,10 @@ for c in range(args.margin, in_w-2*args.margin -args.weightwindow):
mn_weights = sorted(mn_weights)
mn_weight = mn_weights[int(len(mn_weights)*args.outlierquantile)]
-#print(mn_weight)
lowweight = [sum(cumul[max(0, c-ceil(1.*args.weightwindow/2)):min(in_w-1,c+floor(1.*args.weightwindow/2))]) <
mn_weight + args.weightthreshold*in_h*args.weightwindow for c in range(in_w)]
-#print(cumul)
-# print(lowweight)
-
-# Step 4: find bars
+# Step 4: find barlines
last = -1
maxweight = -1
@@ -133,7 +127,6 @@ for c in range(in_w):
if cumul[c] > maxweight:
maxweight = cumul[c]
maxweightpos = c
- #print(c, last, maxweight)
if lowweight[c] and last > 0 and maxweight > 0:
# could be good
if maxweight > args.minbarweight*mn_weight/args.weightwindow:
@@ -144,11 +137,10 @@ for c in range(in_w):
maxweightpos = -1
last = c
-#print (bars)
if bars[-1] != in_w-1:
bars.append(in_w-1)
-# ensure we will not get stuck
+# Step 5: ensure we will not get stuck by adding cutting points where necessary
bars2 = [bars[0]]
for b in range(len(bars)-1):
@@ -174,8 +166,6 @@ for b in range(len(bars)-1):
bars2.append(bars2[-1]+args.width)
bars2.append(cbar)
-#print(bars2)
-
chunks = []
for b in range(len(bars2)-1):
@@ -183,12 +173,10 @@ for b in range(len(bars2)-1):
nbar = bars2[b+1]
chunks.append((cbar, nbar-cbar+1))
-# print (chunks)
-
-
-# Step 5: optimal fit
+# Step 6: optimal fit
# naive bruteforce for word wrapping
+# TODO: this could be solved with dynamic programming instead of brute force
def fit(remaining, current_bucket, current_bucket_weight, previous_buckets, worst_difference):
if len(remaining) == 0:
return (max(worst_difference, args.width-current_bucket_weight),
@@ -210,10 +198,8 @@ def fit(remaining, current_bucket, current_bucket_weight, previous_buckets, wors
return solution2
sol = fit(chunks[1:], [chunks[0]], chunks[0][1], [], 0)
-#print(sol)
-final_chunks = [(x[0][0], x[-1][0] + x[-1][1] - x[0][0]) for x in sol[1]]
-#print(final_chunks)
+final_chunks = [(x[0][0], x[-1][0] + x[-1][1] - x[0][0]) for x in sol[1]]
if args.debug:
matrix = numpy.full((in_h,in_w,3), 255, dtype=numpy.uint8)
@@ -244,12 +230,12 @@ if args.debug:
imageio.imwrite(outfname, matrix)
sys.exit(0)
-# Step 6: draw things, allocating the slack
+# Step 7: draw chunks
num = 0
for (start, width) in final_chunks:
- # count the stretchable things
+ # count the stretchable columns
stretchable = 0
naive_stretch = False
for c in range(start, start+width):
@@ -258,11 +244,11 @@ for (start, width) in final_chunks:
non_stretchable = width-stretchable
target_stretchable = args.width - non_stretchable
if stretchable == 0:
+ # if nothing is stretchable, fall back on naive stretching
naive_stretch = True
factor = 1.*args.width/width
else:
factor = 1.*target_stretchable/stretchable
- #print("factor %f" % factor)
matrix = numpy.full((in_h,args.width), 255, dtype=numpy.uint8)
@@ -277,7 +263,6 @@ for (start, width) in final_chunks:
outpos += 1
else:
outpos += factor
- #print(c, outc, outpos)
while outc <= outpos and outc < args.width:
matrix[r][outc] = img[r][c]
outc += 1