songflower

reflow bitmap sheet music to a different paper format
git clone https://a3nm.net/git/songflower/
Log | Files | Refs | README | LICENSE

commit ebea45c85068e5337f0679910cab1cd1eccc2803
parent 2c601fa54d6fb18ca42c105990f8706b85cbf138
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Thu,  5 Sep 2019 23:46:05 +0200

better cut when we cannot find a bar

Diffstat:
splitw.py | 290+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 290 insertions(+), 0 deletions(-)

diff --git a/splitw.py b/splitw.py @@ -0,0 +1,290 @@ +#!/usr/bin/python3 -O + +import imageio +import collections +import sys +import numpy +import argparse +import os.path +from math import ceil, floor + +parser = argparse.ArgumentParser( + description="Split a musical system into bits of maximal width") +parser.add_argument("filename", + help="input PNG file name", type=str) +parser.add_argument("output_folder", + help="folder to write output files", type=str) +parser.add_argument("width", + help="maximum width in pixels", type=int) +parser.add_argument("--minlength", + help="minimum len of a low-height point", + type=int, default=5) +parser.add_argument("--margin", + help="margin for low height computation", + type=int, default=10) +parser.add_argument("--heightthreshold", + help="negligible height difference for low-height points", + type=int, default=5) +parser.add_argument("--weightthreshold", + help="negligible weight difference", + type=int, default=10) +parser.add_argument("--weightwindow", + help="window for weight", + type=int, default=5) +parser.add_argument("--maxbardistance", + help="maximum bar distance", + type=int, default=15) +parser.add_argument("--minbarweight", + help="minimum weight multiplicator for bar", + type=int, default=4) +parser.add_argument("--outlierquantile", + help="eliminate this proportion of outlier weights/heights", + type=float, default=0.03) +parser.add_argument("--whitethreshold", + help="threshold to detect white space", + type=float, default=3) +parser.add_argument("--minchunk", + help="minimal width of a chunk when cutting outside a bar", + type=float, default=20) +parser.add_argument("--debug", + help="write debug image", + type=bool, default=False) +args = parser.parse_args() + +img = imageio.imread(args.filename) + +# https://stackoverflow.com/a/38549260 +if hasattr(type(img[0][0]), '__iter__'): + print ("converting input image to grayscale") + # https://stackoverflow.com/a/51571053 + img = numpy.dot(img[... , :3] , [0.299 , 0.587, 0.114]) + +in_h = len(img) +in_w = len(img[0]) + +# Step 1: find places of minimal height + +top = [None] * in_w +bottom = [None] * in_w +height = [None] * in_w + +for c in range(in_w): + c_top = 0 + while c_top < in_h and 255-img[c_top][c] <= args.whitethreshold: + c_top += 1 + top[c] = c_top + + c_bottom = in_h + while c_bottom >= 0 and 255-img[c_bottom-1][c] <= args.whitethreshold: + c_bottom -= 1 + bottom[c] = c_bottom + + height[c] = bottom[c] - top[c] + +heights = sorted(height[args.margin:-args.margin]) +mn_height=heights[int(len(heights)*args.outlierquantile)] +#print(mn_height) + +lowheight = [height[c] < mn_height + args.heightthreshold for c in range(in_w)] +#print(lowheight) + +# Step 2: eliminate places of minimal height that are too isolated + +last = 0 +for c in range(in_w): + if not lowheight[c]: + if c-last < args.minlength: + # forget about the previous non-cut region, it is too small + for cc in range(last, c+1): + lowheight[cc] = False + last = c + +# Step 3: compute the total weights + +cumul = [sum(255-img[r][c] for r in range(in_h)) for c in range(in_w)] +mn_weights=[] + +for c in range(args.margin, in_w-2*args.margin -args.weightwindow): + if lowheight[c]: + mn_weights.append(sum(cumul[c:c+args.weightwindow])) + +mn_weights = sorted(mn_weights) +mn_weight = mn_weights[int(len(mn_weights)*args.outlierquantile)] + +#print(mn_weight) +lowweight = [sum(cumul[max(0, c-ceil(1.*args.weightwindow/2)):min(in_w-1,c+floor(1.*args.weightwindow/2))]) < + mn_weight + args.weightthreshold*in_h*args.weightwindow for c in range(in_w)] +#print(cumul) + +# print(lowweight) + +# Step 4: find bars + +last = -1 +maxweight = -1 +maxweightpos = -1 + +bars = [0] + +for c in range(in_w): + if not lowheight[c]: + last = -1 # give up + continue + if not lowweight[c]: + if cumul[c] > maxweight: + maxweight = cumul[c] + maxweightpos = c + #print(c, last, maxweight) + if lowweight[c] and last > 0 and maxweight > 0: + # could be good + if maxweight > args.minbarweight*mn_weight/args.weightwindow: + if c-last < args.maxbardistance: + bars.append(maxweightpos) + if lowweight[c]: + maxweight = -1 + maxweightpos = -1 + last = c + +#print (bars) +if bars[-1] != in_w-1: + bars.append(in_w-1) + +# ensure we will not get stuck + +bars2 = [bars[0]] +for b in range(len(bars)-1): + cbar = bars[b+1] + while cbar-bars2[-1] > args.width: + # let's first try to find a reasonable cut point... + cc = bars2[-1]+args.width + ok = False + while cc > bars2[-1]+args.minchunk: + if lowweight[cc] and lowheight[cc]: + ok = True + break + cc -= 1 + if ok: + # cut where we found + bars2.append(cc) + else: + # last resort: we cut anywhere + if cbar-bars2[-1] < 2*(args.width-1): + # try to be a bit more clever here: cut at the middle + bars2.append(int((bars2[-1]+cbar)/2)) + else: + bars2.append(bars2[-1]+args.width) + bars2.append(cbar) + +#print(bars2) + +chunks = [] + +for b in range(len(bars2)-1): + cbar = bars2[b] + nbar = bars2[b+1] + chunks.append((cbar, nbar-cbar+1)) + +# print (chunks) + + +# Step 5: optimal fit + +# naive bruteforce for knapsack +def fit(remaining, current_bucket, current_bucket_weight, previous_buckets, worst_difference): + if len(remaining) == 0: + return (max(worst_difference, args.width-current_bucket_weight), + previous_buckets + [current_bucket]) + # solution1: finish current bucket and start a new bucket + solution1 = fit(remaining[1:], [remaining[0]], remaining[0][1], + previous_buckets + [current_bucket], max(worst_difference, + args.width-current_bucket_weight)) + if current_bucket_weight+remaining[0][1] > args.width: + # it does not fit so there's no choice + return solution1 + # solution2: add next item to current bucket + solution2 = fit(remaining[1:], current_bucket + [remaining[0]], + current_bucket_weight + remaining[0][1], previous_buckets, + worst_difference) + if solution1[0] < solution2[0]: + return solution1 + else: + return solution2 + +sol = fit(chunks[1:], [chunks[0]], chunks[0][1], [], 0) +#print(sol) +final_chunks = [(x[0][0], x[-1][0] + x[-1][1] - x[0][0]) for x in sol[1]] +#print(final_chunks) + + +if args.debug: + matrix = numpy.full((in_h,in_w,3), 255, dtype=numpy.uint8) + + for r in range(in_h): + for c in range(in_w): + matrix[r][c] = img[r][c] + if lowheight[c]: + matrix[r][c][1] = 255 + if lowweight[c]: + matrix[r][c][2] = 255 + if c in bars: + matrix[r][c][0] = 255 + matrix[r][c][1] = 0 + matrix[r][c][2] = 0 + elif c in bars2: + matrix[r][c][0] = 0 + matrix[r][c][1] = 255 + matrix[r][c][2] = 0 + if c in [y[0] for y in final_chunks]: + matrix[r][c][0] = 0 + matrix[r][c][1] = 0 + matrix[r][c][2] = 255 + + outfname = os.path.join(args.output_folder, "debug.png") + + imageio.imwrite(outfname, matrix) + sys.exit(0) + +# Step 6: draw things, allocating the slack + +num = 0 + +for (start, width) in final_chunks: + # count the stretchable things + stretchable = 0 + naive_stretch = False + for c in range(start, start+width): + if lowweight[c] and lowheight[c]: + stretchable += 1 + non_stretchable = width-stretchable + target_stretchable = args.width - non_stretchable + if stretchable == 0: + naive_stretch = True + factor = 1.*args.width/width + else: + factor = 1.*target_stretchable/stretchable + #print("factor %f" % factor) + + matrix = numpy.full((in_h,args.width), 255, dtype=numpy.uint8) + + for r in range(in_h): + outc = 0 + outpos = 0. + for c in range(start, start+width): + if naive_stretch: + outpos += factor + else: + if not (lowweight[c] and lowheight[c]): + outpos += 1 + else: + outpos += factor + #print(c, outc, outpos) + while outc <= outpos and outc < args.width: + matrix[r][outc] = img[r][c] + outc += 1 + + outfname = os.path.join(args.output_folder, os.path.basename(args.filename).split('.')[0] + "_" + "{:04d}".format(num) + ".png") + + imageio.imwrite(outfname, matrix) + print("wrote %s" % outfname) + num += 1 +