songflower

reflow bitmap sheet music to a different paper format
git clone https://a3nm.net/git/songflower/
Log | Files | Refs | README | LICENSE

commit e8bf45b82c5e2f3fde174c28d24e7d42c8ffed1e
parent ebea45c85068e5337f0679910cab1cd1eccc2803
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Thu,  5 Sep 2019 23:46:54 +0200

add files

Diffstat:
.gitignore | 9+++++++++
combine.py | 103+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
melodia_title_page.svg | 118+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
process.py | 350+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
splith.py | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 749 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+out
+out2
+out3
+*.bak
+chunks/
+pages/
+lines/
+outpages/
+tmp/
diff --git a/combine.py b/combine.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python3 -O
+
+# combine files in a folder (must be same width) to make images of given height
+# with margins
+
+import imageio
+import collections
+import sys
+import numpy
+import argparse
+import os.path
+import os
+from math import ceil, floor
+
+parser = argparse.ArgumentParser(
+        description="Combine same-width PNG strips into pages of a given height")
+parser.add_argument("folder",
+        help="folder for input PNGs (alphabetical order)", type=str)
+parser.add_argument("output_folder",
+        help="folder to write output files", type=str)
+parser.add_argument("height",
+        help="height of produced images", type=int)
+parser.add_argument("--hmargin",
+        help="left and right margins in pixels",
+        type=int, default=10)
+parser.add_argument("--separator",
+        help="minimal vertical separation between images",
+        type=int, default=10)
+parser.add_argument("--vmargin",
+        help="top and bottom margins in pixels",
+        type=int, default=10)
+args = parser.parse_args()
+
+def make_image(images, names, ofile):
+    """Stack `images` on a white page of height args.height, written to `ofile`.
+
+    `images` is a non-empty list of same-width 2-D pixel arrays and `names`
+    the matching file names (only used in the error message).  Returns
+    `ofile`, or None after printing an error when the images do not fit.
+    """
+    # check the fit before allocating the page
+    h = sum(len(x) for x in images)
+    if h + 2*args.vmargin + (len(images)-1)*args.separator > args.height:
+        print("ERROR: image(s) too large: " + " ".join(names))
+        print("These images were ignored")
+        return None
+
+    # white page; images are pasted starting at column args.hmargin
+    width = len(images[0][0])
+    matrix = numpy.full((args.height, 2*args.hmargin+width), 255, dtype=numpy.uint8)
+    cpos = args.hmargin
+    spare = args.height - 2*args.vmargin - h # white rows left to distribute
+
+    if len(images) == 1:
+        # center the image
+        rpos = args.vmargin + spare//2
+        matrix[rpos:rpos+h, cpos:cpos+width] = images[0]
+
+    else:
+        # multiple images: spread them from the top to the bottom margin
+        gaps = len(images)-1
+        permargin = spare//gaps
+        # rows lost to rounding: one extra row for each of the first gaps
+        offmargin = spare - permargin*gaps
+        offset = args.vmargin
+        for i, image in enumerate(images):
+            matrix[offset:offset+len(image), cpos:cpos+width] = image
+            offset += len(image) + permargin
+            if i < offmargin:
+                offset += 1
+
+    imageio.imwrite(ofile, matrix)
+
+    return ofile
+
+availheight = args.height - 2*args.vmargin
+
+imgs = []
+names = []
+totalheight = -args.separator
+num = 0
+for f in sorted(os.listdir(args.folder)):
+    img = imageio.imread(os.path.join(args.folder, f))
+    if imgs and len(img) + args.separator + totalheight > availheight:
+        # finish the current batch (an empty one is never flushed: an oversized image starts its own batch and make_image rejects it)
+        outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+        ret = make_image(imgs, names, outfname)
+        if ret:
+            print("wrote %s into %s" % (",".join(names), ret))
+            num += 1
+        imgs = []
+        names = []
+        totalheight = -args.separator
+
+    totalheight += args.separator + len(img)
+    imgs.append(img)
+    names.append(f)
+
+if (len(imgs) > 0):
+    # last batch
+    outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+    make_image(imgs, names, outfname)
+
diff --git a/melodia_title_page.svg b/melodia_title_page.svg
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="190.42178mm"
+   height="88.758537mm"
+   viewBox="0 0 190.42178 88.758537"
+   version="1.1"
+   id="svg8"
+   inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
+   sodipodi:docname="melodia_title_page.svg">
+  <defs
+     id="defs2" />
+  <sodipodi:namedview
id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="0.7" + inkscape:cx="453.67818" + inkscape:cy="126.1432" + inkscape:document-units="mm" + inkscape:current-layer="layer1" + showgrid="false" + fit-margin-top="0" + fit-margin-left="0" + fit-margin-right="0" + fit-margin-bottom="0" + inkscape:window-width="954" + inkscape:window-height="1132" + inkscape:window-x="2244" + inkscape:window-y="66" + inkscape:window-maximized="1" /> + <metadata + id="metadata5"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1" + transform="translate(-78.506562,-10.588571)"> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:39.31922531px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458335px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + x="77.956093" + y="37.758156" + id="text12"><tspan + sodipodi:role="line" + id="tspan10" + x="77.956093" + y="37.758156" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Nimbus Roman';-inkscape-font-specification:'Nimbus Roman, Bold';stroke-width:0.26458335px">MELODIA</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:8.61136341px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu 
Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + x="173.87093" + y="50.910534" + id="text16"><tspan + sodipodi:role="line" + x="173.87093" + y="50.910534" + style="text-align:center;text-anchor:middle;stroke-width:0.26458332px" + id="tspan18">A comprehensive course in sight-singing</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:9.43055153px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + x="78.258446" + y="66.582916" + id="text22"><tspan + sodipodi:role="line" + x="78.258446" + y="66.582916" + style="font-weight:bold;stroke-width:0.26458332px" + id="tspan24">By Samuel W. Cole and Leo R. 
Lewis</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:11.69080734px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + x="131.29546" + y="83.821426" + id="text30"><tspan + sodipodi:role="line" + id="tspan28" + x="131.29546" + y="83.821426" + style="stroke-width:0.26458332px">Public domain</tspan></text> + <text + id="text36" + y="98.231644" + x="268.20081" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.80881834px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + xml:space="preserve"><tspan + style="font-size:5.02080727px;text-align:end;text-anchor:end;stroke-width:0.26458332px" + y="98.231644" + x="268.20081" + sodipodi:role="line" + id="tspan38">Reflowed to tablet format using Songflower — <tspan + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:'DejaVu Sans Mono';-inkscape-font-specification:'DejaVu Sans Mono';stroke-width:0.26458332px" + id="tspan42">a3nm.net/git/songflower</tspan></tspan></text> + </g> +</svg> diff --git a/process.py b/process.py @@ -0,0 +1,350 @@ +#!/usr/bin/python3 -O + +import imageio +import sys +import numpy + +THRESHH=0.1 # threshold for line separation +MINH=50 # minimum line height +MARGH=15 # height oveflow + +THRESHV=30 # threshold for bar detection +MINV=50 # minimum distance between bars +MARGV=2 # margin around bars +SOMETHINGTHRESH=5 # threshold to find the line beginning and end +AROUNDBARS=6 # margin around bars +AROUNDBARS2=3 # margin around 
bars +#THRESHBARS=50 # threshold around bars +OFFSET=10 # real line contents +COEFF1=30 # penalty for variation around bar +COEFF2=-10 # gain for difference of bar +COEFF3=10000 # penalty for height of bar area +ZONETHRESH=100 # detect something in height + +MARGINH=20 # margin in output at top and bottom +MARGINW=20 # margin in output at left and right +SEPH=20 # separation between lines + +STRETCHDIST=30 # separation between stretchpoints +NUMSTRETCH=15 # stretchpoint on whole length +AROUND=2 # margin around stretchpoints +THRESHSTRETCH=.3 # threshold around stretchpoints + + +fname = sys.argv[1] +bname = fname.split('.')[0] +outw = int(sys.argv[2]) +outh = int(sys.argv[3]) + +img = imageio.imread(fname) + +# first let's find the white lines + +cumul = [sum([(255 - p) for p in img[j]]) for j in range(len(img))] + +mn = min(cumul) +print(mn) + + +availh = outh - 2*MARGINH +coveredh = [False] * len(img) +ncoveredh = 0 +cut = [False]*len(img) + +# add the cuts +sorth = [(cumul[i], i) for i in range(len(img))] +sorth = sorted(sorth) + +for i in range(len(sorth)): + (val, pos) = sorth[i] + #print("cut %d %d %d covered %d of %d" % (val, pos, sorth[0][0], ncoveredh, len(img))) + if val > sorth[0][0] + THRESHH*len(img[0]) and ncoveredh == len(img): + break # we are below threshold and have enough cuts + cut[pos] = True + #print("cut %d, ncoveredh %d, height %d" % (pos, ncoveredh, len(img))) + # mark the covered regions + #print("boundary %d %d" % (len(img), pos+int(availh/2))) + for j in range(max(0, pos-int(availh/2)), min(len(img), pos+int(availh/2))): + #print("consider %d" % j) + if not coveredh[j]: + ncoveredh += 1 + coveredh[j] = True + +# the first and last must be cuts +cut[-1] = True +cut[0] = True + +# merge the adjacent cuts + +height = [None] * len(img) +starts = [] + +last = 0 +for r in range(len(img)): + if cut[r]: + if r-last < MINH: + # forget about the previous region, it is too small + for rr in range(last, r+1): + cut[rr] = True + else: + # we have a 
proper region + starts.append((last+1-MARGH, r-last+MARGH)) + last = r + +print(starts) + +worklist = [] + +# now process every start + +for (start, height) in starts: + # find bars + w = len(img[0]) + cumulv = [0] * w + for r in range(start, start+height): + for c in range(len(img[r])): + cumulv[c] += 255-img[r][c] + + sort = [] + + minc = None + maxc = None + # add beginning and end + minf = -255*(COEFF1+COEFF2)*height + for c in range(len(img[r])): + if cumulv[c] > SOMETHINGTHRESH*height: + print("added %d" % c) + minc = c + sort.append((minf, minc)) + break + for c in (range(len(img[r])))[::-1]: + if cumulv[c] > SOMETHINGTHRESH*height: + print("added %d" % c) + maxc = c + sort.append((minf, maxc)) + break + + for i in range(max(minc, AROUNDBARS), min(maxc, w-AROUNDBARS)): + mymin = 255*height + mymax = 0 + myminh = len(img) + mymaxh = 0 + bad = False + for j in range(max(0, i-AROUNDBARS), min(w, i+AROUNDBARS)): + if cumulv[j] > cumulv[i]: + # not a local max + bad = True + break + dar = 0 + for r in range(height+2*MARGH): + if img[max(start+r-MARGH, 0)][j] < ZONETHRESH: + #print(r, img[start+r-MARGH][j]) + dar = r + break + myminh = min(myminh, start+dar-MARGH) + for r in (range(height+2*MARGH))[::-1]: + if img[min(start+r-MARGH, len(img)-1)][j] < ZONETHRESH: + dar = r + break + mymaxh = max(mymaxh, start+dar-MARGH) + if abs(j-i) <= AROUNDBARS2: + continue + mymin = min(mymin, cumulv[j]) + mymax = max(mymax, cumulv[j]) + if not bad: + # print("at pos %d the val is %d and the min is %d and max is %d and height is %d %d %d" % (i, cumulv[i], mymin, mymax, myminh, mymaxh, mymaxh-myminh)) + # weigh by the variation in AROUNDBARS except AROUNDBARS2 + # and by the difference between the bar and its surroundings + sort.append((COEFF1*abs(mymin - mymax) + + COEFF2*abs(cumulv[i] - mymax) + COEFF3*(mymaxh-myminh), i)) + #sort = [(cumulv[i], i) for i in range(w)] + sort = sorted(sort) + + mnv = min(cumulv[minc+OFFSET:maxc-OFFSET]) + print("minc %d maxc %d mnv %d" % (minc, 
maxc, mnv)) + + availw = outw - 2*MARGINW + + cuts = [] + taken = [False] * w + covered = [False] * w + ncovered = 0 # ensure that everyone is at distance availw/2-MARGV from a cut + + for i in range(len(sort)): + (val, pos) = sort[i] + if val > mnv + THRESHV*height and ncovered == w: + break # too far away (everyone must be covered) + if (taken[pos]): + continue # already taken + + # bad = False + # if val >= 0: + # for j in range(max(0, pos-AROUNDBARS), min(w, pos+AROUNDBARS)): + # if abs(j-pos) <= AROUNDBARS2: + # continue # not the bar itself + # # print("candidate bar %d val %d at j %d cumul is %d thresh %d" % (pos, val, j, cumulv[j], mnv+THRESHBARS*height)) + # if (cumulv[j] > mnv+THRESHBARS*height): + # bad = True # too close to non-minimal stuff + # if bad: + # continue + + # now write the cut + cuts.append(pos) + # and take all around + for j in range(max(0, pos-MINV), min(w, pos+MINV)): + taken[j] = True + for j in range(max(0, pos-int(availw/2-MARGV)), min(w, pos+int(availw/2-MARGV))): + if not covered[j]: + ncovered += 1 + covered[j] = True + + cuts = sorted(cuts) + # the cuts are ready + print(cuts) + + # now let's bucket the cuts + # TODO: more clever bucketing to minimize the deviation to average + groups = [] + curpos = cuts[0] + + for i in range(len(cuts)-1): + # ensure every bar fits + if(cuts[i+1]-cuts[i]+2*MARGV > availw): + print("ERROR: at start %d height %d cuts %d %d, bar does not fit" % + (start, height, cuts[i], cuts[i+1])) + sys.exit(2) + + for i in range(len(cuts)-1): + curcut = cuts[i+1] + (MARGV if i<len(cuts)-1 else 0) + pcut = cuts[i] + if curcut - curpos > availw: + # spill over! 
+ groups.append((curpos, pcut-curpos+MARGV)) + curpos = pcut-MARGV + else: + # extend + pass + if curpos != cuts[-1]: + # add the last group + groups.append((curpos, cuts[-1]-curpos)) + + print(groups) + + for (gstart, gw) in groups: + worklist.append((start, gstart, height, gw)) + +print(worklist) + +# bucket into pages + +pages = [] +curlist = [] +curh = 0 + +for i in range(len(worklist)): + newh = curh+SEPH+worklist[i][2] + #print("finished pages %d and curh %d" % (len(pages), curh)) + if newh < availh: + # it fits + curh = newh + curlist.append(worklist[i]) + else: + # spill over + pages.append(curlist) + curh = worklist[i][2] + curlist = [worklist[i]] + +if (len(curlist) > 0): + # add the last group + pages.append(curlist) + +print(pages) + +for (i, page) in enumerate(pages): + matrix = numpy.full((outh,outw), 255) + + totalh = sum(x[2] for x in page) + SEPH*(len(page)-1) + margin = availh - totalh + permargin = int(margin/len(page)) + # rounding offset + offmargin = margin-len(page)*permargin + + # fit stuff on page + + cpos = MARGINH + + print("page") + print(page) + for (wi, work) in enumerate(page): + print("work") + print(work) + print("cpos") + print(cpos) + # copy the stuff + space = availw - work[3] + + maxnumstretch = int(NUMSTRETCH*work[3]/w) + stretch = [] + + # find the stretchpoints + cumulv = [0] * work[3] + for r in range(work[0], work[0]+work[2]): + for c in range(work[1], work[1] + work[3]): + cumulv[c-work[1]] += 255-img[r][c] + + sort = [(cumulv[ii], ii) for ii in range(work[3])] + sort = sorted(sort) + + actualstretchdist = int(STRETCHDIST*work[3]/w) + + taken = [False] * work[3] + for (val, pos) in sort: + if taken[pos]: + continue + bad = False + for j in range(max(0, pos-AROUND), min(work[3], pos+AROUND)): + if (cumulv[j] > val+THRESHSTRETCH*work[2]): + #print("cumul at %d is %d vs current val %d and with margin %d" % (j, cumulv[j], val, val+THRESHSTRETCH*work[2])) + bad = True # too close to non-minimal stuff + if bad: + continue + 
stretch.append(work[1]+pos) + if len(stretch) > maxnumstretch: + break + for j in range(max(0, pos-actualstretchdist), min(work[3], pos+actualstretchdist)): + taken[j] = True + + #print("the stretch") + #print(stretch) + #print("end the stretch") + perstretch = int(space/len(stretch)) + offstretch = space-len(stretch)*perstretch + #print("perstretch %d" % perstretch) + #print("bounds on c: %d %d" % (work[1], work[1] + work[3])) + + sstretch = set(stretch) + + coffset = 0 + nstretch = 0 + for c in range(work[1], work[1] + work[3]): + if c in sstretch: + #print("we have a stretch") + # copy as many times as needed + rlen = perstretch + if nstretch < offstretch: + rlen += 1 # distribute the additional space + for ii in range(rlen): + for r in range(work[0], work[0] + work[2]): + matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset + ii] = img[r][c] + coffset += rlen + nstretch += 1 + for r in range(work[0], work[0] + work[2]): + matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset] = img[r][c] + + cpos += work[2] + SEPH + permargin + (1 if wi < offmargin else 0) + + # now dump the image + outfname = bname + "_" + "{:04d}".format(i) + ".png" + print("writing %s" % outfname) + imageio.imwrite(outfname, matrix) + print("wrote %s" % outfname) diff --git a/splith.py b/splith.py @@ -0,0 +1,169 @@ +#!/usr/bin/python3 -O + +import imageio +import collections +import sys +import numpy +import argparse +import os.path +from math import ceil, floor + +parser = argparse.ArgumentParser( + description="Split a grayscale PNG image into horizontal strips") +parser.add_argument("filename", + help="input PNG file name", type=str) +parser.add_argument("output_folder", + help="folder to write output files", type=str) +parser.add_argument("--maxheight", + help="maximum height of a split", + type=int, default=10000) +parser.add_argument("--minheight", + help="minimum height of a split", + type=int, default=30) +parser.add_argument("--mincontentheight", + help="minimum height of a 
content region (smaller ones are merged into the cuts)",
+        type=int, default=5)
+parser.add_argument("--distthreshold",
+        help="maximum height difference across splits",
+        type=int, default=300)
+parser.add_argument("--whitethreshold",
+        help="threshold to detect white space",
+        type=float, default=1)
+args = parser.parse_args()
+
+img = imageio.imread(args.filename)
+
+# https://stackoverflow.com/a/38549260
+if hasattr(type(img[0][0]), '__iter__'):
+    print ("converting input image to grayscale")
+    # https://stackoverflow.com/a/51571053
+    img = numpy.dot(img[... , :3] , [0.299 , 0.587, 0.114])
+
+in_h = len(img)
+in_w = len(img[0])
+
+# Step 1: find "cut lines" with minimal sum up to --whitethreshold
+# while respecting --maxheight
+
+# ensure the respect of maxheight
+covered = [False] * in_h
+ncovered = 0
+
+# table of the cut lines
+cut = [False]*in_h
+
+# compute sums
+row_cumul = [255*in_w - numpy.sum(img[j]) for j in range(in_h)]
+
+# sort potential cut lines by score
+cut_candidates = [(row_cumul[i], i) for i in range(in_h)]
+cut_candidates = sorted(cut_candidates)
+mn_score = cut_candidates[0][0]
+
+# consider all potential cut lines in sorted order
+for i in range(len(cut_candidates)):
+    (score, pos) = cut_candidates[i]
+    if score > mn_score + args.whitethreshold*in_w and ncovered == in_h:
+        break # over threshold, and enough cuts to respect maxheight
+    cut[pos] = True
+    # mark the covered regions (distance of --maxheight)
+    for j in range(max(0, pos-floor(1.*args.maxheight/2)),
+            min(in_h, pos+ceil(1.*args.maxheight/2))):
+        if not covered[j]:
+            ncovered += 1
+            covered[j] = True
+
+# the first and last must be cuts
+cut[-1] = True
+cut[0] = True
+
+# Step 2: remove content smaller than --mincontentheight by merging cuts
+
+last = 0
+for r in range(in_h):
+    if cut[r]:
+        if r-last < args.mincontentheight:
+            # forget about the previous non-cut region, it is too small
+            for rr in range(last, r+1):
+                cut[rr] = True
+        last = r
+
+# Step 3: group contiguous cuts
+
+contiguous = []
+
+last = 0
+
+for r in range(in_h):
+    if not cut[r]:
+        if last < r-1:
+            contiguous.append((r-last, last))
+        last = r
+
+if last < in_h-1:
+    contiguous.append((in_h-1-last, last))
+
+# Step 4: find potential cuts scored by the number of contiguous cut lines
+# again respecting --maxheight
+
+contiguous_sort = sorted(contiguous, reverse=True)
+best = contiguous_sort[0][0]
+
+covered2 = [False] * in_h
+ncovered2 = 0
+
+final_cuts = []
+
+for i in range(len(contiguous_sort)):
+    (height, pos) = contiguous_sort[i]
+    if height < best - args.distthreshold and ncovered2 == in_h:
+        break # we are under threshold and have enough cuts
+    final_cuts.append((pos, height))
+    # mark the covered regions
+    for j in range(max(0, pos-floor(1.*args.maxheight/2)),
+            min(in_h, pos+height+ceil(1.*args.maxheight/2))):
+        if not covered2[j]:
+            ncovered2 += 1
+            covered2[j] = True
+
+final_cuts = sorted(final_cuts)
+
+# Step 5: produce the output files
+# just discards splits smaller than minheight
+# also trims white space from left and right
+
+num = 0
+for i in range(len(final_cuts)-1):
+    pcut = final_cuts[i]
+    ncut = final_cuts[i+1]
+    start = pcut[0] + pcut[1]
+    end = ncut[0]
+
+    if end-start < args.minheight:
+        continue
+
+    strip = img[start:end]
+
+    # ink per column, summed in float64 so that 8-bit pixel values cannot
+    # wrap around while being added up
+    col_ink = 255.*(end-start) - strip.sum(axis=0, dtype=numpy.float64)
+    # indexes of the columns that are not white
+    inked = numpy.flatnonzero(
+            col_ink >= (end-start)*args.whitethreshold)
+
+    # an entirely white strip has no column to keep: skip it instead of
+    # scanning past the last column
+    if len(inked) == 0:
+        continue
+    l_end = inked[0]
+    r_end = inked[-1] + 1
+
+    # copy the trimmed strip as 8-bit pixels
+    matrix = strip[:, l_end:r_end].astype(numpy.uint8)
+
+    outfname = os.path.join(args.output_folder, os.path.basename(args.filename).split('.')[0] + "_" + "{:04d}".format(num) + ".png")
+
+    imageio.imwrite(outfname, matrix)
+    print("wrote %s" % outfname)
+    num += 1
+