commit e8bf45b82c5e2f3fde174c28d24e7d42c8ffed1e
parent ebea45c85068e5337f0679910cab1cd1eccc2803
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Thu, 5 Sep 2019 23:46:54 +0200
add files
Diffstat:
.gitignore | | | 9 | +++++++++ |
combine.py | | | 103 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
melodia_title_page.svg | | | 118 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
process.py | | | 350 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
splith.py | | | 169 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 749 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+out
+out2
+out3
+*.bak
+chunks/
+pages/
+lines/
+outpages/
+tmp/
diff --git a/combine.py b/combine.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python3 -O
+
+# combine files in a folder (must be same width) to make images of given height
+# with margins
+
+import imageio
+import collections
+import sys
+import numpy
+import argparse
+import os.path
+import os
+from math import ceil, floor
+
+# Command-line interface; the description is what --help prints for this tool
+parser = argparse.ArgumentParser(
+    description="Combine PNG images of the same width into pages of a given height")
+parser.add_argument("folder",
+    help="folder for input PNGs (alphabetical order)", type=str)
+parser.add_argument("output_folder",
+    help="folder to write output files", type=str)
+parser.add_argument("height",
+    help="height of produced images", type=int)
+parser.add_argument("--hmargin",
+    help="left and right margins in pixels", type=int, default=10)
+parser.add_argument("--separator",
+    help="minimal vertical separation between images",
+    type=int, default=10)
+parser.add_argument("--vmargin",
+    help="top and bottom margins in pixels",
+    type=int, default=10)
+args = parser.parse_args()
+
+def make_image(images, names, ofile):
+    """Stack `images` vertically on a white canvas and write it to `ofile`.
+
+    The canvas is args.height pixels high and as wide as the first image plus
+    args.hmargin on each side. A single image is centered vertically; several
+    images are spread so that the first starts at the top margin and the last
+    ends at the bottom margin. `names` is only used in the error message.
+
+    Returns `ofile` on success, or None when there is nothing to write or
+    (after printing an error) when the images do not fit in args.height.
+    """
+    if not images:
+        return None  # nothing to lay out; images[0] below would fail
+
+    h = sum(len(x) for x in images)
+    if h + 2*args.vmargin + (len(images)-1)*args.separator > args.height:
+        print("ERROR: image(s) too large: " + " ".join(names))
+        print("These images were ignored")
+        return None
+
+    matrix = numpy.full((args.height, 2*args.hmargin+len(images[0][0])), 255, dtype=numpy.uint8)
+    cpos = args.hmargin
+    # vertical space left once margins and image contents are accounted for
+    free = args.height - 2*args.vmargin - h
+
+    if len(images) == 1:
+        # center the image
+        offset = args.vmargin + free // 2
+        permargin = offmargin = 0
+    else:
+        # multiple images: spread the free space over the gaps between them
+        permargin, offmargin = divmod(free, len(images)-1)
+        offset = args.vmargin
+
+    for i, img in enumerate(images):
+        # slice assignment copies the whole image at once
+        matrix[offset:offset+len(img), cpos:cpos+len(img[0])] = img
+        # the first `offmargin` gaps absorb the rounding remainder
+        offset += len(img) + permargin + (1 if i < offmargin else 0)
+    imageio.imwrite(ofile, matrix)
+    return ofile
+
+availheight = args.height - 2*args.vmargin
+
+imgs = []
+names = []
+totalheight = -args.separator
+num = 0
+for f in sorted(os.listdir(args.folder)):
+    img = imageio.imread(os.path.join(args.folder, f))
+    # flush only a non-empty batch: an image too tall to fit on its own
+    # still starts a batch of one, which make_image reports and skips
+    if imgs and len(img) + args.separator + totalheight > availheight:
+        outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+        ret = make_image(imgs, names, outfname)
+        if ret:
+            print("wrote %s into %s" % (",".join(names), ret))
+            num += 1
+        imgs = []
+        names = []
+        totalheight = -args.separator
+    totalheight += args.separator + len(img)
+    imgs.append(img)
+    names.append(f)
+
+if (len(imgs) > 0):
+    # last batch (make_image needs the names to report errors)
+    outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+    make_image(imgs, names, outfname)
+
diff --git a/melodia_title_page.svg b/melodia_title_page.svg
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="190.42178mm"
+ height="88.758537mm"
+ viewBox="0 0 190.42178 88.758537"
+ version="1.1"
+ id="svg8"
+ inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
+ sodipodi:docname="melodia_title_page.svg">
+ <defs
+ id="defs2" />
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="0.7"
+ inkscape:cx="453.67818"
+ inkscape:cy="126.1432"
+ inkscape:document-units="mm"
+ inkscape:current-layer="layer1"
+ showgrid="false"
+ fit-margin-top="0"
+ fit-margin-left="0"
+ fit-margin-right="0"
+ fit-margin-bottom="0"
+ inkscape:window-width="954"
+ inkscape:window-height="1132"
+ inkscape:window-x="2244"
+ inkscape:window-y="66"
+ inkscape:window-maximized="1" />
+ <metadata
+ id="metadata5">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-78.506562,-10.588571)">
+ <text
+ xml:space="preserve"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:39.31922531px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458335px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ x="77.956093"
+ y="37.758156"
+ id="text12"><tspan
+ sodipodi:role="line"
+ id="tspan10"
+ x="77.956093"
+ y="37.758156"
+ style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Nimbus Roman';-inkscape-font-specification:'Nimbus Roman, Bold';stroke-width:0.26458335px">MELODIA</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:8.61136341px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ x="173.87093"
+ y="50.910534"
+ id="text16"><tspan
+ sodipodi:role="line"
+ x="173.87093"
+ y="50.910534"
+ style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
+ id="tspan18">A comprehensive course in sight-singing</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:9.43055153px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ x="78.258446"
+ y="66.582916"
+ id="text22"><tspan
+ sodipodi:role="line"
+ x="78.258446"
+ y="66.582916"
+ style="font-weight:bold;stroke-width:0.26458332px"
+ id="tspan24">By Samuel W. Cole and Leo R. Lewis</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:11.69080734px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ x="131.29546"
+ y="83.821426"
+ id="text30"><tspan
+ sodipodi:role="line"
+ id="tspan28"
+ x="131.29546"
+ y="83.821426"
+ style="stroke-width:0.26458332px">Public domain</tspan></text>
+ <text
+ id="text36"
+ y="98.231644"
+ x="268.20081"
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.80881834px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ xml:space="preserve"><tspan
+ style="font-size:5.02080727px;text-align:end;text-anchor:end;stroke-width:0.26458332px"
+ y="98.231644"
+ x="268.20081"
+ sodipodi:role="line"
+ id="tspan38">Reflowed to tablet format using Songflower — <tspan
+ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:'DejaVu Sans Mono';-inkscape-font-specification:'DejaVu Sans Mono';stroke-width:0.26458332px"
+ id="tspan42">a3nm.net/git/songflower</tspan></tspan></text>
+ </g>
+</svg>
diff --git a/process.py b/process.py
@@ -0,0 +1,350 @@
+#!/usr/bin/python3 -O
+
+import imageio
+import sys
+import numpy
+
+THRESHH=0.1 # threshold for line separation (per pixel of image width)
+MINH=50 # minimum line height
+MARGH=15 # height overflow: rows added above each detected line
+
+THRESHV=30 # threshold for bar detection
+MINV=50 # minimum distance between bars
+MARGV=2 # margin around bars
+SOMETHINGTHRESH=5 # threshold to find the line beginning and end
+AROUNDBARS=6 # margin around bars: window of the local-maximum test
+AROUNDBARS2=3 # margin around bars: columns left out of the variation
+#THRESHBARS=50 # threshold around bars
+OFFSET=10 # real line contents: columns skipped at both ends of a line
+COEFF1=30 # penalty for variation around bar
+COEFF2=-10 # gain for difference of bar
+COEFF3=10000 # penalty for height of bar area
+ZONETHRESH=100 # detect something in height: pixels below this count
+
+MARGINH=20 # margin in output at top and bottom
+MARGINW=20 # margin in output at left and right
+SEPH=20 # separation between lines
+
+STRETCHDIST=30 # separation between stretchpoints
+NUMSTRETCH=15 # stretchpoint on whole length
+AROUND=2 # margin around stretchpoints
+THRESHSTRETCH=.3 # threshold around stretchpoints
+
+
+import os.path # only needed here, to derive bname
+
+fname = sys.argv[1]
+# strip only the extension: cutting at the first '.' mangles "./page.png"
+bname = os.path.splitext(fname)[0]
+outw = int(sys.argv[2])
+outh = int(sys.argv[3])
+
+img = imageio.imread(fname)
+
+# first let's find the white lines
+
+# darkness of each row; summed as int64 so that 8-bit pixels cannot wrap
+cumul = [int(v) for v in (255 - img.astype(numpy.int64)).sum(axis=1)]
+
+mn = min(cumul)
+print(mn)
+
+
+availh = outh - 2*MARGINH
+coveredh = [False] * len(img)
+ncoveredh = 0
+cut = [False]*len(img)
+
+# add the cuts, whitest rows first
+sorth = sorted((cumul[i], i) for i in range(len(img)))
+
+for (val, pos) in sorth:
+    if val > sorth[0][0] + THRESHH*len(img[0]) and ncoveredh == len(img):
+        break # over the threshold, and every row is close enough to a cut
+    cut[pos] = True
+    # mark the covered regions
+    for j in range(max(0, pos-int(availh/2)), min(len(img), pos+int(availh/2))):
+        if not coveredh[j]:
+            ncoveredh += 1
+            coveredh[j] = True
+
+# the first and last must be cuts
+cut[-1] = True
+cut[0] = True
+
+# merge the adjacent cuts
+
+starts = []
+
+last = 0
+for r in range(len(img)):
+    if cut[r]:
+        if r-last < MINH:
+            # forget about the previous region, it is too small
+            for rr in range(last, r+1):
+                cut[rr] = True
+        else:
+            # we have a proper region, extended upwards by MARGH rows and
+            # ending at row r; the start is clamped at row 0 because a
+            # negative start would index rows from the bottom of the image
+            top = max(0, last+1-MARGH)
+            starts.append((top, r+1-top))
+        last = r
+
+print(starts)
+
+# worklist receives one (start row, start column, height, width) tuple per
+# chunk of a line
+worklist = []
+
+# now process every start
+
+for (start, height) in starts:
+ # find bars
+ w = len(img[0])
+ # cumulv[c] accumulates (255 - pixel) over the rows of the region
+ # NOTE(review): the pixels are numpy scalars; with NumPy >= 2 this sum may
+ # stay in the pixel type and wrap around -- confirm
+ # NOTE(review): a negative start would index rows from the end of the image
+ cumulv = [0] * w
+ for r in range(start, start+height):
+ for c in range(len(img[r])):
+ cumulv[c] += 255-img[r][c]
+
+ sort = []
+
+ minc = None
+ maxc = None
+ # add beginning and end: the first and last columns whose sum exceeds
+ # SOMETHINGTHRESH*height, with the score minf (presumably low enough to
+ # sort before every bar candidate -- confirm)
+ minf = -255*(COEFF1+COEFF2)*height
+ # r still holds the last row visited by the loop above
+ for c in range(len(img[r])):
+ if cumulv[c] > SOMETHINGTHRESH*height:
+ print("added %d" % c)
+ minc = c
+ sort.append((minf, minc))
+ break
+ for c in (range(len(img[r])))[::-1]:
+ if cumulv[c] > SOMETHINGTHRESH*height:
+ print("added %d" % c)
+ maxc = c
+ sort.append((minf, maxc))
+ break
+
+ # NOTE(review): minc and maxc stay None when no column exceeds the
+ # threshold, and max()/min() below would then fail -- confirm this
+ # cannot happen for a detected region
+ for i in range(max(minc, AROUNDBARS), min(maxc, w-AROUNDBARS)):
+ mymin = 255*height
+ mymax = 0
+ myminh = len(img)
+ mymaxh = 0
+ bad = False
+ # look at the columns from i-AROUNDBARS up to (excluding) i+AROUNDBARS
+ for j in range(max(0, i-AROUNDBARS), min(w, i+AROUNDBARS)):
+ if cumulv[j] > cumulv[i]:
+ # not a local max
+ bad = True
+ break
+ # first row of column j (region widened by MARGH on both sides) with
+ # a pixel below ZONETHRESH; dar stays 0 when there is none
+ dar = 0
+ for r in range(height+2*MARGH):
+ if img[max(start+r-MARGH, 0)][j] < ZONETHRESH:
+ #print(r, img[start+r-MARGH][j])
+ dar = r
+ break
+ myminh = min(myminh, start+dar-MARGH)
+ # same search from the bottom; dar keeps its previous value when no
+ # pixel is below ZONETHRESH
+ for r in (range(height+2*MARGH))[::-1]:
+ if img[min(start+r-MARGH, len(img)-1)][j] < ZONETHRESH:
+ dar = r
+ break
+ mymaxh = max(mymaxh, start+dar-MARGH)
+ # columns within AROUNDBARS2 of i do not count for the variation
+ if abs(j-i) <= AROUNDBARS2:
+ continue
+ mymin = min(mymin, cumulv[j])
+ mymax = max(mymax, cumulv[j])
+ if not bad:
+ # print("at pos %d the val is %d and the min is %d and max is %d and height is %d %d %d" % (i, cumulv[i], mymin, mymax, myminh, mymaxh, mymaxh-myminh))
+ # weigh by the variation in AROUNDBARS except AROUNDBARS2
+ # and by the difference between the bar and its surroundings
+ sort.append((COEFF1*abs(mymin - mymax) +
+ COEFF2*abs(cumulv[i] - mymax) + COEFF3*(mymaxh-myminh), i))
+ #sort = [(cumulv[i], i) for i in range(w)]
+ # lowest score first
+ sort = sorted(sort)
+
+ # NOTE(review): the slice is empty when maxc-minc <= 2*OFFSET and min()
+ # would then fail -- confirm lines are always wider than that
+ mnv = min(cumulv[minc+OFFSET:maxc-OFFSET])
+ print("minc %d maxc %d mnv %d" % (minc, maxc, mnv))
+
+ availw = outw - 2*MARGINW
+
+ cuts = []
+ taken = [False] * w
+ covered = [False] * w
+ ncovered = 0 # ensure that everyone is at distance availw/2-MARGV from a cut
+
+ for i in range(len(sort)):
+ (val, pos) = sort[i]
+ if val > mnv + THRESHV*height and ncovered == w:
+ break # too far away (everyone must be covered)
+ if (taken[pos]):
+ continue # already taken
+
+ # bad = False
+ # if val >= 0:
+ # for j in range(max(0, pos-AROUNDBARS), min(w, pos+AROUNDBARS)):
+ # if abs(j-pos) <= AROUNDBARS2:
+ # continue # not the bar itself
+ # # print("candidate bar %d val %d at j %d cumul is %d thresh %d" % (pos, val, j, cumulv[j], mnv+THRESHBARS*height))
+ # if (cumulv[j] > mnv+THRESHBARS*height):
+ # bad = True # too close to non-minimal stuff
+ # if bad:
+ # continue
+
+ # now write the cut
+ cuts.append(pos)
+ # and take all around
+ # (no other cut is accepted within MINV columns of this one)
+ for j in range(max(0, pos-MINV), min(w, pos+MINV)):
+ taken[j] = True
+ for j in range(max(0, pos-int(availw/2-MARGV)), min(w, pos+int(availw/2-MARGV))):
+ if not covered[j]:
+ ncovered += 1
+ covered[j] = True
+
+ cuts = sorted(cuts)
+ # the cuts are ready
+ print(cuts)
+
+ # now let's bucket the cuts
+ # TODO: more clever bucketing to minimize the deviation to average
+ # groups holds (start column, width) pairs
+ groups = []
+ curpos = cuts[0]
+
+ for i in range(len(cuts)-1):
+ # ensure every bar fits
+ if(cuts[i+1]-cuts[i]+2*MARGV > availw):
+ print("ERROR: at start %d height %d cuts %d %d, bar does not fit" %
+ (start, height, cuts[i], cuts[i+1]))
+ sys.exit(2)
+
+ for i in range(len(cuts)-1):
+ # NOTE(review): i never reaches len(cuts)-1 in this loop, so MARGV is
+ # always added, including for the last cut -- confirm the intent
+ curcut = cuts[i+1] + (MARGV if i<len(cuts)-1 else 0)
+ pcut = cuts[i]
+ if curcut - curpos > availw:
+ # spill over!
+ groups.append((curpos, pcut-curpos+MARGV))
+ curpos = pcut-MARGV
+ else:
+ # extend
+ pass
+ if curpos != cuts[-1]:
+ # add the last group
+ groups.append((curpos, cuts[-1]-curpos))
+
+ print(groups)
+
+ for (gstart, gw) in groups:
+ worklist.append((start, gstart, height, gw))
+
+print(worklist)
+
+# bucket into pages: each page is a list of worklist entries
+
+pages = []
+curlist = []
+curh = 0
+
+for work in worklist:
+    newh = curh+SEPH+work[2]
+    if newh < availh:
+        # it fits
+        curh = newh
+        curlist.append(work)
+    else:
+        # spill over (an empty page would divide by zero in the layout)
+        if curlist:
+            pages.append(curlist)
+        curh = work[2]
+        curlist = [work]
+
+if (len(curlist) > 0):
+    # add the last group
+    pages.append(curlist)
+
+print(pages)
+
+# render every page: the chunks are stacked vertically and each chunk is
+# widened to availw columns by repeating its stretchpoint columns
+for (i, page) in enumerate(pages):
+    # uint8 canvas so that the PNG writer stores the pixel values as they
+    # are (the default integer type is not an 8-bit image type)
+    matrix = numpy.full((outh,outw), 255, dtype=numpy.uint8)
+
+    totalh = sum(x[2] for x in page) + SEPH*(len(page)-1)
+    margin = availh - totalh
+    permargin = int(margin/len(page))
+    # rounding offset
+    offmargin = margin-len(page)*permargin
+
+    # fit stuff on page
+
+    cpos = MARGINH
+
+    print("page")
+    print(page)
+    for (wi, work) in enumerate(page):
+        print("work")
+        print(work)
+        print("cpos")
+        print(cpos)
+        # copy the stuff
+        # work is (start row, start column, height, width)
+        space = availw - work[3]
+
+        maxnumstretch = int(NUMSTRETCH*work[3]/w)
+        stretch = []
+
+        # find the stretchpoints
+        cumulv = [0] * work[3]
+        for r in range(work[0], work[0]+work[2]):
+            for c in range(work[1], work[1] + work[3]):
+                # int() keeps the sum out of the 8-bit pixel type
+                cumulv[c-work[1]] += 255-int(img[r][c])
+
+        sort = [(cumulv[ii], ii) for ii in range(work[3])]
+        sort = sorted(sort)
+
+        actualstretchdist = int(STRETCHDIST*work[3]/w)
+
+        taken = [False] * work[3]
+        for (val, pos) in sort:
+            if taken[pos]:
+                continue
+            bad = False
+            for j in range(max(0, pos-AROUND), min(work[3], pos+AROUND)):
+                if (cumulv[j] > val+THRESHSTRETCH*work[2]):
+                    bad = True # too close to non-minimal stuff
+            if bad:
+                continue
+            stretch.append(work[1]+pos)
+            if len(stretch) > maxnumstretch:
+                break
+            for j in range(max(0, pos-actualstretchdist), min(work[3], pos+actualstretchdist)):
+                taken[j] = True
+
+        if stretch:
+            perstretch = int(space/len(stretch))
+            offstretch = space-len(stretch)*perstretch
+        else:
+            # no usable stretchpoint: copy the chunk without stretching
+            # instead of dividing by zero
+            perstretch = offstretch = 0
+
+        sstretch = set(stretch)
+
+        coffset = 0
+        nstretch = 0
+        for c in range(work[1], work[1] + work[3]):
+            if c in sstretch:
+                # copy as many times as needed
+                rlen = perstretch
+                if nstretch < offstretch:
+                    rlen += 1 # distribute the additional space
+                for ii in range(rlen):
+                    for r in range(work[0], work[0] + work[2]):
+                        matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset + ii] = img[r][c]
+                coffset += rlen
+                nstretch += 1
+            # the column itself comes after its extra copies
+            for r in range(work[0], work[0] + work[2]):
+                matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset] = img[r][c]
+
+        cpos += work[2] + SEPH + permargin + (1 if wi < offmargin else 0)
+
+    # now dump the image
+    outfname = bname + "_" + "{:04d}".format(i) + ".png"
+    print("writing %s" % outfname)
+    imageio.imwrite(outfname, matrix)
+    print("wrote %s" % outfname)
diff --git a/splith.py b/splith.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python3 -O
+
+import imageio
+import collections
+import sys
+import numpy
+import argparse
+import os.path
+from math import ceil, floor
+
+# Command-line interface; every option has its own help text
+parser = argparse.ArgumentParser(
+    description="Split a grayscale PNG image into horizontal strips")
+parser.add_argument("filename", help="input PNG file name", type=str)
+parser.add_argument("output_folder",
+    help="folder to write output files", type=str)
+parser.add_argument("--maxheight",
+    help="maximum height of a split",
+    type=int, default=10000)
+parser.add_argument("--minheight",
+    help="minimum height of a split",
+    type=int, default=30)
+parser.add_argument("--mincontentheight",
+    help="minimum height of content between cut lines",
+    type=int, default=5)
+parser.add_argument("--distthreshold",
+    help="maximum height difference across splits",
+    type=int, default=300)
+parser.add_argument("--whitethreshold",
+    help="threshold to detect white space",
+    type=float, default=1)
+args = parser.parse_args()
+
+img = imageio.imread(args.filename)
+
+# https://stackoverflow.com/a/38549260
+# a pixel that is itself iterable means the image has several channels
+if hasattr(type(img[0][0]), '__iter__'):
+ print ("converting input image to grayscale")
+ # https://stackoverflow.com/a/51571053
+ # weighted sum of the first three channels; the result is a float array
+ img = numpy.dot(img[... , :3] , [0.299 , 0.587, 0.114])
+
+in_h = len(img)
+in_w = len(img[0])
+
+# Step 1: find "cut lines" with minimal sum up to --whitethreshold
+# while respecting --maxheight
+
+# ensure the respect of maxheight
+covered = [False] * in_h
+ncovered = 0
+
+# table of the cut lines
+cut = [False]*in_h
+
+# compute sums
+# (row_cumul[j] is 255*in_w minus the sum of row j, so 0 for a white row)
+row_cumul = [255*in_w - numpy.sum(img[j]) for j in range(in_h)]
+
+# sort potential cut lines by score
+cut_candidates = [(row_cumul[i], i) for i in range(in_h)]
+cut_candidates = sorted(cut_candidates)
+mn_score = cut_candidates[0][0]
+
+# consider all potential cut lines in sorted order
+for i in range(len(cut_candidates)):
+ (score, pos) = cut_candidates[i]
+ if score > mn_score + args.whitethreshold*in_w and ncovered == in_h:
+ break # over threshold, and enough cuts to respect maxheight
+ cut[pos] = True
+ # mark the covered regions (distance of --maxheight)
+ for j in range(max(0, pos-floor(1.*args.maxheight/2)),
+ min(in_h, pos+ceil(1.*args.maxheight/2))):
+ if not covered[j]:
+ ncovered += 1
+ covered[j] = True
+
+# the first and last must be cuts
+cut[-1] = True
+cut[0] = True
+
+# Step 2: remove content smaller than --mincontentheight by merging cuts
+
+# last is the previous cut row; rows between two cuts that are closer than
+# --mincontentheight all become cut rows
+last = 0
+for r in range(in_h):
+ if cut[r]:
+ if r-last < args.mincontentheight:
+ # forget about the previous non-cut region, it is too small
+ for rr in range(last, r+1):
+ cut[rr] = True
+ last = r
+
+# Step 3: group contiguous cuts
+
+# contiguous holds (height, pos) pairs; pos is the last non-cut row before
+# the run of cuts (0 for the first run) and pos+height is the first non-cut
+# row after it
+contiguous = []
+
+last = 0
+
+for r in range(in_h):
+ if not cut[r]:
+ if last < r-1:
+ contiguous.append((r-last, last))
+ last = r
+
+# run of cuts at the bottom of the image
+if last < in_h-1:
+ contiguous.append((in_h-1-last, last))
+
+# Step 4: find potential cuts scored by the number of contiguous cut lines
+# again respecting --maxheight
+
+# tallest runs first
+# NOTE(review): contiguous_sort[0] fails if no run was recorded in step 3 --
+# confirm this cannot happen for real inputs
+contiguous_sort = sorted(contiguous, reverse=True)
+best = contiguous_sort[0][0]
+
+covered2 = [False] * in_h
+ncovered2 = 0
+
+# final_cuts holds (pos, height) pairs, i.e. the reverse order of contiguous
+final_cuts = []
+
+for i in range(len(contiguous_sort)):
+ (height, pos) = contiguous_sort[i]
+ if height < best - args.distthreshold and ncovered2 == in_h:
+ break # we are under threshold and have enough cuts
+ final_cuts.append((pos, height))
+ # mark the covered regions
+ for j in range(max(0, pos-floor(1.*args.maxheight/2)),
+ min(in_h, pos+height+ceil(1.*args.maxheight/2))):
+ if not covered2[j]:
+ ncovered2 += 1
+ covered2[j] = True
+
+# back to top-to-bottom order
+final_cuts = sorted(final_cuts)
+
+# Step 5: produce the output files
+# just discards splits smaller than minheight
+# also trims white space from left and right
+
+# one output file per strip between two consecutive final cuts
+num = 0
+for i in range(len(final_cuts)-1):
+    pcut = final_cuts[i]
+    ncut = final_cuts[i+1]
+    start = pcut[0] + pcut[1]
+    end = ncut[0]
+
+    if end-start < args.minheight:
+        continue
+
+    # darkness of every column of the strip, computed once for both scans
+    col_dark = 255*(end-start) - numpy.sum(img[start:end], axis=0, dtype=numpy.float64)
+    threshold = (end-start)*args.whitethreshold
+    l_end = 0
+    r_end = in_w
+    # strict bounds: with "<=" an all-white strip would read column in_w
+    while l_end < r_end and col_dark[l_end] < threshold:
+        l_end += 1
+    while l_end < r_end and col_dark[r_end-1] < threshold:
+        r_end -= 1
+
+    if l_end == r_end:
+        continue
+
+    # copy the trimmed strip; the assignment casts the values to 8 bits
+    matrix = numpy.full((end-start,r_end-l_end), 255, dtype=numpy.uint8)
+    matrix[:, :] = img[start:end, l_end:r_end]
+
+    outfname = os.path.join(args.output_folder, os.path.basename(args.filename).split('.')[0] + "_" + "{:04d}".format(num) + ".png")
+
+    imageio.imwrite(outfname, matrix)
+    print("wrote %s" % outfname)
+    num += 1
+