add files - songflower - reflow bitmap sheet music to a different paper format

commit e8bf45b82c5e2f3fde174c28d24e7d42c8ffed1e
parent ebea45c85068e5337f0679910cab1cd1eccc2803
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Thu,  5 Sep 2019 23:46:54 +0200

add files

Diffstat:
.gitignore  | 9 +++++++++
combine.py  | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
melodia_title_page.svg  | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
process.py  | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
splith.py  | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 749 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+out
+out2
+out3
+*.bak
+chunks/
+pages/
+lines/
+outpages/
+tmp/
diff --git a/combine.py b/combine.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python3 -O
+
+# combine files in a folder (must be same width) to make images of given height
+# with margins
+
+import imageio
+import collections
+import sys
+import numpy
+import argparse
+import os.path
+import os
+from math import ceil, floor
+
+parser = argparse.ArgumentParser(
+        description="Combine same-width PNG images from a folder into pages of a given height")
+parser.add_argument("folder",
+        help="folder for input PNGs (alphabetical order)", type=str)
+parser.add_argument("output_folder",
+        help="folder to write output files", type=str)
+parser.add_argument("height",
+        help="height of produced images", type=int)
+parser.add_argument("--hmargin",
+        help="left and right margins in pixels",
+        type=int, default=10)
+parser.add_argument("--separator",
+        help="minimal vertical separation between images",
+        type=int, default=10)
+parser.add_argument("--vmargin",
+        help="top and bottom margins in pixels",
+        type=int, default=10)
+args = parser.parse_args()
+
+def make_image(images, names, ofile):
+    """Stack images vertically on a white page and write it to ofile.
+
+    The page is args.height rows tall and as wide as the first image plus
+    args.hmargin on each side.  A single image is centered vertically;
+    several images are spread so that the spare height is shared between
+    the gaps separating them.  names is only used in the error message.
+    Returns ofile, or None (writing nothing) if there are no images or
+    if they do not fit in args.height.
+    """
+    global args
+
+    if not images:
+        return None  # nothing to draw (e.g. the caller flushed an empty batch)
+
+    matrix = numpy.full((args.height, 2*args.hmargin+len(images[0][0])), 255, dtype=numpy.uint8)
+
+    cpos = args.hmargin
+    h = sum(len(x) for x in images)
+    if h + 2*args.vmargin + (len(images)-1)*args.separator > args.height:
+        print("ERROR: image(s) too large: " + " ".join(names))
+        print("These images were ignored")
+        return None
+
+    if len(images) == 1:
+        # center the image
+        rpos = int(args.vmargin + (args.height - 2*args.vmargin - h)/2)
+        matrix[rpos:rpos+h, cpos:cpos+len(images[0][0])] = images[0]
+    else:
+        # multiple images: share the spare height between the gaps
+        spare = args.height - 2*args.vmargin - h
+        permargin = spare // (len(images)-1)
+        # leftover pixels of the integer division, one each for the first gaps
+        offmargin = spare - permargin*(len(images)-1)
+        offset = args.vmargin
+        for i, image in enumerate(images):
+            matrix[offset:offset+len(image), cpos:cpos+len(image[0])] = image
+            offset += len(image) + permargin + (1 if i < offmargin else 0)
+
+    imageio.imwrite(ofile, matrix)
+    return ofile
+    
+availheight = args.height - 2*args.vmargin
+# imgs/names: current batch; totalheight: its height including separators
+imgs = []
+names = []
+totalheight = -args.separator
+num = 0
+for f in sorted(os.listdir(args.folder)):
+    img = imageio.imread(os.path.join(args.folder, f))
+    # finish the current batch if img does not fit (never flush an empty batch)
+    if imgs and len(img) + args.separator + totalheight > availheight:
+        outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+        ret = make_image(imgs, names, outfname)
+        if ret:
+            print("wrote %s into %s" % (",".join(names), ret))
+        num += 1
+        imgs = []
+        names = []
+        totalheight = -args.separator
+    totalheight += args.separator + len(img)
+    imgs.append(img)
+    names.append(f)
+
+if imgs:
+    outfname = os.path.join(args.output_folder, "out_" + "{:04d}".format(num) + ".png")
+    ret = make_image(imgs, names, outfname)
+    if ret:
+        print("wrote %s into %s" % (",".join(names), ret))
+
diff --git a/melodia_title_page.svg b/melodia_title_page.svg
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="190.42178mm"
+   height="88.758537mm"
+   viewBox="0 0 190.42178 88.758537"
+   version="1.1"
+   id="svg8"
+   inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
+   sodipodi:docname="melodia_title_page.svg">
+  <defs
+     id="defs2" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="0.7"
+     inkscape:cx="453.67818"
+     inkscape:cy="126.1432"
+     inkscape:document-units="mm"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:window-width="954"
+     inkscape:window-height="1132"
+     inkscape:window-x="2244"
+     inkscape:window-y="66"
+     inkscape:window-maximized="1" />
+  <metadata
+     id="metadata5">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-78.506562,-10.588571)">
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:39.31922531px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458335px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       x="77.956093"
+       y="37.758156"
+       id="text12"><tspan
+         sodipodi:role="line"
+         id="tspan10"
+         x="77.956093"
+         y="37.758156"
+         style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:'Nimbus Roman';-inkscape-font-specification:'Nimbus Roman,  Bold';stroke-width:0.26458335px">MELODIA</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:8.61136341px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       x="173.87093"
+       y="50.910534"
+       id="text16"><tspan
+         sodipodi:role="line"
+         x="173.87093"
+         y="50.910534"
+         style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
+         id="tspan18">A comprehensive course in sight-singing</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:9.43055153px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       x="78.258446"
+       y="66.582916"
+       id="text22"><tspan
+         sodipodi:role="line"
+         x="78.258446"
+         y="66.582916"
+         style="font-weight:bold;stroke-width:0.26458332px"
+         id="tspan24">By Samuel W. Cole and Leo R. Lewis</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:11.69080734px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       x="131.29546"
+       y="83.821426"
+       id="text30"><tspan
+         sodipodi:role="line"
+         id="tspan28"
+         x="131.29546"
+         y="83.821426"
+         style="stroke-width:0.26458332px">Public domain</tspan></text>
+    <text
+       id="text36"
+       y="98.231644"
+       x="268.20081"
+       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:6.80881834px;line-height:1.25;font-family:'DejaVu Serif';-inkscape-font-specification:'DejaVu Serif';letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       xml:space="preserve"><tspan
+         style="font-size:5.02080727px;text-align:end;text-anchor:end;stroke-width:0.26458332px"
+         y="98.231644"
+         x="268.20081"
+         sodipodi:role="line"
+         id="tspan38">Reflowed to tablet format using Songflower — <tspan
+   style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:'DejaVu Sans Mono';-inkscape-font-specification:'DejaVu Sans Mono';stroke-width:0.26458332px"
+   id="tspan42">a3nm.net/git/songflower</tspan></tspan></text>
+  </g>
+</svg>
diff --git a/process.py b/process.py
@@ -0,0 +1,350 @@
+#!/usr/bin/python3 -O
+
+import imageio
+import sys
+import numpy
+
+THRESHH=0.1 # row cuts: darkness allowed per column above the lightest row
+MINH=50 # minimum line height (shorter regions are merged into the cuts)
+MARGH=15 # height overflow: rows added above each line, also scanned around it
+
+THRESHV=30 # bar detection: score allowed per row of height above mnv
+MINV=50 # minimum distance between bars (columns this close to a cut are taken)
+MARGV=2 # margin in columns kept around cuts when grouping bars
+SOMETHINGTHRESH=5 # darkness per row of height marking the line beginning and end
+AROUNDBARS=6 # half-width of the window examined around a candidate bar
+AROUNDBARS2=3 # columns this close to the candidate are left out of the min/max
+#THRESHBARS=50 # threshold around bars
+OFFSET=10 # columns skipped after the beginning and before the end for mnv
+COEFF1=30 # penalty for variation around bar
+COEFF2=-10 # gain for difference of bar
+COEFF3=10000 # penalty for height of bar area
+ZONETHRESH=100 # a pixel below this value counts as dark in the height scan
+
+MARGINH=20 # margin in output at top and bottom
+MARGINW=20 # margin in output at left and right
+SEPH=20 # separation between lines
+
+STRETCHDIST=30 # separation between stretchpoints (scaled by the strip width)
+NUMSTRETCH=15 # stretchpoint on whole length (scaled by the strip width)
+AROUND=2 # half-width of the window checked around a stretchpoint
+THRESHSTRETCH=.3 # darkness per row allowed in that window above the stretchpoint
+
+
+fname = sys.argv[1]  # input image file
+bname = fname.split('.')[0]  # output prefix; NOTE(review): empty for paths like "./a.png"
+outw = int(sys.argv[2])  # width of the output pages
+outh = int(sys.argv[3])  # height of the output pages
+
+img = imageio.imread(fname)
+
+# first let's find the white lines: cumul[j] is the total darkness of row j
+
+cumul = [sum([(255 - p) for p in img[j]]) for j in range(len(img))]
+
+mn = min(cumul)
+print(mn)
+
+# availh: page height left once the top and bottom margins are removed
+availh = outh - 2*MARGINH
+coveredh = [False] * len(img)
+ncoveredh = 0
+cut = [False]*len(img)
+
+# add the cuts, lightest rows first: all rows under the threshold, then more until every row is covered
+sorth = [(cumul[i], i) for i in range(len(img))]
+sorth = sorted(sorth)
+
+for i in range(len(sorth)):
+    (val, pos) = sorth[i]
+    #print("cut %d %d %d covered %d of %d" % (val, pos, sorth[0][0], ncoveredh, len(img)))
+    if val > sorth[0][0] + THRESHH*len(img[0]) and ncoveredh == len(img):
+        break # over threshold, and every row is covered
+    cut[pos] = True
+    #print("cut %d, ncoveredh %d, height %d" % (pos, ncoveredh, len(img)))
+    # mark the covered regions (rows within availh/2 of this cut)
+    #print("boundary %d %d" % (len(img), pos+int(availh/2)))
+    for j in range(max(0, pos-int(availh/2)), min(len(img), pos+int(availh/2))):
+        #print("consider %d" % j)
+        if not coveredh[j]:
+            ncoveredh += 1
+            coveredh[j] = True
+
+# the first and last must be cuts
+cut[-1] = True
+cut[0] = True
+
+# merge the adjacent cuts; starts collects one (first row, height) pair per line
+
+height = [None] * len(img)  # NOTE(review): never read before being rebound later
+starts = []
+
+last = 0
+for r in range(len(img)):
+    if cut[r]:
+        if r-last < MINH:
+            # forget about the previous region, it is too small
+            for rr in range(last, r+1):
+                cut[rr] = True
+        else:
+            # proper region, extended MARGH rows upward; NOTE(review): first row can be < 0
+            starts.append((last+1-MARGH, r-last+MARGH))
+        last = r
+
+print(starts)
+
+worklist = []
+
+# now process every start
+
+for (start, height) in starts:
+    # find bars: cumulv[c] is the total darkness of column c within this line
+    w = len(img[0])
+    cumulv = [0] * w
+    for r in range(start, start+height):
+        for c in range(len(img[r])):
+            cumulv[c] += 255-img[r][c]
+
+    sort = []  # (score, column) cut candidates, lowest score first once sorted
+
+    minc = None
+    maxc = None
+    # add the first and last columns with content, both with the fixed score minf
+    minf = -255*(COEFF1+COEFF2)*height
+    for c in range(len(img[r])):
+        if cumulv[c] > SOMETHINGTHRESH*height:
+            print("added %d" % c)
+            minc = c
+            sort.append((minf, minc))
+            break
+    for c in (range(len(img[r])))[::-1]:
+        if cumulv[c] > SOMETHINGTHRESH*height:
+            print("added %d" % c)
+            maxc = c
+            sort.append((minf, maxc))
+            break
+    # NOTE(review): on a blank line minc/maxc stay None and max() below raises
+    for i in range(max(minc, AROUNDBARS), min(maxc, w-AROUNDBARS)):
+        mymin = 255*height
+        mymax = 0
+        myminh = len(img)
+        mymaxh = 0
+        bad = False
+        for j in range(max(0, i-AROUNDBARS), min(w, i+AROUNDBARS)):
+            if cumulv[j] > cumulv[i]:
+                # not a local max: a column of the window is darker than column i
+                bad = True
+                break
+            dar = 0  # offset of the first dark pixel of column j found by the scan below
+            for r in range(height+2*MARGH):
+                if img[max(start+r-MARGH, 0)][j] < ZONETHRESH:
+                    #print(r, img[start+r-MARGH][j])
+                    dar = r
+                    break
+            myminh = min(myminh, start+dar-MARGH)
+            for r in (range(height+2*MARGH))[::-1]:
+                if img[min(start+r-MARGH, len(img)-1)][j] < ZONETHRESH:
+                    dar = r
+                    break
+            mymaxh = max(mymaxh, start+dar-MARGH)
+            if abs(j-i) <= AROUNDBARS2:
+                continue
+            mymin = min(mymin, cumulv[j])
+            mymax = max(mymax, cumulv[j])
+        if not bad:
+            # print("at pos %d the val is %d and the min is %d and max is %d and height is %d %d %d" % (i, cumulv[i], mymin, mymax, myminh, mymaxh, mymaxh-myminh))
+            # weigh by the variation in AROUNDBARS except AROUNDBARS2
+            # and by the difference between the bar and its surroundings
+            sort.append((COEFF1*abs(mymin - mymax) + 
+                    COEFF2*abs(cumulv[i] - mymax) + COEFF3*(mymaxh-myminh), i))
+    #sort = [(cumulv[i], i) for i in range(w)]
+    sort = sorted(sort)
+
+    mnv = min(cumulv[minc+OFFSET:maxc-OFFSET])
+    print("minc %d maxc %d mnv %d" % (minc, maxc, mnv))
+    
+    availw = outw - 2*MARGINW
+
+    cuts = []
+    taken = [False] * w
+    covered = [False] * w
+    ncovered = 0 # ensure that everyone is at distance availw/2-MARGV from a cut
+
+    for i in range(len(sort)):
+        (val, pos) = sort[i]
+        if val > mnv + THRESHV*height and ncovered == w:
+            break # too far away (everyone must be covered)
+        if (taken[pos]):
+            continue # already taken
+
+        # bad = False
+        #         if val >= 0:
+        #             for j in range(max(0, pos-AROUNDBARS), min(w, pos+AROUNDBARS)):
+        #                 if abs(j-pos) <= AROUNDBARS2:
+        #                     continue # not the bar itself
+        #                 # print("candidate bar %d val %d at j %d cumul is %d thresh %d" % (pos, val, j, cumulv[j], mnv+THRESHBARS*height))
+        #                 if (cumulv[j] > mnv+THRESHBARS*height):
+        #                     bad = True # too close to non-minimal stuff
+        #             if bad:
+        #                 continue
+
+        # now write the cut
+        cuts.append(pos)
+        # and take all around (no other cut within MINV columns)
+        for j in range(max(0, pos-MINV), min(w, pos+MINV)):
+            taken[j] = True
+        for j in range(max(0, pos-int(availw/2-MARGV)), min(w, pos+int(availw/2-MARGV))):
+            if not covered[j]:
+                ncovered += 1
+                covered[j] = True
+
+    cuts = sorted(cuts)
+    # the cuts are ready
+    print(cuts)
+
+    # now let's bucket the cuts into groups (first column, width) of at most availw
+    # TODO: more clever bucketing to minimize the deviation to average
+    groups = []
+    curpos = cuts[0]
+
+    for i in range(len(cuts)-1):
+        # ensure every bar fits
+        if(cuts[i+1]-cuts[i]+2*MARGV > availw):
+            print("ERROR: at start %d height %d cuts %d %d, bar does not fit" %
+                    (start, height, cuts[i], cuts[i+1]))
+            sys.exit(2)
+    # NOTE(review): i<len(cuts)-1 always holds in the loop below, so MARGV is always added
+    for i in range(len(cuts)-1):
+        curcut = cuts[i+1] + (MARGV if i<len(cuts)-1 else 0)
+        pcut = cuts[i]
+        if curcut - curpos > availw:
+            # spill over!
+            groups.append((curpos, pcut-curpos+MARGV))
+            curpos = pcut-MARGV
+        else:
+            # extend
+            pass
+    if curpos != cuts[-1]:
+        # add the last group
+        groups.append((curpos, cuts[-1]-curpos))
+
+    print(groups)
+    # each work item is (first row, first column, height, width)
+    for (gstart, gw) in groups:
+        worklist.append((start, gstart, height, gw))
+
+print(worklist)
+
+# bucket into pages
+
+pages = []
+curlist = []
+curh = 0
+
+for work in worklist:
+    # page height with this strip added (no separator before the first strip)
+    newh = curh + SEPH + work[2] if curlist else work[2]
+    if newh < availh or not curlist:
+        # it fits (a strip taller than the page still gets a page of its own)
+        curh = newh
+        curlist.append(work)
+    else:
+        # spill over: close the current page and start a new one
+        pages.append(curlist)
+        curh = work[2]
+        curlist = [work]
+
+if curlist:
+    # add the last group
+    pages.append(curlist)
+
+print(pages)
+
+for (i, page) in enumerate(pages):
+    # white page; uint8 so that the image writer gets 8-bit grayscale data
+    matrix = numpy.full((outh,outw), 255, dtype=numpy.uint8)
+    totalh = sum(x[2] for x in page) + SEPH*(len(page)-1)
+    margin = availh - totalh
+    permargin = int(margin/len(page))
+    # rounding offset
+    offmargin = margin-len(page)*permargin
+
+    # fit stuff on page
+
+    cpos = MARGINH
+
+    print("page")
+    print(page)
+    for (wi, work) in enumerate(page):
+        print("work")
+        print(work)
+        print("cpos")
+        print(cpos)
+        # copy the stuff; space is the spare width to fill by stretching
+        space = availw - work[3]
+
+        maxnumstretch = int(NUMSTRETCH*work[3]/w)
+        stretch = []
+
+        # find the stretchpoints: cumulv is the darkness of each strip column
+        cumulv = [0] * work[3]
+        for r in range(work[0], work[0]+work[2]):
+            for c in range(work[1], work[1] + work[3]):
+                cumulv[c-work[1]] += 255-img[r][c]
+
+        sort = [(cumulv[ii], ii) for ii in range(work[3])]
+        sort = sorted(sort)
+
+        actualstretchdist = int(STRETCHDIST*work[3]/w)
+
+        taken = [False] * work[3]
+        for (val, pos) in sort:
+            if taken[pos]:
+                continue
+            bad = False
+            for j in range(max(0, pos-AROUND), min(work[3], pos+AROUND)):
+                if (cumulv[j] > val+THRESHSTRETCH*work[2]):
+                    # a neighbouring column is noticeably darker than this one
+                    bad = True # too close to non-minimal stuff
+            if bad:
+                continue
+            stretch.append(work[1]+pos)
+            if len(stretch) > maxnumstretch:
+                break
+            for j in range(max(0, pos-actualstretchdist), min(work[3], pos+actualstretchdist)):
+                taken[j] = True
+
+        # share the spare width between the stretchpoints (leftmost get the remainder)
+        if stretch:
+            perstretch = int(space/len(stretch))
+            offstretch = space-len(stretch)*perstretch
+        else:
+            # no usable stretchpoint: copy the strip unstretched
+            perstretch = offstretch = 0
+
+        sstretch = set(stretch)
+
+        coffset = 0
+        nstretch = 0
+        for c in range(work[1], work[1] + work[3]):
+            if c in sstretch:
+                # stretchpoint: duplicate this column to fill the spare width
+                # copy as many times as needed
+                rlen = perstretch
+                if nstretch < offstretch:
+                    rlen += 1 # distribute the additional space
+                for ii in range(rlen):
+                    for r in range(work[0], work[0] + work[2]):
+                        matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset + ii] = img[r][c]
+                coffset += rlen
+                nstretch += 1
+            for r in range(work[0], work[0] + work[2]):
+                matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset] = img[r][c]
+
+        cpos += work[2] + SEPH + permargin + (1 if wi < offmargin else 0)
+
+    # now dump the image
+    outfname = bname + "_" + "{:04d}".format(i) + ".png"
+    print("writing %s" % outfname)
+    imageio.imwrite(outfname, matrix)
+    print("wrote %s" % outfname)
diff --git a/splith.py b/splith.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python3 -O
+
+import imageio
+import collections
+import sys
+import numpy
+import argparse
+import os.path
+from math import ceil, floor
+
+parser = argparse.ArgumentParser(
+        description="Split a grayscale PNG image into horizontal strips")
+parser.add_argument("filename",
+        help="input PNG file name", type=str)
+parser.add_argument("output_folder",
+        help="folder to write output files", type=str)
+parser.add_argument("--maxheight",
+        help="maximum height of a split",
+        type=int, default=10000)
+parser.add_argument("--minheight",
+        help="minimum height of a split",
+        type=int, default=30)
+parser.add_argument("--mincontentheight",
+        help="minimum height of content between white lines (smaller content is treated as white space)",
+        type=int, default=5)
+parser.add_argument("--distthreshold",
+        help="maximum height difference across splits",
+        type=int, default=300)
+parser.add_argument("--whitethreshold",
+        help="threshold to detect white space",
+        type=float, default=1)
+args = parser.parse_args()
+
+img = imageio.imread(args.filename)
+# a pixel that is itself iterable means the image has several channels
+# https://stackoverflow.com/a/38549260
+if hasattr(type(img[0][0]), '__iter__'):
+    print ("converting input image to grayscale")
+    # weighted sum of the first three channels, https://stackoverflow.com/a/51571053
+    img = numpy.dot(img[... , :3] , [0.299 , 0.587, 0.114])
+
+in_h = len(img)
+in_w = len(img[0])
+
+# Step 1: find "cut lines" with minimal sum up to --whitethreshold
+#   while respecting --maxheight
+
+# ensure the respect of maxheight
+covered = [False] * in_h
+ncovered = 0
+
+# table of the cut lines
+cut = [False]*in_h
+
+# compute sums: row_cumul[j] is the total darkness of row j (0 for an all-255 row)
+row_cumul = [255*in_w - numpy.sum(img[j]) for j in range(in_h)]
+
+# sort potential cut lines by score
+cut_candidates = [(row_cumul[i], i) for i in range(in_h)]
+cut_candidates = sorted(cut_candidates)
+mn_score = cut_candidates[0][0]
+
+# consider all potential cut lines in sorted order
+for i in range(len(cut_candidates)):
+    (score, pos) = cut_candidates[i]
+    if score > mn_score + args.whitethreshold*in_w and ncovered == in_h:
+        break # over threshold, and enough cuts to respect maxheight
+    cut[pos] = True
+    # mark the covered regions (distance of --maxheight)
+    for j in range(max(0, pos-floor(1.*args.maxheight/2)), 
+            min(in_h, pos+ceil(1.*args.maxheight/2))):
+        if not covered[j]:
+            ncovered += 1
+            covered[j] = True
+
+# the first and last must be cuts
+cut[-1] = True
+cut[0] = True
+
+# Step 2: remove content smaller than --mincontentheight by merging cuts
+
+last = 0
+for r in range(in_h):
+    if cut[r]:
+        if r-last < args.mincontentheight:
+            # forget about the previous non-cut region, it is too small
+            for rr in range(last, r+1):
+                cut[rr] = True
+        last = r
+
+# Step 3: group contiguous cuts
+# contiguous: one (height, pos) pair per run of cut lines
+contiguous = []
+
+last = 0
+# last: latest non-cut row seen so far (0 before any is seen)
+for r in range(in_h):
+    if not cut[r]:
+        if last < r-1:
+            contiguous.append((r-last, last))
+        last = r
+
+if last < in_h-1:
+    contiguous.append((in_h-1-last, last))
+
+# Step 4: find potential cuts scored by the number of contiguous cut lines
+#   again respecting --maxheight
+# NOTE(review): contiguous_sort[0] below raises IndexError if contiguous is empty
+contiguous_sort = sorted(contiguous, reverse=True)
+best = contiguous_sort[0][0]
+
+covered2 = [False] * in_h
+ncovered2 = 0
+
+final_cuts = []
+
+for i in range(len(contiguous_sort)):
+    (height, pos) = contiguous_sort[i]
+    if height < best - args.distthreshold and ncovered2 == in_h:
+        break # we are under threshold and have enough cuts
+    final_cuts.append((pos, height))
+    # mark the covered regions
+    for j in range(max(0, pos-floor(1.*args.maxheight/2)),
+            min(in_h, pos+height+ceil(1.*args.maxheight/2))):
+        if not covered2[j]:
+            ncovered2 += 1
+            covered2[j] = True
+
+final_cuts = sorted(final_cuts)
+
+# Step 5: produce the output files
+#   just discards splits smaller than minheight
+#   also trims white space from left and right
+
+num = 0
+for i in range(len(final_cuts)-1):
+    pcut = final_cuts[i]
+    ncut = final_cuts[i+1]
+    # the strip runs from the end of one cut (pos + height) to the next cut's pos
+    start = pcut[0] + pcut[1]
+    end = ncut[0]
+
+    if end-start < args.minheight:
+        continue
+
+    l_end = 0
+    r_end = in_w
+
+    # strict comparison: on an all-white strip l_end reaches in_w, and
+    # reading column in_w would be out of bounds
+    while (l_end < r_end and 255*(end-start)-sum(img[x][l_end] for x in range(start, end)) <
+            (end-start)*args.whitethreshold):
+        l_end += 1
+    while (l_end < r_end and 255*(end-start)-sum(img[x][r_end-1] for x in range(start, end)) <
+            (end-start)*args.whitethreshold):
+        r_end -= 1
+
+    if l_end == r_end:
+        continue  # nothing but white space
+
+    matrix = numpy.full((end-start,r_end-l_end), 255, dtype=numpy.uint8)
+    matrix[:, :] = img[start:end, l_end:r_end]
+
+    outfname = os.path.join(args.output_folder, os.path.basename(args.filename).split('.')[0] + "_" + "{:04d}".format(num) + ".png")
+
+    imageio.imwrite(outfname, matrix)
+    print("wrote %s" % outfname)
+    num += 1
+

	songflower reflow bitmap sheet music to a different paper format
	git clone https://a3nm.net/git/songflower/
	Log \| Files \| Refs \| README \| LICENSE

.gitignore	\|	9	+++++++++
combine.py	\|	103	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
melodia_title_page.svg	\|	118	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
process.py	\|	350	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
splith.py	\|	169	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++