songflower

reflow bitmap sheet music to a different paper format
git clone https://a3nm.net/git/songflower/
Log | Files | Refs | README | LICENSE

process.py (10988B)


      1 #!/usr/bin/python3 -O
      2 
      3 import imageio
      4 import sys
      5 import numpy
      6 
# --- Tuning constants -------------------------------------------------------
# "Ink" below means the sum of (255 - pixel) over a row or a column, so the
# script treats 255 as white.

# Splitting the page into lines of music (horizontal cuts)
THRESHH=0.1 # ink allowed per pixel of width, above the lightest row, for a row to still be used as a cut
MINH=50 # regions between two cuts that are shorter than this many rows are absorbed into the cut
MARGH=15 # height overflow: extra rows kept above each line, also used when scanning the dark extent around bars

# Finding bars inside a line (vertical cuts)
THRESHV=30 # per-row score tolerance above the lightest column for accepting a cut candidate
MINV=50 # minimum distance between bars: columns this close to a chosen cut are no longer eligible
MARGV=2 # margin in columns kept around a bar when grouping bars into output lines
SOMETHINGTHRESH=5 # per-row ink a column needs to count as the beginning or end of the line contents
AROUNDBARS=6 # half-width of the window in which a bar candidate must be the local ink maximum
AROUNDBARS2=3 # half-width of the inner window (the bar itself) excluded from the surroundings statistics
#THRESHBARS=50 # threshold around bars (unused, kept for reference)
OFFSET=10 # columns skipped at both ends of the line contents when looking for the lightest column
COEFF1=30 # penalty for variation around bar
COEFF2=-10 # gain for difference between the bar and its surroundings
COEFF3=10000 # penalty for height of bar area
ZONETHRESH=100 # a pixel value below this counts as dark when measuring the height of the bar area

# Output page layout
MARGINH=20 # margin in output at top and bottom
MARGINW=20 # margin in output at left and right
SEPH=20 # separation between lines

# Horizontal justification (stretchpoints are columns duplicated to widen a line)
STRETCHDIST=30 # separation between stretchpoints, for a line as wide as the source image
NUMSTRETCH=15 # number of stretchpoints for a line as wide as the source image
AROUND=2 # half-width of the window that must be light around a stretchpoint
THRESHSTRETCH=.3 # per-row ink tolerance inside that window
     32 
     33 
     34 fname = sys.argv[1]
     35 bname = fname.split('.')[0]
     36 outw = int(sys.argv[2])
     37 outh = int(sys.argv[3])
     38 
     39 img = imageio.imread(fname)
     40 
     41 # first let's find the white lines
     42 
     43 cumul = [sum([(255 - p) for p in img[j]]) for j in range(len(img))]
     44 
     45 mn = min(cumul)
     46 print(mn)
     47 
     48 
     49 availh = outh - 2*MARGINH
     50 coveredh = [False] * len(img)
     51 ncoveredh = 0
     52 cut = [False]*len(img)
     53 
     54 # add the cuts
     55 sorth = [(cumul[i], i) for i in range(len(img))]
     56 sorth = sorted(sorth)
     57 
     58 for i in range(len(sorth)):
     59     (val, pos) = sorth[i]
     60     #print("cut %d %d %d covered %d of %d" % (val, pos, sorth[0][0], ncoveredh, len(img)))
     61     if val > sorth[0][0] + THRESHH*len(img[0]) and ncoveredh == len(img):
     62         break # we are below threshold and have enough cuts
     63     cut[pos] = True
     64     #print("cut %d, ncoveredh %d, height %d" % (pos, ncoveredh, len(img)))
     65     # mark the covered regions
     66     #print("boundary %d %d" % (len(img), pos+int(availh/2)))
     67     for j in range(max(0, pos-int(availh/2)), min(len(img), pos+int(availh/2))):
     68         #print("consider %d" % j)
     69         if not coveredh[j]:
     70             ncoveredh += 1
     71             coveredh[j] = True
     72 
     73 # the first and last must be cuts
     74 cut[-1] = True
     75 cut[0] = True
     76 
     77 # merge the adjacent cuts
     78 
     79 height = [None] * len(img)
     80 starts = []
     81 
     82 last = 0
     83 for r in range(len(img)):
     84     if cut[r]:
     85         if r-last < MINH:
     86             # forget about the previous region, it is too small
     87             for rr in range(last, r+1):
     88                 cut[rr] = True
     89         else:
     90             # we have a proper region
     91             starts.append((last+1-MARGH, r-last+MARGH))
     92         last = r
     93 
     94 print(starts)
     95 
     96 worklist = []
     97 
     98 # now process every start
     99 
    100 for (start, height) in starts:
    101     # find bars
    102     w = len(img[0])
    103     cumulv = [0] * w
    104     for r in range(start, start+height):
    105         for c in range(len(img[r])):
    106             cumulv[c] += 255-img[r][c]
    107 
    108     sort = []
    109 
    110     minc = None
    111     maxc = None
    112     # add beginning and end
    113     minf = -255*(COEFF1+COEFF2)*height
    114     for c in range(len(img[r])):
    115         if cumulv[c] > SOMETHINGTHRESH*height:
    116             print("added %d" % c)
    117             minc = c
    118             sort.append((minf, minc))
    119             break
    120     for c in (range(len(img[r])))[::-1]:
    121         if cumulv[c] > SOMETHINGTHRESH*height:
    122             print("added %d" % c)
    123             maxc = c
    124             sort.append((minf, maxc))
    125             break
    126 
    127     for i in range(max(minc, AROUNDBARS), min(maxc, w-AROUNDBARS)):
    128         mymin = 255*height
    129         mymax = 0
    130         myminh = len(img)
    131         mymaxh = 0
    132         bad = False
    133         for j in range(max(0, i-AROUNDBARS), min(w, i+AROUNDBARS)):
    134             if cumulv[j] > cumulv[i]:
    135                 # not a local max
    136                 bad = True
    137                 break
    138             dar = 0
    139             for r in range(height+2*MARGH):
    140                 if img[max(start+r-MARGH, 0)][j] < ZONETHRESH:
    141                     #print(r, img[start+r-MARGH][j])
    142                     dar = r
    143                     break
    144             myminh = min(myminh, start+dar-MARGH)
    145             for r in (range(height+2*MARGH))[::-1]:
    146                 if img[min(start+r-MARGH, len(img)-1)][j] < ZONETHRESH:
    147                     dar = r
    148                     break
    149             mymaxh = max(mymaxh, start+dar-MARGH)
    150             if abs(j-i) <= AROUNDBARS2:
    151                 continue
    152             mymin = min(mymin, cumulv[j])
    153             mymax = max(mymax, cumulv[j])
    154         if not bad:
    155             # print("at pos %d the val is %d and the min is %d and max is %d and height is %d %d %d" % (i, cumulv[i], mymin, mymax, myminh, mymaxh, mymaxh-myminh))
    156             # weigh by the variation in AROUNDBARS except AROUNDBARS2
    157             # and by the difference between the bar and its surroundings
    158             sort.append((COEFF1*abs(mymin - mymax) + 
    159                     COEFF2*abs(cumulv[i] - mymax) + COEFF3*(mymaxh-myminh), i))
    160     #sort = [(cumulv[i], i) for i in range(w)]
    161     sort = sorted(sort)
    162 
    163     mnv = min(cumulv[minc+OFFSET:maxc-OFFSET])
    164     print("minc %d maxc %d mnv %d" % (minc, maxc, mnv))
    165     
    166     availw = outw - 2*MARGINW
    167 
    168     cuts = []
    169     taken = [False] * w
    170     covered = [False] * w
    171     ncovered = 0 # ensure that everyone is at distance availw/2-MARGV from a cut
    172 
    173     for i in range(len(sort)):
    174         (val, pos) = sort[i]
    175         if val > mnv + THRESHV*height and ncovered == w:
    176             break # too far away (everyone must be covered)
    177         if (taken[pos]):
    178             continue # already taken
    179 
    180         # bad = False
    181         #         if val >= 0:
    182         #             for j in range(max(0, pos-AROUNDBARS), min(w, pos+AROUNDBARS)):
    183         #                 if abs(j-pos) <= AROUNDBARS2:
    184         #                     continue # not the bar itself
    185         #                 # print("candidate bar %d val %d at j %d cumul is %d thresh %d" % (pos, val, j, cumulv[j], mnv+THRESHBARS*height))
    186         #                 if (cumulv[j] > mnv+THRESHBARS*height):
    187         #                     bad = True # too close to non-minimal stuff
    188         #             if bad:
    189         #                 continue
    190 
    191         # now write the cut
    192         cuts.append(pos)
    193         # and take all around
    194         for j in range(max(0, pos-MINV), min(w, pos+MINV)):
    195             taken[j] = True
    196         for j in range(max(0, pos-int(availw/2-MARGV)), min(w, pos+int(availw/2-MARGV))):
    197             if not covered[j]:
    198                 ncovered += 1
    199                 covered[j] = True
    200 
    201     cuts = sorted(cuts)
    202     # the cuts are ready
    203     print(cuts)
    204 
    205     # now let's bucket the cuts
    206     # TODO: more clever bucketing to minimize the deviation to average
    207     groups = []
    208     curpos = cuts[0]
    209 
    210     for i in range(len(cuts)-1):
    211         # ensure every bar fits
    212         if(cuts[i+1]-cuts[i]+2*MARGV > availw):
    213             print("ERROR: at start %d height %d cuts %d %d, bar does not fit" %
    214                     (start, height, cuts[i], cuts[i+1]))
    215             sys.exit(2)
    216 
    217     for i in range(len(cuts)-1):
    218         curcut = cuts[i+1] + (MARGV if i<len(cuts)-1 else 0)
    219         pcut = cuts[i]
    220         if curcut - curpos > availw:
    221             # spill over!
    222             groups.append((curpos, pcut-curpos+MARGV))
    223             curpos = pcut-MARGV
    224         else:
    225             # extend
    226             pass
    227     if curpos != cuts[-1]:
    228         # add the last group
    229         groups.append((curpos, cuts[-1]-curpos))
    230 
    231     print(groups)
    232 
    233     for (gstart, gw) in groups:
    234         worklist.append((start, gstart, height, gw))
    235 
    236 print(worklist)
    237 
    238 # bucket into pages
    239 
    240 pages = []
    241 curlist = []
    242 curh = 0
    243 
    244 for i in range(len(worklist)):
    245     newh = curh+SEPH+worklist[i][2]
    246     #print("finished pages %d and curh %d" % (len(pages), curh))
    247     if newh < availh:
    248         # it fits
    249         curh = newh
    250         curlist.append(worklist[i])
    251     else:
    252         # spill over
    253         pages.append(curlist)
    254         curh = worklist[i][2]
    255         curlist = [worklist[i]]
    256 
    257 if (len(curlist) > 0):
    258     # add the last group
    259     pages.append(curlist)
    260 
    261 print(pages)
    262 
    263 for (i, page) in enumerate(pages):
    264     matrix = numpy.full((outh,outw), 255)
    265 
    266     totalh = sum(x[2] for x in page) + SEPH*(len(page)-1)
    267     margin = availh - totalh
    268     permargin = int(margin/len(page))
    269     # rounding offset
    270     offmargin = margin-len(page)*permargin
    271 
    272     # fit stuff on page
    273 
    274     cpos = MARGINH
    275 
    276     print("page")
    277     print(page)
    278     for (wi, work) in enumerate(page):
    279         print("work")
    280         print(work)
    281         print("cpos")
    282         print(cpos)
    283         # copy the stuff
    284         space = availw - work[3]
    285         
    286         maxnumstretch = int(NUMSTRETCH*work[3]/w)
    287         stretch = []
    288 
    289         # find the stretchpoints
    290         cumulv = [0] * work[3]
    291         for r in range(work[0], work[0]+work[2]):
    292             for c in range(work[1], work[1] + work[3]):
    293                 cumulv[c-work[1]] += 255-img[r][c]
    294 
    295         sort = [(cumulv[ii], ii) for ii in range(work[3])]
    296         sort = sorted(sort)
    297         
    298         actualstretchdist = int(STRETCHDIST*work[3]/w)
    299         
    300         taken = [False] * work[3]
    301         for (val, pos) in sort:
    302             if taken[pos]:
    303                 continue
    304             bad = False
    305             for j in range(max(0, pos-AROUND), min(work[3], pos+AROUND)):
    306                 if (cumulv[j] > val+THRESHSTRETCH*work[2]):
    307                     #print("cumul at %d is %d vs current val %d and with margin %d" % (j, cumulv[j], val, val+THRESHSTRETCH*work[2]))
    308                     bad = True # too close to non-minimal stuff
    309             if bad:
    310                 continue
    311             stretch.append(work[1]+pos)
    312             if len(stretch) > maxnumstretch:
    313                 break
    314             for j in range(max(0, pos-actualstretchdist), min(work[3], pos+actualstretchdist)):
    315                 taken[j] = True
    316 
    317         #print("the stretch")
    318         #print(stretch)
    319         #print("end the stretch")
    320         perstretch = int(space/len(stretch))
    321         offstretch = space-len(stretch)*perstretch
    322         #print("perstretch %d" % perstretch)
    323         #print("bounds on c: %d %d" % (work[1], work[1] + work[3]))
    324 
    325         sstretch = set(stretch)
    326 
    327         coffset = 0
    328         nstretch = 0
    329         for c in range(work[1], work[1] + work[3]):
    330             if c in sstretch:
    331                 #print("we have a stretch")
    332                 # copy as many times as needed
    333                 rlen = perstretch
    334                 if nstretch < offstretch:
    335                     rlen += 1 # distribute the additional space
    336                 for ii in range(rlen):
    337                     for r in range(work[0], work[0] + work[2]):
    338                         matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset + ii] = img[r][c]
    339                 coffset += rlen
    340                 nstretch += 1
    341             for r in range(work[0], work[0] + work[2]):
    342                 matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset] = img[r][c]
    343 
    344         cpos += work[2] + SEPH + permargin + (1 if wi < offmargin else 0)
    345 
    346     # now dump the image
    347     outfname = bname + "_" + "{:04d}".format(i) + ".png"
    348     print("writing %s" % outfname)
    349     imageio.imwrite(outfname, matrix)
    350     print("wrote %s" % outfname)