process.py (10988B)
#!/usr/bin/python3 -O
"""Re-flow a scanned grayscale page into fixed-size output pages.

Usage: process.py IMAGE OUTW OUTH

The input image is split into horizontal line regions at rows carrying
little ink.  Inside each region, vertical cut positions ("bars") are scored
and selected, consecutive cuts are grouped into strips no wider than the
usable output width, strips are stacked onto pages of OUTW x OUTH pixels,
and each strip is widened to the usable width by repeating low-ink columns
("stretch points").  Pages are written as ``<image base name>_NNNN.png``.

Pixel values are treated as 8-bit gray levels: "ink" is ``255 - pixel``.
"""

import os
import sys

import numpy

THRESHH = 0.1  # threshold for line separation
MINH = 50  # minimum line height
MARGH = 15  # height overflow

THRESHV = 30  # threshold for bar detection
MINV = 50  # minimum distance between bars
MARGV = 2  # margin around bars
SOMETHINGTHRESH = 5  # threshold to find the line beginning and end
AROUNDBARS = 6  # margin around bars
AROUNDBARS2 = 3  # margin around bars
OFFSET = 10  # real line contents
COEFF1 = 30  # penalty for variation around bar
COEFF2 = -10  # gain for difference of bar
COEFF3 = 10000  # penalty for height of bar area
ZONETHRESH = 100  # detect something in height

MARGINH = 20  # margin in output at top and bottom
MARGINW = 20  # margin in output at left and right
SEPH = 20  # separation between lines

STRETCHDIST = 30  # separation between stretchpoints
NUMSTRETCH = 15  # stretchpoint on whole length
AROUND = 2  # margin around stretchpoints
THRESHSTRETCH = .3  # threshold around stretchpoints


def ink_profile(pixels, axis):
    """Return the summed ink (``255 - pixel``) of *pixels* along *axis*.

    The sum is accumulated in int64 and returned as a list of Python ints.
    Adding uint8 scalars one at a time can wrap around under NumPy's scalar
    promotion rules (NEP 50), so the reduction is done by NumPy with an
    explicit wide accumulator instead.
    """
    return numpy.sum(255 - pixels, axis=axis, dtype=numpy.int64).tolist()


def find_line_regions(img, availh):
    """Split the image into horizontal line regions.

    Rows are visited from least to most ink and marked as cuts until the
    ink exceeds the minimum by ``THRESHH * width`` *and* every row lies
    within ``availh / 2`` rows of some cut.  Gaps between consecutive cuts
    shorter than ``MINH`` are dropped; the others become regions that are
    extended upwards by ``MARGH`` rows.

    Returns a list of ``(start_row, height)`` tuples.  ``start_row`` is
    clamped to 0 so a region near the top never indexes rows from the
    bottom of the image through negative indices.
    """
    nrows = len(img)
    cumul = ink_profile(img, axis=1)
    print(min(cumul))

    half = int(availh / 2)
    covered = [False] * nrows
    ncovered = 0
    cut = [False] * nrows

    order = sorted((val, row) for row, val in enumerate(cumul))
    limit = order[0][0] + THRESHH * len(img[0])
    for val, pos in order:
        if val > limit and ncovered == nrows:
            break  # above the threshold and every row is already covered
        cut[pos] = True
        for j in range(max(0, pos - half), min(nrows, pos + half)):
            if not covered[j]:
                covered[j] = True
                ncovered += 1

    # the first and last rows must be cuts
    cut[-1] = True
    cut[0] = True

    starts = []
    last = 0
    for r in range(nrows):
        if not cut[r]:
            continue
        # gaps shorter than MINH are too small to be a line: just move on
        if r - last >= MINH:
            start = max(0, last + 1 - MARGH)
            starts.append((start, r + 1 - start))
        last = r
    return starts


def _dark_extents(img, start, height):
    """Return per-column first/last dark row around a region.

    The scanned window is rows ``start - MARGH`` to
    ``start + height + MARGH - 1``, clamped to the image on both sides.
    A pixel is dark when it is below ``ZONETHRESH``.  Both results are
    absolute row positions (``start - MARGH + offset``); a column without
    any dark pixel reports offset 0 for both.
    """
    n = height + 2 * MARGH
    rows = numpy.clip(numpy.arange(start - MARGH, start - MARGH + n),
                      0, len(img) - 1)
    dark = img[rows] < ZONETHRESH
    has_dark = dark.any(axis=0)
    first = dark.argmax(axis=0)  # 0 when the column has no dark pixel
    last = numpy.where(has_dark, n - 1 - dark[::-1].argmax(axis=0), 0)
    base = start - MARGH
    return (first + base).tolist(), (last + base).tolist()


def find_bar_cuts(img, start, height, availw):
    """Choose the columns where the region may be cut.

    The first and last columns whose ink exceeds ``SOMETHINGTHRESH * height``
    delimit the line and are always the best-ranked candidates.  Every
    column in between that is an ink maximum within ``AROUNDBARS`` columns
    is scored from the ink variation around it (``COEFF1``), its contrast
    with its surroundings (``COEFF2``) and the vertical extent of dark
    pixels nearby (``COEFF3``); lower scores are better.

    Candidates are accepted in score order, at least ``MINV`` columns apart,
    until the score exceeds ``mnv + THRESHV * height`` and every column is
    within ``availw / 2 - MARGV`` of an accepted cut.

    Returns the sorted list of cut columns (empty if the region has no
    column with enough ink).
    """
    w = img.shape[1]
    cumulv = ink_profile(img[start:start + height], axis=0)

    threshold = SOMETHINGTHRESH * height
    inked = [c for c, val in enumerate(cumulv) if val > threshold]
    if not inked:
        print("WARNING: no content found at start %d height %d, skipped"
              % (start, height), file=sys.stderr)
        return []
    minc, maxc = inked[0], inked[-1]

    # the beginning and the end of the line always rank first
    minf = -255 * (COEFF1 + COEFF2) * height
    print("added %d" % minc)
    print("added %d" % maxc)
    candidates = [(minf, minc), (minf, maxc)]

    first_dark, last_dark = _dark_extents(img, start, height)

    for i in range(max(minc, AROUNDBARS), min(maxc, w - AROUNDBARS)):
        window = range(max(0, i - AROUNDBARS), min(w, i + AROUNDBARS))
        if any(cumulv[j] > cumulv[i] for j in window):
            continue  # not a local max
        myminh = min([len(img)] + [first_dark[j] for j in window])
        mymaxh = max([0] + [last_dark[j] for j in window])
        # surroundings: the window minus the bar itself
        around = [cumulv[j] for j in window if abs(j - i) > AROUNDBARS2]
        mymin = min([255 * height] + around)
        mymax = max([0] + around)
        candidates.append((COEFF1 * abs(mymin - mymax)
                           + COEFF2 * abs(cumulv[i] - mymax)
                           + COEFF3 * (mymaxh - myminh), i))
    candidates.sort()

    inner = cumulv[minc + OFFSET:max(maxc - OFFSET, 0)]
    # a very narrow line has no "inner" part: fall back to the whole line
    mnv = min(inner) if inner else min(cumulv[minc:maxc + 1])
    print("minc %d maxc %d mnv %d" % (minc, maxc, mnv))

    cuts = []
    taken = [False] * w
    covered = [False] * w
    ncovered = 0  # ensure that everyone is at distance availw/2-MARGV from a cut
    reach = int(availw / 2 - MARGV)

    for val, pos in candidates:
        if val > mnv + THRESHV * height and ncovered == w:
            break  # too far away (everyone must be covered)
        if taken[pos]:
            continue  # too close to an already accepted cut
        cuts.append(pos)
        for j in range(max(0, pos - MINV), min(w, pos + MINV)):
            taken[j] = True
        for j in range(max(0, pos - reach), min(w, pos + reach)):
            if not covered[j]:
                covered[j] = True
                ncovered += 1

    return sorted(cuts)


def group_cuts(cuts, availw, start, height):
    """Bucket consecutive cuts into strips no wider than *availw*.

    *cuts* must be sorted.  *start* and *height* are only used in the error
    message.  Returns a list of ``(first_column, width)`` tuples; a strip
    that is followed by another one overlaps it by ``MARGV`` columns on each
    side of the shared cut.

    Exits the program with status 2 when two adjacent cuts (plus margins)
    are further apart than *availw*.
    """
    # TODO: more clever bucketing to minimize the deviation to average
    if not cuts:
        return []

    for left, right in zip(cuts, cuts[1:]):
        # ensure every bar fits
        if right - left + 2 * MARGV > availw:
            print("ERROR: at start %d height %d cuts %d %d, bar does not fit" %
                  (start, height, left, right))
            sys.exit(2)

    groups = []
    curpos = cuts[0]
    for pcut, ncut in zip(cuts, cuts[1:]):
        if ncut + MARGV - curpos > availw:
            # spill over: close the strip at the previous cut
            groups.append((curpos, pcut - curpos + MARGV))
            curpos = pcut - MARGV
    if curpos != cuts[-1]:
        # add the last group
        groups.append((curpos, cuts[-1] - curpos))
    return groups


def paginate(worklist, availh):
    """Stack work items ``(row, column, height, width)`` onto pages.

    An item joins the current page while the running height (item heights
    plus ``SEPH`` per item) stays below *availh*; otherwise it opens a new
    page.  Returns a list of pages, each a non-empty list of work items.
    """
    pages = []
    curlist = []
    curh = 0
    for work in worklist:
        newh = curh + SEPH + work[2]
        if newh < availh:
            # it fits
            curh = newh
            curlist.append(work)
        else:
            # spill over; never emit an empty page when the very first
            # item is already too tall
            if curlist:
                pages.append(curlist)
            curh = work[2]
            curlist = [work]
    if curlist:
        pages.append(curlist)
    return pages


def find_stretch_points(img, work):
    """Pick the columns of a strip that will be repeated to widen it.

    Columns are tried from least to most ink.  A column is rejected when a
    neighbour within ``AROUND`` columns has more than
    ``THRESHSTRETCH * height`` additional ink, or when it is closer than the
    scaled ``STRETCHDIST`` to an accepted point.  The search stops once the
    number of points exceeds the scaled ``NUMSTRETCH`` (so one more point
    than that cap may be returned).

    Returns absolute column indices in acceptance order.
    """
    top, left, height, width = work
    full_w = img.shape[1]
    maxnumstretch = int(NUMSTRETCH * width / full_w)
    actualstretchdist = int(STRETCHDIST * width / full_w)

    cumulv = ink_profile(img[top:top + height, left:left + width], axis=0)
    order = sorted((val, pos) for pos, val in enumerate(cumulv))

    stretch = []
    taken = [False] * width
    for val, pos in order:
        if taken[pos]:
            continue
        limit = val + THRESHSTRETCH * height
        neighbours = range(max(0, pos - AROUND), min(width, pos + AROUND))
        if any(cumulv[j] > limit for j in neighbours):
            continue  # too close to non-minimal stuff
        stretch.append(left + pos)
        if len(stretch) > maxnumstretch:
            break
        for j in range(max(0, pos - actualstretchdist),
                       min(width, pos + actualstretchdist)):
            taken[j] = True
    return stretch


def render_page(img, page, outw, outh):
    """Draw the work items of one page and return the page as an array.

    The page is a white (255) ``outh`` x ``outw`` uint8 array, so the image
    writer receives 8-bit data directly.  Strips are placed top to bottom
    starting at ``MARGINH``, separated by ``SEPH`` plus an equal share of
    the leftover vertical space.  Each strip starts at column ``MARGINW``
    and is widened to the usable width by repeating its stretch-point
    columns; when no stretch point exists it is copied unstretched.
    """
    availw = outw - 2 * MARGINW
    availh = outh - 2 * MARGINH
    matrix = numpy.full((outh, outw), 255, dtype=numpy.uint8)

    totalh = sum(work[2] for work in page) + SEPH * (len(page) - 1)
    margin = availh - totalh
    permargin = int(margin / len(page))
    # rounding offset: the first `offmargin` strips get one extra row
    offmargin = margin - len(page) * permargin

    cpos = MARGINH

    print("page")
    print(page)
    for wi, work in enumerate(page):
        print("work")
        print(work)
        print("cpos")
        print(cpos)
        top, left, height, width = work
        space = availw - width

        stretch = find_stretch_points(img, work)
        if stretch:
            perstretch = int(space / len(stretch))
            offstretch = space - len(stretch) * perstretch
        else:
            perstretch = 0
            offstretch = 0
        sstretch = set(stretch)

        strip = img[top:top + height]
        rows = slice(cpos, cpos + height)
        coffset = 0
        nstretch = 0
        for c in range(left, left + width):
            if c in sstretch:
                # copy as many times as needed
                rlen = perstretch
                if nstretch < offstretch:
                    rlen += 1  # distribute the additional space
                for ii in range(rlen):
                    matrix[rows, MARGINW + (c - left) + coffset + ii] = strip[:, c]
                coffset += rlen
                nstretch += 1
            matrix[rows, MARGINW + (c - left) + coffset] = strip[:, c]

        cpos += height + SEPH + permargin + (1 if wi < offmargin else 0)

    return matrix


def main(argv=None):
    """Run the whole pipeline for ``argv = [prog, IMAGE, OUTW, OUTH]``."""
    # Imported here so the layout helpers above stay importable without
    # the imaging backend.
    import imageio

    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit("usage: %s IMAGE OUTW OUTH" % argv[0])

    fname = argv[1]
    # splitext keeps directory components such as "./" or "../" intact
    bname = os.path.splitext(fname)[0]
    outw = int(argv[2])
    outh = int(argv[3])

    img = numpy.asarray(imageio.imread(fname))
    if img.ndim != 2:
        sys.exit("ERROR: %s is not a single-channel (grayscale) image" % fname)

    availh = outh - 2 * MARGINH
    availw = outw - 2 * MARGINW

    # first let's find the white lines
    starts = find_line_regions(img, availh)
    print(starts)

    # now process every start
    worklist = []
    for start, height in starts:
        cuts = find_bar_cuts(img, start, height, availw)
        print(cuts)
        groups = group_cuts(cuts, availw, start, height)
        print(groups)
        worklist.extend((start, gstart, height, gw) for gstart, gw in groups)

    print(worklist)

    # bucket into pages
    pages = paginate(worklist, availh)
    print(pages)

    for i, page in enumerate(pages):
        matrix = render_page(img, page, outw, outh)

        # now dump the image
        outfname = bname + "_" + "{:04d}".format(i) + ".png"
        print("writing %s" % outfname)
        imageio.imwrite(outfname, matrix)
        print("wrote %s" % outfname)


if __name__ == "__main__":
    main()