commit b36585ad164b6a6b7367fbf1cb28a9fe94bfe4b5
parent 8a369a6743204a1d258f9ed72947dd2809078802
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sat, 7 Sep 2019 00:15:14 +0200
rm old code
Diffstat:
process.py | | | 350 | ------------------------------------------------------------------------------- |
1 file changed, 0 insertions(+), 350 deletions(-)
diff --git a/process.py b/process.py
@@ -1,350 +0,0 @@
-#!/usr/bin/python3 -O
-
-import imageio
-import sys
-import numpy
-
-THRESHH=0.1 # threshold for line separation
-MINH=50 # minimum line height
-MARGH=15 # height overflow
-
-THRESHV=30 # threshold for bar detection
-MINV=50 # minimum distance between bars
-MARGV=2 # margin around bars
-SOMETHINGTHRESH=5 # threshold to find the line beginning and end
-AROUNDBARS=6 # half-width of the window scanned around a candidate bar
-AROUNDBARS2=3 # half-width of the inner zone (the bar itself) skipped when measuring variation
-#THRESHBARS=50 # threshold around bars
-OFFSET=10 # real line contents
-COEFF1=30 # penalty for variation around bar
-COEFF2=-10 # gain for difference of bar
-COEFF3=10000 # penalty for height of bar area
-ZONETHRESH=100 # detect something in height
-
-MARGINH=20 # margin in output at top and bottom
-MARGINW=20 # margin in output at left and right
-SEPH=20 # separation between lines
-
-STRETCHDIST=30 # separation between stretchpoints
-NUMSTRETCH=15 # stretchpoint on whole length
-AROUND=2 # margin around stretchpoints
-THRESHSTRETCH=.3 # threshold around stretchpoints
-
-
-fname = sys.argv[1]
-bname = fname.split('.')[0]
-outw = int(sys.argv[2])
-outh = int(sys.argv[3])
-
-img = imageio.imread(fname)
-
-# first let's find the white lines
-
-cumul = [sum([(255 - p) for p in img[j]]) for j in range(len(img))]
-
-mn = min(cumul)
-print(mn)
-
-
-availh = outh - 2*MARGINH
-coveredh = [False] * len(img)
-ncoveredh = 0
-cut = [False]*len(img)
-
-# add the cuts
-sorth = [(cumul[i], i) for i in range(len(img))]
-sorth = sorted(sorth)
-
-for i in range(len(sorth)):
- (val, pos) = sorth[i]
- #print("cut %d %d %d covered %d of %d" % (val, pos, sorth[0][0], ncoveredh, len(img)))
- if val > sorth[0][0] + THRESHH*len(img[0]) and ncoveredh == len(img):
 - break # we are above the threshold and every row is covered
- cut[pos] = True
- #print("cut %d, ncoveredh %d, height %d" % (pos, ncoveredh, len(img)))
- # mark the covered regions
- #print("boundary %d %d" % (len(img), pos+int(availh/2)))
- for j in range(max(0, pos-int(availh/2)), min(len(img), pos+int(availh/2))):
- #print("consider %d" % j)
- if not coveredh[j]:
- ncoveredh += 1
- coveredh[j] = True
-
-# the first and last must be cuts
-cut[-1] = True
-cut[0] = True
-
-# merge the adjacent cuts
-
-height = [None] * len(img)
-starts = []
-
-last = 0
-for r in range(len(img)):
- if cut[r]:
- if r-last < MINH:
- # forget about the previous region, it is too small
- for rr in range(last, r+1):
- cut[rr] = True
- else:
- # we have a proper region
- starts.append((last+1-MARGH, r-last+MARGH))
- last = r
-
-print(starts)
-
-worklist = []
-
-# now process every start
-
-for (start, height) in starts:
- # find bars
- w = len(img[0])
- cumulv = [0] * w
- for r in range(start, start+height):
- for c in range(len(img[r])):
- cumulv[c] += 255-img[r][c]
-
- sort = []
-
- minc = None
- maxc = None
- # add beginning and end
- minf = -255*(COEFF1+COEFF2)*height
- for c in range(len(img[r])):
- if cumulv[c] > SOMETHINGTHRESH*height:
- print("added %d" % c)
- minc = c
- sort.append((minf, minc))
- break
- for c in (range(len(img[r])))[::-1]:
- if cumulv[c] > SOMETHINGTHRESH*height:
- print("added %d" % c)
- maxc = c
- sort.append((minf, maxc))
- break
-
- for i in range(max(minc, AROUNDBARS), min(maxc, w-AROUNDBARS)):
- mymin = 255*height
- mymax = 0
- myminh = len(img)
- mymaxh = 0
- bad = False
- for j in range(max(0, i-AROUNDBARS), min(w, i+AROUNDBARS)):
- if cumulv[j] > cumulv[i]:
- # not a local max
- bad = True
- break
- dar = 0
- for r in range(height+2*MARGH):
- if img[max(start+r-MARGH, 0)][j] < ZONETHRESH:
- #print(r, img[start+r-MARGH][j])
- dar = r
- break
- myminh = min(myminh, start+dar-MARGH)
- for r in (range(height+2*MARGH))[::-1]:
- if img[min(start+r-MARGH, len(img)-1)][j] < ZONETHRESH:
- dar = r
- break
- mymaxh = max(mymaxh, start+dar-MARGH)
- if abs(j-i) <= AROUNDBARS2:
- continue
- mymin = min(mymin, cumulv[j])
- mymax = max(mymax, cumulv[j])
- if not bad:
- # print("at pos %d the val is %d and the min is %d and max is %d and height is %d %d %d" % (i, cumulv[i], mymin, mymax, myminh, mymaxh, mymaxh-myminh))
- # weigh by the variation in AROUNDBARS except AROUNDBARS2
- # and by the difference between the bar and its surroundings
- sort.append((COEFF1*abs(mymin - mymax) +
- COEFF2*abs(cumulv[i] - mymax) + COEFF3*(mymaxh-myminh), i))
- #sort = [(cumulv[i], i) for i in range(w)]
- sort = sorted(sort)
-
- mnv = min(cumulv[minc+OFFSET:maxc-OFFSET])
- print("minc %d maxc %d mnv %d" % (minc, maxc, mnv))
-
- availw = outw - 2*MARGINW
-
- cuts = []
- taken = [False] * w
- covered = [False] * w
- ncovered = 0 # ensure that everyone is at distance availw/2-MARGV from a cut
-
- for i in range(len(sort)):
- (val, pos) = sort[i]
- if val > mnv + THRESHV*height and ncovered == w:
- break # too far away (everyone must be covered)
- if (taken[pos]):
- continue # already taken
-
- # bad = False
- # if val >= 0:
- # for j in range(max(0, pos-AROUNDBARS), min(w, pos+AROUNDBARS)):
- # if abs(j-pos) <= AROUNDBARS2:
- # continue # not the bar itself
- # # print("candidate bar %d val %d at j %d cumul is %d thresh %d" % (pos, val, j, cumulv[j], mnv+THRESHBARS*height))
- # if (cumulv[j] > mnv+THRESHBARS*height):
- # bad = True # too close to non-minimal stuff
- # if bad:
- # continue
-
- # now write the cut
- cuts.append(pos)
- # and take all around
- for j in range(max(0, pos-MINV), min(w, pos+MINV)):
- taken[j] = True
- for j in range(max(0, pos-int(availw/2-MARGV)), min(w, pos+int(availw/2-MARGV))):
- if not covered[j]:
- ncovered += 1
- covered[j] = True
-
- cuts = sorted(cuts)
- # the cuts are ready
- print(cuts)
-
- # now let's bucket the cuts
- # TODO: more clever bucketing to minimize the deviation to average
- groups = []
- curpos = cuts[0]
-
- for i in range(len(cuts)-1):
- # ensure every bar fits
- if(cuts[i+1]-cuts[i]+2*MARGV > availw):
- print("ERROR: at start %d height %d cuts %d %d, bar does not fit" %
- (start, height, cuts[i], cuts[i+1]))
- sys.exit(2)
-
- for i in range(len(cuts)-1):
- curcut = cuts[i+1] + (MARGV if i<len(cuts)-1 else 0)
- pcut = cuts[i]
- if curcut - curpos > availw:
- # spill over!
- groups.append((curpos, pcut-curpos+MARGV))
- curpos = pcut-MARGV
- else:
- # extend
- pass
- if curpos != cuts[-1]:
- # add the last group
- groups.append((curpos, cuts[-1]-curpos))
-
- print(groups)
-
- for (gstart, gw) in groups:
- worklist.append((start, gstart, height, gw))
-
-print(worklist)
-
-# bucket into pages
-
-pages = []
-curlist = []
-curh = 0
-
-for i in range(len(worklist)):
- newh = curh+SEPH+worklist[i][2]
- #print("finished pages %d and curh %d" % (len(pages), curh))
- if newh < availh:
- # it fits
- curh = newh
- curlist.append(worklist[i])
- else:
- # spill over
- pages.append(curlist)
- curh = worklist[i][2]
- curlist = [worklist[i]]
-
-if (len(curlist) > 0):
- # add the last group
- pages.append(curlist)
-
-print(pages)
-
-for (i, page) in enumerate(pages):
- matrix = numpy.full((outh,outw), 255)
-
- totalh = sum(x[2] for x in page) + SEPH*(len(page)-1)
- margin = availh - totalh
- permargin = int(margin/len(page))
- # rounding offset
- offmargin = margin-len(page)*permargin
-
- # fit stuff on page
-
- cpos = MARGINH
-
- print("page")
- print(page)
- for (wi, work) in enumerate(page):
- print("work")
- print(work)
- print("cpos")
- print(cpos)
- # copy the stuff
- space = availw - work[3]
-
- maxnumstretch = int(NUMSTRETCH*work[3]/w)
- stretch = []
-
- # find the stretchpoints
- cumulv = [0] * work[3]
- for r in range(work[0], work[0]+work[2]):
- for c in range(work[1], work[1] + work[3]):
- cumulv[c-work[1]] += 255-img[r][c]
-
- sort = [(cumulv[ii], ii) for ii in range(work[3])]
- sort = sorted(sort)
-
- actualstretchdist = int(STRETCHDIST*work[3]/w)
-
- taken = [False] * work[3]
- for (val, pos) in sort:
- if taken[pos]:
- continue
- bad = False
- for j in range(max(0, pos-AROUND), min(work[3], pos+AROUND)):
- if (cumulv[j] > val+THRESHSTRETCH*work[2]):
- #print("cumul at %d is %d vs current val %d and with margin %d" % (j, cumulv[j], val, val+THRESHSTRETCH*work[2]))
- bad = True # too close to non-minimal stuff
- if bad:
- continue
- stretch.append(work[1]+pos)
- if len(stretch) > maxnumstretch:
- break
- for j in range(max(0, pos-actualstretchdist), min(work[3], pos+actualstretchdist)):
- taken[j] = True
-
- #print("the stretch")
- #print(stretch)
- #print("end the stretch")
- perstretch = int(space/len(stretch))
- offstretch = space-len(stretch)*perstretch
- #print("perstretch %d" % perstretch)
- #print("bounds on c: %d %d" % (work[1], work[1] + work[3]))
-
- sstretch = set(stretch)
-
- coffset = 0
- nstretch = 0
- for c in range(work[1], work[1] + work[3]):
- if c in sstretch:
- #print("we have a stretch")
- # copy as many times as needed
- rlen = perstretch
- if nstretch < offstretch:
- rlen += 1 # distribute the additional space
- for ii in range(rlen):
- for r in range(work[0], work[0] + work[2]):
- matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset + ii] = img[r][c]
- coffset += rlen
- nstretch += 1
- for r in range(work[0], work[0] + work[2]):
- matrix[cpos+(r-work[0])][MARGINW + (c-work[1]) + coffset] = img[r][c]
-
- cpos += work[2] + SEPH + permargin + (1 if wi < offmargin else 0)
-
- # now dump the image
- outfname = bname + "_" + "{:04d}".format(i) + ".png"
- print("writing %s" % outfname)
- imageio.imwrite(outfname, matrix)
- print("wrote %s" % outfname)