|
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import sys import numpy as np import scipy.optimize import matplotlib.pyplot as plt import cv2 import ellipse
# Screen-sized copies of every debug image, in the order they were produced.
DEBUG_IMAGES = []


def debug_show(name, src):
    """Save a debug image to disk and queue a preview of it for display.

    The image is written at full resolution to ``debugNN_<name>.png``, where
    NN is the number of debug images recorded so far. A copy, shrunk by an
    integer factor when it does not fit in 1280x700, is appended to
    ``DEBUG_IMAGES``.

    Args:
        name: short label used in the output file name.
        src: image array; only ``src.shape[:2]`` (rows, cols) is inspected.
    """
    out_path = 'debug{:02d}_{}.png'.format(len(DEBUG_IMAGES), name)
    cv2.imwrite(out_path, src)

    rows, cols = src.shape[:2]

    # Smallest integer divisor that brings the image within 1280x700.
    shrink = np.ceil(max(cols/1280.0, rows/700.0))
    scale = 1.0/shrink

    if scale < 1.0:
        preview = cv2.resize(src, (0, 0), None, scale, scale, cv2.INTER_AREA)
    else:
        preview = src.copy()

    DEBUG_IMAGES.append(preview)
def translation(x, y):
    """Return the 3x3 homogeneous matrix that translates by (x, y)."""
    mat = np.eye(3, dtype=float)
    mat[0, 2] = x
    mat[1, 2] = y
    return mat
def rotation(theta):
    """Return the 3x3 homogeneous matrix rotating by *theta* radians."""
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    mat = np.eye(3, dtype=float)
    mat[:2, :2] = [[cos_t, -sin_t],
                   [sin_t, cos_t]]
    return mat
def perspective_warp(a, b):
    """Return a 3x3 homography that is the identity except for its bottom
    row, which is ``[a, b, 1]``."""
    mat = np.eye(3, dtype=float)
    mat[2, 0] = a
    mat[2, 1] = b
    return mat
def slant(sx):
    """Return the 3x3 horizontal shear matrix mapping x to ``x + sx*y``."""
    mat = np.eye(3, dtype=float)
    mat[0, 1] = sx
    return mat
def softmax(x, k=1.0):
    """Return a smooth approximation of an extreme value of *x*.

    Computes ``log(sum(exp(k*x))) / k`` (a scaled log-sum-exp). For positive
    *k* this approximates ``x.max()``; for negative *k* it approximates
    ``x.min()``. Larger ``|k|`` gives a sharper approximation.

    Args:
        x: non-empty NumPy array.
        k: non-zero sharpness factor.

    Returns:
        The smooth maximum (k > 0) or smooth minimum (k < 0) as a scalar.
    """
    # Shift by the element that makes every exponent <= 0, so exp() cannot
    # overflow: the max when k is positive, the min when k is negative.
    pivot = x.max() if k >= 0 else x.min()
    return np.log(np.exp(k*(x - pivot)).sum()) / k + pivot
def skewed_widths(contours, H):
    """Return a soft maximum of the contours' horizontal extents under *H*.

    Each contour is mapped through the homography *H* with
    ``cv2.perspectiveTransform``; its width is the spread of the transformed
    x coordinates. The widths are combined with ``softmax(..., 0.1)``.

    Args:
        contours: iterable of point arrays accepted by
            ``cv2.perspectiveTransform`` (indexed here as ``[:, :, 0]``).
        H: 3x3 homography.
    """
    def x_extent(contour):
        xs = cv2.perspectiveTransform(contour, H)[:, :, 0]
        return xs.max() - xs.min()

    widths = np.array([x_extent(contour) for contour in contours])
    return softmax(widths, 0.1)
def centered_warp(u0, v0, a, b):
    """Return ``perspective_warp(a, b)`` applied about the point (u0, v0).

    The result is ``T(u0, v0) . P(a, b) . T(-u0, -v0)``: move (u0, v0) to the
    origin, apply the perspective warp, then move back.
    """
    to_origin = translation(-u0, -v0)
    warp_at_origin = np.dot(perspective_warp(a, b), to_origin)
    return np.dot(translation(u0, v0), warp_at_origin)
def warp_containing_points(img, pts, H, border=4, shape_only=False):
    """Warp *img* by *H*, shifted so that the warped *pts* fit in the output.

    The bounding rectangle of ``pts`` after transformation by *H* is
    computed, and *H* is pre-multiplied by a translation that places that
    rectangle *border* pixels from the top-left corner of the output.

    Args:
        img: source image (unused when *shape_only* is true).
        pts: points in the layout expected by ``cv2.perspectiveTransform``.
        H: 3x3 homography.
        border: margin in pixels added on every side.
        shape_only: when true, skip the image warp.

    Returns:
        ``((rows, cols), TH)`` if *shape_only*, else ``(warped_image, TH)``,
        where ``TH`` is the translated homography actually used.
    """
    warped_pts = cv2.perspectiveTransform(pts, H)
    left, top, box_w, box_h = cv2.boundingRect(warped_pts)
    print('got bounding rect', left, top, box_w, box_h)

    shift = translation(border - left, border - top)
    TH = np.dot(shift, H)

    out_w = box_w + 2*border
    out_h = box_h + 2*border

    if shape_only:
        return (out_h, out_w), TH

    warped = cv2.warpPerspective(img, TH, (out_w, out_h),
                                 borderMode=cv2.BORDER_REPLICATE)
    return warped, TH
def conic_area_discrepancy(conics, x, H, opt_results=None):
    """Measure how unequal the conics' sizes are after warping by *H*.

    Each conic is transformed with ``ellipse.conic_transform`` and the second
    value returned by ``ellipse.conic_scale`` is taken as its size
    (presumably an area-like quantity -- confirm in the ``ellipse`` module).
    Sizes are divided by their mean and shifted to zero mean; the result is
    half the sum of their squares.

    Args:
        conics: iterable of conics accepted by ``ellipse.conic_transform``.
        x: optimizer parameters that produced *H*; only recorded.
        H: 3x3 homography.
        opt_results: optional list. ``(x, H, value)`` is appended when the
            list is empty or the value beats the last recorded one.

    Returns:
        The discrepancy value.
    """
    def warped_size(conic):
        _, ab = ellipse.conic_scale(ellipse.conic_transform(conic, H))
        # An infinite size is replaced by a huge finite penalty.
        return 1e20 if np.isinf(ab) else ab

    areas = np.array([warped_size(conic) for conic in conics])
    areas /= areas.mean()  # rescale so mean is 1.0
    areas -= 1             # subtract off mean

    rval = 0.5*np.dot(areas, areas)

    if opt_results is not None:
        is_improvement = not opt_results or rval < opt_results[-1][-1]
        if is_improvement:
            opt_results.append((x, H, rval))

    return rval
def threshold(img):
    """Return an inverted adaptive-threshold binary image of *img*.

    Args:
        img: 8-bit grayscale image, or a colour image in OpenCV's BGR channel
            order (as returned by ``cv2.imread``).

    Returns:
        Single-channel image with values 0 or 255, produced by
        ``cv2.adaptiveThreshold`` with a 101-pixel mean window, offset 21 and
        ``THRESH_BINARY_INV``.
    """
    if len(img.shape) > 2:
        # cv2.imread yields BGR, so convert with the BGR weights; RGB2GRAY
        # would swap the red and blue luminance coefficients.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # A dark image is inverted so the background is always bright.
    if img.mean() < 100:
        img = 255 - img

    return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY_INV, 101, 21)
def get_contours(img):
    """Threshold *img* and return its contours with a two-level hierarchy.

    The thresholded image is also recorded through ``debug_show``.

    Returns:
        ``(contours, hierarchy)`` from ``cv2.findContours`` called with
        ``RETR_CCOMP`` and ``CHAIN_APPROX_NONE``.
    """
    work = threshold(img)
    debug_show('threshold', work)

    found = cv2.findContours(work, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the last two items are right in both.
    contours, hierarchy = found[-2:]

    return contours, hierarchy
def _linear_fit_r2(x, y):
    """Fit ``y ~ a*x + b``; return ``(coeffs, r2)`` with r2 the coefficient
    of determination of the fit."""
    coeffs = np.polyfit(x, y, deg=1)
    ypred = np.polyval(coeffs, x)
    sstot = np.sum((y - np.mean(y))**2)
    ssres = np.sum((y.flatten() - ypred.flatten())**2)
    return coeffs, 1 - ssres/sstot


def _plot_position_vs_area(centroids, areas):
    """Save a scatter plot of contour area against centroid x and y, with
    linear fits, to ``position-vs-area.pdf``."""
    xfit, xr2 = _linear_fit_r2(centroids[:, 0], areas)
    yfit, yr2 = _linear_fit_r2(centroids[:, 1], areas)

    xlabel = 'X coordinate (r²={:.2f})'.format(xr2)
    ylabel = 'Y coordinate (r²={:.2f})'.format(yr2)

    plt.plot(centroids[:, 0], areas, 'b.', zorder=1)
    plt.plot(centroids[:, 1], areas, 'r.', zorder=1)
    plt.gca().autoscale(False)
    plt.plot([0, 3000], np.polyval(xfit, [0, 3000]), 'b--',
             zorder=0, label=xlabel)
    plt.plot([0, 3000], np.polyval(yfit, [0, 3000]), 'r--',
             zorder=0, label=ylabel)
    plt.legend(loc='upper right')
    plt.xlabel('X/Y coordinate (px)')
    plt.ylabel('Contour area (px²)')
    plt.savefig('position-vs-area.pdf')


def get_conics(img, contours, hierarchy,
               abs_area_cutoff=0.0001, mean_area_cutoff=0.15,
               plot_areas=False):
    """Fit a conic to every sufficiently large outer contour.

    Only contours without a parent are considered. Holes (child contours)
    larger than the absolute cut-off are subtracted before the moments are
    taken. Contours whose area does not exceed ``mean_area_cutoff`` times the
    mean area are then discarded. Several debug images are recorded through
    ``debug_show`` along the way.

    Args:
        img: source image; used for its size and as the debug backdrop.
        contours: contours as returned by ``cv2.findContours``.
        hierarchy: matching hierarchy; reshaped to rows of
            ``(next, prev, first_child, parent)`` indices.
        abs_area_cutoff: minimum contour area as a fraction of image area.
        mean_area_cutoff: minimum contour area as a fraction of the mean
            area of the contours that passed the absolute cut-off.
        plot_areas: when true, also save ``position-vs-area.pdf``.

    Returns:
        ``(conics, contours, centroid)``: array of conics for the kept
        contours, those contours as float32 arrays, and the area-weighted
        centroid of all contours that passed the absolute cut-off.

    Raises:
        ValueError: if there are no contours, or none passes the absolute
            area cut-off.
    """
    if hierarchy is None or len(contours) == 0:
        raise ValueError('no contours found in image')

    hierarchy = hierarchy.reshape((-1, 4))

    conics = []
    areas = []
    centroids = []
    okcontours = []
    allchildren = []
    centroid_accum = np.zeros(2)
    total_area = 0.0

    abs_area_cutoff *= img.shape[0] * img.shape[1]
    print('abs_area_cutoff = ', abs_area_cutoff)

    for i, (c, h) in enumerate(zip(contours, hierarchy)):

        next_idx, prev_idx, child_idx, parent_idx = h

        # Holes are handled together with their parent contour below.
        if parent_idx >= 0:
            continue

        # m[0] is used as the area and m[1:3] as the first-order moments.
        m = ellipse.moments_from_dict(cv2.moments(c))

        if m[0] <= abs_area_cutoff:
            continue

        # Walk the sibling chain of this contour's children.
        children = []
        while child_idx >= 0:
            child_contour = contours[child_idx]
            cm = cv2.moments(child_contour)
            if cm['m00'] > abs_area_cutoff:
                children.append(child_contour)
                allchildren.append(child_contour)
            child_idx = hierarchy[child_idx][0]

        if children:
            # Rasterise the contour minus its holes and take the moments of
            # the resulting binary mask instead.
            work = np.zeros(img.shape[:2], dtype=np.uint8)
            cv2.drawContours(work, contours, i, (1, 1, 1), -1)
            cv2.drawContours(work, children, -1, (0, 0, 0), -1)
            m = ellipse.moments_from_dict(cv2.moments(work, True))

        centroids.append(m[1:3]/m[0])
        centroid_accum += m[1:3]
        total_area += m[0]
        okcontours.append(c)
        conics.append(ellipse.conic_from_moments(m))
        areas.append(m[0])

    display = img.copy()
    cv2.drawContours(display, okcontours+allchildren,
                     -1, (0, 255, 0), 6, cv2.LINE_AA)
    debug_show('contours_only', display)

    for c, a in zip(okcontours, areas):
        x, y, w, h = cv2.boundingRect(c)
        s = str('{:,d}'.format(int(a)))
        ctr = (x, y+h+20)
        # Thick black text under thinner green text gives an outline.
        cv2.putText(display, s, ctr, cv2.FONT_HERSHEY_SIMPLEX,
                    2.0, (0, 0, 0), 12, cv2.LINE_AA)
        cv2.putText(display, s, ctr, cv2.FONT_HERSHEY_SIMPLEX,
                    2.0, (0, 255, 0), 6, cv2.LINE_AA)
    debug_show('contours', display)

    if not areas:
        raise ValueError('no contour is larger than the area cutoff')

    areas = np.array(areas)
    amean = areas.mean()

    print('got {} contours with {} small.'.format(
        len(areas), (areas < mean_area_cutoff*amean).sum()))

    idx = np.where(areas > mean_area_cutoff*amean)[0]

    conics = np.array(conics)
    conics = conics[idx]
    centroid_accum /= total_area

    display = img.copy()
    for conic in conics:
        x0, y0, a, b, theta = ellipse.gparams_from_conic(conic)
        cv2.ellipse(display, (int(x0), int(y0)), (int(a), int(b)),
                    theta*180/np.pi, 0, 360, (0, 0, 255), 6, cv2.LINE_AA)
    debug_show('conics', display)

    contours = [okcontours[i].astype('float32') for i in idx]

    if plot_areas:
        _plot_position_vs_area(np.array([centroids[i] for i in idx]),
                               areas[idx])

    return conics, contours, centroid_accum
def _write_optimization_frames(conics, opt_results):
    """Write one ``frameNNNN.png`` per recorded optimizer improvement,
    showing the conics warped by that step's homography."""
    phi = np.linspace(0, 2*np.pi, 16, endpoint=False)

    rects = []
    width, height = 0, 0

    # First pass: bounding box of the warped conics at every step.
    for _, H_step, _ in opt_results:
        allxy = []
        for conic in conics:
            Hconic = ellipse.conic_transform(conic, H_step)
            gparams = ellipse.gparams_from_conic(Hconic)
            ex, ey = ellipse.gparams_evaluate(gparams, phi)
            xy = np.dstack((ex.reshape((-1, 1, 1)), ey.reshape((-1, 1, 1))))
            allxy.append(xy)
        allxy = np.vstack(tuple(allxy)).astype(np.float32)
        rect = cv2.boundingRect(allxy)
        rects.append(rect)
        width = max(width, rect[2])
        height = max(height, rect[3])

    if not rects or height == 0:
        return

    border = int(0.05 * min(width, height))
    width += border
    height += border

    # Force a 2:1 canvas. Floor division keeps every size an integer on
    # Python 3 as well as Python 2.
    aspect = float(width)/height
    if aspect < 2.0:
        width = 2*height
    else:
        height = width//2

    # Second pass: draw each step centred on the canvas.
    for i, (rect, (_, H_step, fval)) in enumerate(zip(rects, opt_results)):
        display = np.zeros((height, width), dtype=np.uint8)
        rx, ry, rw, rh = rect
        xoffs = width//2 - (rx + rw//2)
        yoffs = height//2 - (ry + rh//2)
        for conic in conics:
            Hconic = ellipse.conic_transform(conic, H_step)
            x0, y0, a, b, theta = ellipse.gparams_from_conic(Hconic)
            cv2.ellipse(display, (int(x0+xoffs), int(y0+yoffs)),
                        (int(a), int(b)), theta*180/np.pi, 0, 360,
                        (255, 255, 255), 6, cv2.LINE_AA)
        cv2.putText(display, 'Area discrepancy: {:.3f}'.format(fval),
                    (16, height-24), cv2.FONT_HERSHEY_SIMPLEX, 2.0,
                    (255, 255, 255), 6, cv2.LINE_AA)
        cv2.imwrite('frame{:04d}.png'.format(i), display)


def optimize_conics(conics, p0, save_frames=False):
    """Find the perspective warp about *p0* that equalises conic sizes.

    Minimises ``conic_area_discrepancy`` over the two parameters (a, b) of
    ``centered_warp(p0[0], p0[1], a, b)`` using Powell's method, starting
    from (0, 0).

    Args:
        conics: conics to equalise.
        p0: (x, y) point about which the warp is centred.
        save_frames: when true, write a ``frameNNNN.png`` image for every
            improving step of the optimization.

    Returns:
        The 3x3 homography at the optimizer's solution.
    """
    x0 = np.array([0.0, 0.0])

    def hfunc(x):
        return centered_warp(p0[0], p0[1], x[0], x[1])

    # Filled by conic_area_discrepancy with every improving (x, H, value).
    opt_results = []

    def objective(x):
        return conic_area_discrepancy(conics, x, hfunc(x), opt_results)

    res = scipy.optimize.minimize(objective, x0, method='Powell')

    H = hfunc(res.x)

    if save_frames:
        _write_optimization_frames(conics, opt_results)

    return H
def orientation_detect(img, contours, H, rho=8.0, ntheta=512):
    """Estimate the text rotation after warping by *H* and compose it in.

    The contours are warped by *H* and drawn as an edge map. For each of
    *ntheta* candidate angles in [-pi/2, pi/2), the edge pixels are projected
    onto the axis perpendicular to that angle and histogrammed into bins
    *rho* pixels wide. The angle with the most empty bins is selected.
    Several debug images are recorded through ``debug_show``.

    Args:
        img: source image, used for the debug renderings.
        contours: contour point arrays to use as edges.
        H: 3x3 homography applied before orientation is measured.
        rho: histogram bin width in pixels.
        ntheta: number of candidate angles.

    Returns:
        ``rotation(-theta) . H`` for the detected angle ``theta``.

    Raises:
        ValueError: if the warped contours produce no edge pixels.
    """
    pts = np.vstack(tuple(contours))

    shape, TH = warp_containing_points(img, pts, H, shape_only=True)

    text_edges = np.zeros(shape, dtype=np.uint8)

    for contour in contours:
        contour = cv2.perspectiveTransform(contour.astype(np.float32), TH)
        cv2.drawContours(text_edges, [contour.astype(int)], 0, (255, 255, 255))

    debug_show('edges', text_edges)

    # generate a linspace of thetas
    thetas = np.linspace(-0.5*np.pi, 0.5*np.pi, ntheta, endpoint=False)

    # rho is pixels per r bin in polar (theta, r) histogram
    # irho is bins per pixel
    irho = 1.0/rho

    # get height and width
    h, w = text_edges.shape

    # maximum bin index is given by hypotenuse of (w, h) divided by pixels
    # per bin
    bin_max = int(np.ceil(np.hypot(w, h)*irho))

    # initialize zeroed histogram height bin_max and width num theta
    hist = np.zeros((bin_max, ntheta))

    # let u and v be x and y coordinates (respectively) of non-zero
    # pixels in edge map
    v, u = np.nonzero(text_edges)

    if u.size == 0:
        raise ValueError('no edge pixels to estimate orientation from')

    # offsets from the center of the edge map
    du = u - w*0.5
    dv = v - h*0.5
    u0 = w*0.5
    v0 = h*0.5

    for i, theta in enumerate(thetas):

        # for each nonzero edge pixel, compute bin in r direction from
        # pixel location and cos/sin of theta; 0.5*bin_max is the offset
        # for the center pixel
        bin_idx = (-du*np.sin(theta) + dv*np.cos(theta))*irho + 0.5*bin_max

        # adding 0.5 before truncation rounds to nearest; the clip keeps a
        # value that rounds up to bin_max inside the last bin
        rounded = np.clip((bin_idx + 0.5).astype(int), 0, bin_max - 1)

        # minlength makes the count vector exactly one histogram column
        hist[:, i] = np.bincount(rounded, minlength=bin_max)

    # number of zero pixels in each column
    num_zero = (hist == 0).sum(axis=0)

    # find the maximum number of zero pixels; plain int for OpenCV drawing
    best_theta_idx = int(num_zero.argmax())

    # actual detected theta
    theta = thetas[best_theta_idx]

    # compose with previous homography
    RH = np.dot(rotation(-theta), H)

    # --- debug visualization only below this line ---

    debug_hist = (255*hist/hist.max()).astype('uint8')
    debug_hist = cv2.cvtColor(debug_hist, cv2.COLOR_GRAY2RGB)

    cv2.line(debug_hist,
             (best_theta_idx, 0),
             (best_theta_idx, bin_max), (255, 0, 0),
             1, cv2.LINE_AA)

    debug_show('histogram', debug_hist)

    p0 = np.array((u0, v0))
    t = np.array((np.cos(theta), np.sin(theta)))

    warped = cv2.warpPerspective(img, TH, (shape[1], shape[0]),
                                 borderMode=cv2.BORDER_REPLICATE)

    debug_show('prerotate_noline', warped)

    cv2.line(warped,
             tuple(map(int, p0 - rho*bin_max*t)),
             tuple(map(int, p0 + rho*bin_max*t)),
             (255, 0, 0),
             6, cv2.LINE_AA)

    debug_show('prerotate', warped)

    warped, _ = warp_containing_points(img, pts, RH)
    debug_show('preskew', warped)

    return RH
def _draw_warped_hulls(display, hulls, H):
    """Draw every hull, transformed by homography *H*, onto *display*."""
    for hull in hulls:
        warped_hull = cv2.perspectiveTransform(hull, H).astype(int)
        cv2.drawContours(display, [warped_hull], 0,
                         (255, 0, 255), 6, cv2.LINE_AA)


def skew_detect(img, contours, RH):
    """Find the horizontal shear that makes the contours narrowest.

    Minimises ``skewed_widths(contours, slant(sx) . RH)`` over the shear
    *sx*. Before/after renderings with convex hulls, and the final warped
    image, are recorded through ``debug_show``.

    Args:
        img: source image, used for the debug renderings.
        contours: float32 contour point arrays.
        RH: 3x3 homography applied before the shear.

    Returns:
        ``slant(sx) . RH`` for the optimal shear ``sx``.
    """
    hulls = [cv2.convexHull(c) for c in contours]
    pts = np.vstack(tuple(hulls))

    display, TRH = warp_containing_points(img, pts, RH)
    _draw_warped_hulls(display, hulls, TRH)
    debug_show('convex_hulls_before', display)

    def total_width(sx):
        return skewed_widths(contours, np.dot(slant(sx), RH))

    try:
        res = scipy.optimize.minimize_scalar(total_width,
                                             bracket=(-2.0, 0.0, 2.0))
    except ValueError:
        # A three-point bracket is rejected unless f(0) is below both
        # f(-2) and f(2); search the same interval with the bounded method.
        res = scipy.optimize.minimize_scalar(total_width,
                                             bounds=(-2.0, 2.0),
                                             method='bounded')

    SRH = np.dot(slant(res.x), RH)
    warped, Hfinal = warp_containing_points(img, pts, SRH)

    display = warped.copy()
    _draw_warped_hulls(display, hulls, Hfinal)
    debug_show('convex_hulls_after', display)

    debug_show('final', warped)

    return SRH
def main():
    """Run the full pipeline on the image named by the first command-line
    argument, then page through the recorded debug images.

    Exits with status 1 if no argument is given or the image cannot be read.
    """
    if len(sys.argv) < 2:
        print('usage: {} IMAGE'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    img = cv2.imread(sys.argv[1])

    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        print('error: could not read image {}'.format(sys.argv[1]),
              file=sys.stderr)
        sys.exit(1)

    debug_show('input', img)

    contours, hierarchy = get_contours(img)

    conics, contours, centroid = get_conics(img, contours, hierarchy)
    H = optimize_conics(conics, centroid)
    RH = orientation_detect(img, contours, H)
    skew_detect(img, contours, RH)

    # Show each debug image until a key is pressed.
    for debug_img in DEBUG_IMAGES:
        cv2.imshow('Debug', debug_img)
        while cv2.waitKey(5) < 0:
            pass


if __name__ == '__main__':
    main()
|