Repository for the search-and-displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into Markdown (.md) for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

504 lines
14 KiB

3 years ago
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import unicode_literals, print_function
  4. import sys
  5. import numpy as np
  6. import scipy.optimize
  7. import matplotlib.pyplot as plt
  8. import cv2
  9. import ellipse
  10. DEBUG_IMAGES = []
  11. def debug_show(name, src):
  12. global DEBUG_IMAGES
  13. filename = 'debug{:02d}_{}.png'.format(len(DEBUG_IMAGES), name)
  14. cv2.imwrite(filename, src)
  15. h, w = src.shape[:2]
  16. fx = w/1280.0
  17. fy = h/700.0
  18. f = 1.0/np.ceil(max(fx, fy))
  19. if f < 1.0:
  20. img = cv2.resize(src, (0, 0), None, f, f, cv2.INTER_AREA)
  21. else:
  22. img = src.copy()
  23. DEBUG_IMAGES.append(img)
  24. def translation(x, y):
  25. return np.array([[1, 0, x], [0, 1, y], [0, 0, 1]], dtype=float)
  26. def rotation(theta):
  27. c = np.cos(theta)
  28. s = np.sin(theta)
  29. return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=float)
  30. def perspective_warp(a, b):
  31. return np.array([[1, 0, 0], [0, 1, 0], [a, b, 1]], dtype=float)
  32. def slant(sx):
  33. return np.array([[1, sx, 0], [0, 1, 0], [0, 0, 1]], dtype=float)
  34. def softmax(x, k=1.0):
  35. b = x.max()
  36. return np.log( np.exp(k*(x-b)).sum() ) / k + b
  37. def skewed_widths(contours, H):
  38. xvals = []
  39. for c in contours:
  40. pts = cv2.perspectiveTransform(c, H)
  41. x = pts[:,:,0]
  42. xvals.append( x.max() - x.min() )
  43. xvals = np.array(xvals)
  44. return softmax(xvals, 0.1)
  45. def centered_warp(u0, v0, a, b):
  46. return np.dot(translation(u0, v0),
  47. np.dot(perspective_warp(a, b),
  48. translation(-u0, -v0)))
  49. def warp_containing_points(img, pts, H, border=4, shape_only=False):
  50. '''
  51. display = img.copy()
  52. for pt in pts.reshape((-1,2)).astype(int):
  53. cv2.circle(display, tuple(pt), 4, (255, 0, 0),
  54. -1, cv2.LINE_AA)
  55. debug_show('warp', display)
  56. '''
  57. pts2 = cv2.perspectiveTransform(pts, H)
  58. x0, y0, w, h = cv2.boundingRect(pts2)
  59. print('got bounding rect', x0, y0, w, h)
  60. T = translation(-x0+border, -y0+border)
  61. TH = np.dot(T, H)
  62. if shape_only:
  63. return (h+2*border, w+2*border), TH
  64. else:
  65. dst = cv2.warpPerspective(img, TH, (w+2*border, h+2*border),
  66. borderMode=cv2.BORDER_REPLICATE)
  67. return dst, TH
  68. def conic_area_discrepancy(conics, x, H, opt_results=None):
  69. areas = []
  70. for conic in conics:
  71. cx = ellipse.conic_transform(conic, H)
  72. k, ab = ellipse.conic_scale(cx)
  73. if np.isinf(ab):
  74. areas.append(1e20)
  75. else:
  76. areas.append(ab)
  77. areas = np.array(areas)
  78. areas /= areas.mean() # rescale so mean is 1.0
  79. areas -= 1 # subtract off mean
  80. rval = 0.5*np.dot(areas, areas)
  81. if opt_results is not None:
  82. if not opt_results or rval < opt_results[-1][-1]:
  83. opt_results.append( (x, H, rval) )
  84. return rval
  85. def threshold(img):
  86. if len(img.shape) > 2:
  87. img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
  88. mean = img.mean()
  89. if mean < 100:
  90. img = 255-img
  91. return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  92. cv2.THRESH_BINARY_INV, 101, 21)
  93. def get_contours(img):
  94. work = threshold(img)
  95. debug_show('threshold', work)
  96. contours, hierarchy = cv2.findContours(work, cv2.RETR_CCOMP,
  97. cv2.CHAIN_APPROX_NONE)
  98. return contours, hierarchy
  99. def get_conics(img, contours, hierarchy,
  100. abs_area_cutoff=0.0001, mean_area_cutoff=0.15):
  101. hierarchy = hierarchy.reshape((-1, 4))
  102. conics = []
  103. used_contours = []
  104. areas = []
  105. okcontours = []
  106. allchildren = []
  107. pts = np.empty((0,1,2), dtype='float32')
  108. centroid_accum = np.zeros(2)
  109. total_area = 0.0
  110. centroids = []
  111. abs_area_cutoff *= img.shape[0] * img.shape[1]
  112. print('abs_area_cutoff = ',abs_area_cutoff)
  113. for i, (c, h) in enumerate(zip(contours, hierarchy.reshape((-1, 4)))):
  114. next_idx, prev_idx, child_idx, parent_idx = h
  115. if parent_idx >= 0:
  116. continue
  117. m = ellipse.moments_from_dict(cv2.moments(c))
  118. if m[0] <= abs_area_cutoff:
  119. continue
  120. children = []
  121. while child_idx >= 0:
  122. child_contour = contours[child_idx]
  123. cm = cv2.moments(child_contour)
  124. if cm['m00'] > abs_area_cutoff:
  125. children.append(child_contour)
  126. allchildren.append(child_contour)
  127. child_idx = hierarchy[child_idx][0]
  128. if children:
  129. work = np.zeros(img.shape[:2], dtype=np.uint8)
  130. cv2.drawContours(work, contours, i, (1,1,1), -1)
  131. cv2.drawContours(work, children, -1, (0,0,0), -1)
  132. m = ellipse.moments_from_dict(cv2.moments(work, True))
  133. centroids.append(m[1:3]/m[0])
  134. centroid_accum += m[1:3]
  135. total_area += m[0]
  136. pts = np.vstack((pts, c.astype('float32')))
  137. conic = ellipse.conic_from_moments(m)
  138. okcontours.append(c)
  139. conics.append(conic)
  140. areas.append(m[0])
  141. display = img.copy()
  142. cv2.drawContours(display, okcontours+allchildren,
  143. -1, (0, 255, 0),
  144. 6, cv2.LINE_AA)
  145. debug_show('contours_only', display)
  146. for c, a in zip(okcontours, areas):
  147. x, y, w, h = cv2.boundingRect(c)
  148. s = str('{:,d}'.format(int(a)))
  149. #ctr = (x + w/2 - 15*len(s), y+h/2+10)
  150. ctr = (x, y+h+20)
  151. cv2.putText(display, s, ctr,
  152. cv2.FONT_HERSHEY_SIMPLEX, 2.0,
  153. (0, 0, 0), 12, cv2.LINE_AA)
  154. cv2.putText(display, s, ctr,
  155. cv2.FONT_HERSHEY_SIMPLEX, 2.0,
  156. (0, 255, 0), 6, cv2.LINE_AA)
  157. debug_show('contours', display)
  158. areas = np.array(areas)
  159. amean = areas.mean()
  160. print('got {} contours with {} small.'.format(
  161. len(areas), (areas < mean_area_cutoff*amean).sum()))
  162. idx = np.where(areas > mean_area_cutoff*amean)[0]
  163. conics = np.array(conics)
  164. conics = conics[idx]
  165. centroid_accum /= total_area
  166. display = img.copy()
  167. for conic in conics:
  168. x0, y0, a, b, theta = ellipse.gparams_from_conic(conic)
  169. cv2.ellipse(display, (int(x0), int(y0)), (int(a), int(b)),
  170. theta*180/np.pi, 0, 360, (0,0,255), 6, cv2.LINE_AA)
  171. debug_show('conics', display)
  172. contours = [okcontours[i].astype('float32') for i in idx]
  173. if 0:
  174. centroids = np.array([centroids[i] for i in idx])
  175. areas = areas[idx]
  176. def polyfit(x, y):
  177. coeffs = np.polyfit(x, y, deg=1)
  178. ypred = np.polyval(coeffs, x)
  179. ymean = np.mean(y)
  180. sstot = np.sum((y - ymean)**2)
  181. ssres = np.sum((y.flatten() - ypred.flatten())**2)
  182. r2 = 1 - ssres/sstot
  183. return coeffs, r2
  184. xfit, xr2 = polyfit(centroids[:,0], areas)
  185. yfit, yr2 = polyfit(centroids[:,1], areas)
  186. xlabel = 'X coordinate (r²={:.2f})'.format(xr2)
  187. ylabel = 'Y coordinate (r²={:.2f})'.format(yr2)
  188. plt.plot(centroids[:,0], areas, 'b.', zorder=1)
  189. plt.plot(centroids[:,1], areas, 'r.', zorder=1)
  190. plt.gca().autoscale(False)
  191. plt.plot([0, 3000], np.polyval(xfit, [0,3000]), 'b--',
  192. zorder=0, label=xlabel)
  193. plt.plot([0, 3000], np.polyval(yfit, [0,3000]), 'r--',
  194. zorder=0, label=ylabel)
  195. plt.legend(loc='upper right')
  196. plt.xlabel('X/Y coordinate (px)')
  197. plt.ylabel('Contour area (px²)')
  198. plt.savefig('position-vs-area.pdf')
  199. return conics, contours, centroid_accum
  200. def optimize_conics(conics, p0):
  201. x0 = np.array([0.0, 0.0])
  202. hfunc = lambda x: centered_warp(p0[0], p0[1], x[0], x[1])
  203. opt_results = []
  204. f = lambda x: conic_area_discrepancy(conics, x, hfunc(x), opt_results)
  205. res = scipy.optimize.minimize(f, x0, method='Powell')
  206. H = hfunc(res.x)
  207. rects = []
  208. if 0:
  209. phi = np.linspace(0, 2*np.pi, 16, endpoint=False)
  210. width, height = 0, 0
  211. for x, H, fval in opt_results:
  212. allxy = []
  213. for conic in conics:
  214. Hconic = ellipse.conic_transform(conic, H)
  215. gparams = ellipse.gparams_from_conic(Hconic)
  216. x, y = ellipse.gparams_evaluate(gparams, phi)
  217. xy = np.dstack((x.reshape((-1, 1, 1)), y.reshape((-1, 1, 1))))
  218. allxy.append(xy)
  219. allxy = np.vstack(tuple(allxy)).astype(np.float32)
  220. rect = cv2.boundingRect(allxy)
  221. rects.append(rect)
  222. x, y, w, h = rect
  223. width = max(width, w)
  224. height = max(height, h)
  225. border = int(0.05 * min(width, height))
  226. width += border
  227. height += border
  228. aspect = float(width)/height
  229. if aspect < 2.0:
  230. width = 2*height
  231. else:
  232. height = width/2
  233. for i, (rect, (x, H, fval)) in enumerate(zip(rects, opt_results)):
  234. display = np.zeros((height, width), dtype=np.uint8)
  235. x, y, w, h = rect
  236. xoffs = width/2 - (x+w/2)
  237. yoffs = height/2 - (y+h/2)
  238. for conic in conics:
  239. Hconic = ellipse.conic_transform(conic, H)
  240. x0, y0, a, b, theta = ellipse.gparams_from_conic(Hconic)
  241. cv2.ellipse(display, (int(x0+xoffs), int(y0+yoffs)), (int(a), int(b)),
  242. theta*180/np.pi, 0, 360, (255,255,255), 6, cv2.LINE_AA)
  243. cv2.putText(display, 'Area discrepancy: {:.3f}'.format(fval),
  244. (16, height-24), cv2.FONT_HERSHEY_SIMPLEX, 2.0,
  245. (255,255,255), 6, cv2.LINE_AA)
  246. cv2.imwrite('frame{:04d}.png'.format(i), display)
  247. return H
  248. def orientation_detect(img, contours, H, rho=8.0, ntheta=512):
  249. # ignore this, just deal with edge-detected text
  250. pts = np.vstack(tuple(contours))
  251. shape, TH = warp_containing_points(img, pts, H, shape_only=True)
  252. text_edges = np.zeros(shape, dtype=np.uint8)
  253. for contour in contours:
  254. contour = cv2.perspectiveTransform(contour.astype(np.float32), TH)
  255. cv2.drawContours(text_edges, [contour.astype(int)], 0, (255,255,255))
  256. debug_show('edges', text_edges)
  257. # generate a linspace of thetas
  258. thetas = np.linspace(-0.5*np.pi, 0.5*np.pi, ntheta, endpoint=False)
  259. # rho is pixels per r bin in polar (theta, r) histogram
  260. # irho is bins per pixel
  261. irho = 1.0/rho
  262. # get height and width
  263. h, w = text_edges.shape
  264. # maximum bin index is given by hypotenuse of (w, h) divided by pixels per bin
  265. bin_max = int(np.ceil(np.hypot(w, h)*irho))
  266. # initialize zeroed histogram height bin_max and width num theta
  267. hist = np.zeros((bin_max, ntheta))
  268. # let u and v be x and y coordinates (respectively) of non-zero
  269. # pixels in edge map
  270. v, u = np.mgrid[0:h, 0:w]
  271. v = v[text_edges.view(bool)]
  272. u = u[text_edges.view(bool)]
  273. # get center coordinates
  274. u0 = w*0.5
  275. v0 = h*0.5
  276. # for each i and theta = thetas[i]
  277. for i, theta in enumerate(thetas):
  278. # for each nonzero edge pixel, compute bin in r direction from
  279. # pixel location and cos/sin of theta
  280. bin_idx = ( (-(u-u0)*np.sin(theta) # x term
  281. + (v-v0)*np.cos(theta))*irho # y term, both
  282. # divided by pixels
  283. # per bin
  284. + 0.5*bin_max ) # offset for center pixel
  285. assert( bin_idx.min() >= 0 and bin_idx.max() < bin_max )
  286. # 0.5 is for correct rounding here
  287. #
  288. # e.g. np.bincount([1, 1, 0, 3]) = [1, 2, 0, 1]
  289. # returns count of each integer in the array
  290. bc = np.bincount((bin_idx + 0.5).astype(int))
  291. # push this into the histogram
  292. hist[:len(bc),i] = bc
  293. # number of zero pixels in each column
  294. num_zero = (hist == 0).sum(axis=0)
  295. # find the maximum number of zero pixels
  296. best_theta_idx = num_zero.argmax()
  297. # actual detected theta - could just return this now
  298. theta = thetas[best_theta_idx]
  299. # compose with previous homography
  300. RH = np.dot(rotation(-theta), H)
  301. if 1: # just debug visualization
  302. debug_hist = (255*hist/hist.max()).astype('uint8')
  303. debug_hist = cv2.cvtColor(debug_hist, cv2.COLOR_GRAY2RGB)
  304. cv2.line(debug_hist,
  305. (best_theta_idx, 0),
  306. (best_theta_idx, bin_max), (255,0,0),
  307. 1, cv2.LINE_AA)
  308. debug_show('histogram', debug_hist)
  309. p0 = np.array((u0, v0))
  310. t = np.array((np.cos(theta), np.sin(theta)))
  311. warped = cv2.warpPerspective(img, TH, (shape[1], shape[0]),
  312. borderMode=cv2.BORDER_REPLICATE)
  313. debug_show('prerotate_noline', warped)
  314. cv2.line(warped,
  315. tuple(map(int, p0 - rho*bin_max*t)),
  316. tuple(map(int, p0 + rho*bin_max*t)),
  317. (255, 0, 0),
  318. 6, cv2.LINE_AA)
  319. debug_show('prerotate', warped)
  320. warped, _ = warp_containing_points(img, pts, RH)
  321. debug_show('preskew', warped)
  322. return RH
  323. def skew_detect(img, contours, RH):
  324. hulls = [cv2.convexHull(c) for c in contours]
  325. pts = np.vstack(tuple(hulls))
  326. display, TRH = warp_containing_points(img, pts, RH)
  327. for h in hulls:
  328. h = cv2.perspectiveTransform(h, TRH).astype(int)
  329. cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA)
  330. debug_show('convex_hulls_before', display)
  331. f = lambda x: skewed_widths(contours, np.dot(slant(x), RH))
  332. res = scipy.optimize.minimize_scalar(f, (-2.0, 0.0, 2.0))
  333. SRH = np.dot(slant(res.x), RH)
  334. warped, Hfinal = warp_containing_points(img, pts, SRH)
  335. display = warped.copy()
  336. for h in hulls:
  337. h = cv2.perspectiveTransform(h, Hfinal).astype(int)
  338. cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA)
  339. debug_show('convex_hulls_after', display)
  340. debug_show('final', warped)
  341. return SRH
  342. def main():
  343. img = cv2.imread(sys.argv[1])
  344. debug_show('input', img)
  345. contours, hierarchy = get_contours(img)
  346. conics, contours, centroid = get_conics(img, contours, hierarchy)
  347. H = optimize_conics(conics, centroid)
  348. RH = orientation_detect(img, contours, H)
  349. SRH = skew_detect(img, contours, RH)
  350. for img in DEBUG_IMAGES:
  351. cv2.imshow('Debug', img)
  352. while cv2.waitKey(5) < 0:
  353. pass
  354. if __name__ == '__main__':
  355. main()