Repo for the search-and-displace ingest module, which takes ODF, DOCX and PDF files and transforms them into .md to be used with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

923 lines
26 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. #!/usr/bin/env python
  2. ######################################################################
  3. # page_dewarp.py - Proof-of-concept of page-dewarping based on a
  4. # "cubic sheet" model. Requires OpenCV (version 3 or greater),
  5. # PIL/Pillow, and scipy.optimize.
  6. ######################################################################
  7. # Author: Matt Zucker
  8. # Date: July 2016
  9. # License: MIT License (see LICENSE.txt)
  10. ######################################################################
  11. from __future__ import division
  12. from __future__ import print_function
  13. from builtins import zip
  14. from builtins import str
  15. from builtins import range
  16. from builtins import object
  17. from past.utils import old_div
  18. import os
  19. import sys
  20. import datetime
  21. import cv2
  22. from PIL import Image
  23. import numpy as np
  24. import scipy.optimize
  25. # for some reason pylint complains about cv2 members being undefined :(
  26. # pylint: disable=E1101
  27. PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge
  28. PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge
  29. OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image
  30. OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance
  31. REMAP_DECIMATE = 16 # downscaling factor for remapping image
  32. ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px
  33. TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour
  34. TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour
  35. TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio
  36. TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour
  37. EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span
  38. EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours
  39. EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length)
  40. EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours
  41. RVEC_IDX = slice(0, 3) # index of rvec in params vector
  42. TVEC_IDX = slice(3, 6) # index of tvec in params vector
  43. CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector
  44. SPAN_MIN_WIDTH = 30 # minimum reduced px width for span
  45. SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans
  46. FOCAL_LENGTH = 1.2 # normalized focal length of camera
  47. DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all
  48. DEBUG_OUTPUT = 'file' # file, screen, both
  49. WINDOW_NAME = 'Dewarp' # Window name for visualization
  50. # nice color palette for visualizing contours, etc.
  51. CCOLORS = [
  52. (255, 0, 0),
  53. (255, 63, 0),
  54. (255, 127, 0),
  55. (255, 191, 0),
  56. (255, 255, 0),
  57. (191, 255, 0),
  58. (127, 255, 0),
  59. (63, 255, 0),
  60. (0, 255, 0),
  61. (0, 255, 63),
  62. (0, 255, 127),
  63. (0, 255, 191),
  64. (0, 255, 255),
  65. (0, 191, 255),
  66. (0, 127, 255),
  67. (0, 63, 255),
  68. (0, 0, 255),
  69. (63, 0, 255),
  70. (127, 0, 255),
  71. (191, 0, 255),
  72. (255, 0, 255),
  73. (255, 0, 191),
  74. (255, 0, 127),
  75. (255, 0, 63),
  76. ]
  77. # default intrinsic parameter matrix
  78. K = np.array([
  79. [FOCAL_LENGTH, 0, 0],
  80. [0, FOCAL_LENGTH, 0],
  81. [0, 0, 1]], dtype=np.float32)
  82. def debug_show(name, step, text, display):
  83. if DEBUG_OUTPUT != 'screen':
  84. filetext = text.replace(' ', '_')
  85. outfile = name + '_debug_' + str(step) + '_' + filetext + '.png'
  86. cv2.imwrite(outfile, display)
  87. if DEBUG_OUTPUT != 'file':
  88. image = display.copy()
  89. height = image.shape[0]
  90. cv2.putText(image, text, (16, height-16),
  91. cv2.FONT_HERSHEY_SIMPLEX, 1.0,
  92. (0, 0, 0), 3, cv2.LINE_AA)
  93. cv2.putText(image, text, (16, height-16),
  94. cv2.FONT_HERSHEY_SIMPLEX, 1.0,
  95. (255, 255, 255), 1, cv2.LINE_AA)
  96. cv2.imshow(WINDOW_NAME, image)
  97. while cv2.waitKey(5) < 0:
  98. pass
  99. def round_nearest_multiple(i, factor):
  100. i = int(i)
  101. rem = i % factor
  102. if not rem:
  103. return i
  104. else:
  105. return i + factor - rem
  106. def pix2norm(shape, pts):
  107. height, width = shape[:2]
  108. scl = 2.0/(max(height, width))
  109. offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5
  110. return (pts - offset) * scl
  111. def norm2pix(shape, pts, as_integer):
  112. height, width = shape[:2]
  113. scl = max(height, width)*0.5
  114. offset = np.array([0.5*width, 0.5*height],
  115. dtype=pts.dtype).reshape((-1, 1, 2))
  116. rval = pts * scl + offset
  117. if as_integer:
  118. return (rval + 0.5).astype(int)
  119. else:
  120. return rval
  121. def fltp(point):
  122. return tuple(point.astype(int).flatten())
  123. def draw_correspondences(img, dstpoints, projpts):
  124. display = img.copy()
  125. dstpoints = norm2pix(img.shape, dstpoints, True)
  126. projpts = norm2pix(img.shape, projpts, True)
  127. for pts, color in [(projpts, (255, 0, 0)),
  128. (dstpoints, (0, 0, 255))]:
  129. for point in pts:
  130. cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA)
  131. for point_a, point_b in zip(projpts, dstpoints):
  132. cv2.line(display, fltp(point_a), fltp(point_b),
  133. (255, 255, 255), 1, cv2.LINE_AA)
  134. return display
  135. def get_default_params(corners, ycoords, xcoords):
  136. # page width and height
  137. page_width = np.linalg.norm(corners[1] - corners[0])
  138. page_height = np.linalg.norm(corners[-1] - corners[0])
  139. rough_dims = (page_width, page_height)
  140. # our initial guess for the cubic has no slope
  141. cubic_slopes = [0.0, 0.0]
  142. # object points of flat page in 3D coordinates
  143. corners_object3d = np.array([
  144. [0, 0, 0],
  145. [page_width, 0, 0],
  146. [page_width, page_height, 0],
  147. [0, page_height, 0]])
  148. # estimate rotation and translation from four 2D-to-3D point
  149. # correspondences
  150. _, rvec, tvec = cv2.solvePnP(corners_object3d,
  151. corners, K, np.zeros(5))
  152. span_counts = [len(xc) for xc in xcoords]
  153. params = np.hstack((np.array(rvec).flatten(),
  154. np.array(tvec).flatten(),
  155. np.array(cubic_slopes).flatten(),
  156. ycoords.flatten()) +
  157. tuple(xcoords))
  158. return rough_dims, span_counts, params
  159. def project_xy(xy_coords, pvec):
  160. # get cubic polynomial coefficients given
  161. #
  162. # f(0) = 0, f'(0) = alpha
  163. # f(1) = 0, f'(1) = beta
  164. alpha, beta = tuple(pvec[CUBIC_IDX])
  165. poly = np.array([
  166. alpha + beta,
  167. -2*alpha - beta,
  168. alpha,
  169. 0])
  170. xy_coords = xy_coords.reshape((-1, 2))
  171. z_coords = np.polyval(poly, xy_coords[:, 0])
  172. objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1))))
  173. image_points, _ = cv2.projectPoints(objpoints,
  174. pvec[RVEC_IDX],
  175. pvec[TVEC_IDX],
  176. K, np.zeros(5))
  177. return image_points
  178. def project_keypoints(pvec, keypoint_index):
  179. xy_coords = pvec[keypoint_index]
  180. xy_coords[0, :] = 0
  181. return project_xy(xy_coords, pvec)
  182. def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
  183. height, width = src.shape[:2]
  184. scl_x = float(width)/maxw
  185. scl_y = float(height)/maxh
  186. scl = int(np.ceil(max(scl_x, scl_y)))
  187. if scl > 1.0:
  188. inv_scl = 1.0/scl
  189. img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA)
  190. elif copy:
  191. img = src.copy()
  192. else:
  193. img = src
  194. return img
  195. def box(width, height):
  196. return np.ones((height, width), dtype=np.uint8)
  197. def get_page_extents(small):
  198. height, width = small.shape[:2]
  199. xmin = PAGE_MARGIN_X
  200. ymin = PAGE_MARGIN_Y
  201. xmax = width-PAGE_MARGIN_X
  202. ymax = height-PAGE_MARGIN_Y
  203. page = np.zeros((height, width), dtype=np.uint8)
  204. cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1)
  205. outline = np.array([
  206. [xmin, ymin],
  207. [xmin, ymax],
  208. [xmax, ymax],
  209. [xmax, ymin]])
  210. return page, outline
  211. def get_mask(name, small, pagemask, masktype):
  212. sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
  213. if masktype == 'text':
  214. mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  215. cv2.THRESH_BINARY_INV,
  216. ADAPTIVE_WINSZ,
  217. 25)
  218. if DEBUG_LEVEL >= 3:
  219. debug_show(name, 0.1, 'thresholded', mask)
  220. mask = cv2.dilate(mask, box(9, 1))
  221. if DEBUG_LEVEL >= 3:
  222. debug_show(name, 0.2, 'dilated', mask)
  223. mask = cv2.erode(mask, box(1, 3))
  224. if DEBUG_LEVEL >= 3:
  225. debug_show(name, 0.3, 'eroded', mask)
  226. else:
  227. mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  228. cv2.THRESH_BINARY_INV,
  229. ADAPTIVE_WINSZ,
  230. 7)
  231. if DEBUG_LEVEL >= 3:
  232. debug_show(name, 0.4, 'thresholded', mask)
  233. mask = cv2.erode(mask, box(3, 1), iterations=3)
  234. if DEBUG_LEVEL >= 3:
  235. debug_show(name, 0.5, 'eroded', mask)
  236. mask = cv2.dilate(mask, box(8, 2))
  237. if DEBUG_LEVEL >= 3:
  238. debug_show(name, 0.6, 'dilated', mask)
  239. return np.minimum(mask, pagemask)
  240. def interval_measure_overlap(int_a, int_b):
  241. return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0])
  242. def angle_dist(angle_b, angle_a):
  243. diff = angle_b - angle_a
  244. while diff > np.pi:
  245. diff -= 2*np.pi
  246. while diff < -np.pi:
  247. diff += 2*np.pi
  248. return np.abs(diff)
  249. def blob_mean_and_tangent(contour):
  250. moments = cv2.moments(contour)
  251. area = moments['m00']
  252. mean_x = old_div(moments['m10'], area)
  253. mean_y = old_div(moments['m01'], area)
  254. moments_matrix = old_div(np.array([
  255. [moments['mu20'], moments['mu11']],
  256. [moments['mu11'], moments['mu02']]
  257. ]), area)
  258. _, svd_u, _ = cv2.SVDecomp(moments_matrix)
  259. center = np.array([mean_x, mean_y])
  260. tangent = svd_u[:, 0].flatten().copy()
  261. return center, tangent
  262. class ContourInfo(object):
  263. def __init__(self, contour, rect, mask):
  264. self.contour = contour
  265. self.rect = rect
  266. self.mask = mask
  267. self.center, self.tangent = blob_mean_and_tangent(contour)
  268. self.angle = np.arctan2(self.tangent[1], self.tangent[0])
  269. clx = [self.proj_x(point) for point in contour]
  270. lxmin = min(clx)
  271. lxmax = max(clx)
  272. self.local_xrng = (lxmin, lxmax)
  273. self.point0 = self.center + self.tangent * lxmin
  274. self.point1 = self.center + self.tangent * lxmax
  275. self.pred = None
  276. self.succ = None
  277. def proj_x(self, point):
  278. return np.dot(self.tangent, point.flatten()-self.center)
  279. def local_overlap(self, other):
  280. xmin = self.proj_x(other.point0)
  281. xmax = self.proj_x(other.point1)
  282. return interval_measure_overlap(self.local_xrng, (xmin, xmax))
  283. def generate_candidate_edge(cinfo_a, cinfo_b):
  284. # we want a left of b (so a's successor will be b and b's
  285. # predecessor will be a) make sure right endpoint of b is to the
  286. # right of left endpoint of a.
  287. if cinfo_a.point0[0] > cinfo_b.point1[0]:
  288. tmp = cinfo_a
  289. cinfo_a = cinfo_b
  290. cinfo_b = tmp
  291. x_overlap_a = cinfo_a.local_overlap(cinfo_b)
  292. x_overlap_b = cinfo_b.local_overlap(cinfo_a)
  293. overall_tangent = cinfo_b.center - cinfo_a.center
  294. overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0])
  295. delta_angle = old_div(max(angle_dist(cinfo_a.angle, overall_angle),
  296. angle_dist(cinfo_b.angle, overall_angle)) * 180,np.pi)
  297. # we want the largest overlap in x to be small
  298. x_overlap = max(x_overlap_a, x_overlap_b)
  299. dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)
  300. if (dist > EDGE_MAX_LENGTH or
  301. x_overlap > EDGE_MAX_OVERLAP or
  302. delta_angle > EDGE_MAX_ANGLE):
  303. return None
  304. else:
  305. score = dist + delta_angle*EDGE_ANGLE_COST
  306. return (score, cinfo_a, cinfo_b)
  307. def make_tight_mask(contour, xmin, ymin, width, height):
  308. tight_mask = np.zeros((height, width), dtype=np.uint8)
  309. tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2))
  310. cv2.drawContours(tight_mask, [tight_contour], 0,
  311. (1, 1, 1), -1)
  312. return tight_mask
  313. def get_contours(name, small, pagemask, masktype):
  314. mask = get_mask(name, small, pagemask, masktype)
  315. contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
  316. cv2.CHAIN_APPROX_NONE)
  317. contours_out = []
  318. for contour in contours:
  319. rect = cv2.boundingRect(contour)
  320. xmin, ymin, width, height = rect
  321. if (width < TEXT_MIN_WIDTH or
  322. height < TEXT_MIN_HEIGHT or
  323. width < TEXT_MIN_ASPECT*height):
  324. continue
  325. tight_mask = make_tight_mask(contour, xmin, ymin, width, height)
  326. if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
  327. continue
  328. contours_out.append(ContourInfo(contour, rect, tight_mask))
  329. if DEBUG_LEVEL >= 2:
  330. visualize_contours(name, small, contours_out)
  331. return contours_out
  332. def assemble_spans(name, small, pagemask, cinfo_list):
  333. # sort list
  334. cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])
  335. # generate all candidate edges
  336. candidate_edges = []
  337. for i, cinfo_i in enumerate(cinfo_list):
  338. for j in range(i):
  339. # note e is of the form (score, left_cinfo, right_cinfo)
  340. edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
  341. if edge is not None:
  342. candidate_edges.append(edge)
  343. # sort candidate edges by score (lower is better)
  344. candidate_edges.sort()
  345. # for each candidate edge
  346. for _, cinfo_a, cinfo_b in candidate_edges:
  347. # if left and right are unassigned, join them
  348. if cinfo_a.succ is None and cinfo_b.pred is None:
  349. cinfo_a.succ = cinfo_b
  350. cinfo_b.pred = cinfo_a
  351. # generate list of spans as output
  352. spans = []
  353. # until we have removed everything from the list
  354. while cinfo_list:
  355. # get the first on the list
  356. cinfo = cinfo_list[0]
  357. # keep following predecessors until none exists
  358. while cinfo.pred:
  359. cinfo = cinfo.pred
  360. # start a new span
  361. cur_span = []
  362. width = 0.0
  363. # follow successors til end of span
  364. while cinfo:
  365. # remove from list (sadly making this loop *also* O(n^2)
  366. cinfo_list.remove(cinfo)
  367. # add to span
  368. cur_span.append(cinfo)
  369. width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
  370. # set successor
  371. cinfo = cinfo.succ
  372. # add if long enough
  373. if width > SPAN_MIN_WIDTH:
  374. spans.append(cur_span)
  375. if DEBUG_LEVEL >= 2:
  376. visualize_spans(name, small, pagemask, spans)
  377. return spans
  378. def sample_spans(shape, spans):
  379. span_points = []
  380. for span in spans:
  381. contour_points = []
  382. for cinfo in span:
  383. yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
  384. totals = (yvals * cinfo.mask).sum(axis=0)
  385. means = old_div(totals, cinfo.mask.sum(axis=0))
  386. xmin, ymin = cinfo.rect[:2]
  387. step = SPAN_PX_PER_STEP
  388. start = old_div(((len(means)-1) % step), 2)
  389. contour_points += [(x+xmin, means[x]+ymin)
  390. for x in range(start, len(means), step)]
  391. contour_points = np.array(contour_points,
  392. dtype=np.float32).reshape((-1, 1, 2))
  393. contour_points = pix2norm(shape, contour_points)
  394. span_points.append(contour_points)
  395. return span_points
  396. def keypoints_from_samples(name, small, pagemask, page_outline,
  397. span_points):
  398. all_evecs = np.array([[0.0, 0.0]])
  399. all_weights = 0
  400. for points in span_points:
  401. _, evec = cv2.PCACompute(points.reshape((-1, 2)),
  402. None, maxComponents=1)
  403. weight = np.linalg.norm(points[-1] - points[0])
  404. all_evecs += evec * weight
  405. all_weights += weight
  406. evec = old_div(all_evecs, all_weights)
  407. x_dir = evec.flatten()
  408. if x_dir[0] < 0:
  409. x_dir = -x_dir
  410. y_dir = np.array([-x_dir[1], x_dir[0]])
  411. pagecoords = cv2.convexHull(page_outline)
  412. pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2)))
  413. pagecoords = pagecoords.reshape((-1, 2))
  414. px_coords = np.dot(pagecoords, x_dir)
  415. py_coords = np.dot(pagecoords, y_dir)
  416. px0 = px_coords.min()
  417. px1 = px_coords.max()
  418. py0 = py_coords.min()
  419. py1 = py_coords.max()
  420. p00 = px0 * x_dir + py0 * y_dir
  421. p10 = px1 * x_dir + py0 * y_dir
  422. p11 = px1 * x_dir + py1 * y_dir
  423. p01 = px0 * x_dir + py1 * y_dir
  424. corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))
  425. ycoords = []
  426. xcoords = []
  427. for points in span_points:
  428. pts = points.reshape((-1, 2))
  429. px_coords = np.dot(pts, x_dir)
  430. py_coords = np.dot(pts, y_dir)
  431. ycoords.append(py_coords.mean() - py0)
  432. xcoords.append(px_coords - px0)
  433. if DEBUG_LEVEL >= 2:
  434. visualize_span_points(name, small, span_points, corners)
  435. return corners, np.array(ycoords), xcoords
  436. def visualize_contours(name, small, cinfo_list):
  437. regions = np.zeros_like(small)
  438. for j, cinfo in enumerate(cinfo_list):
  439. cv2.drawContours(regions, [cinfo.contour], 0,
  440. CCOLORS[j % len(CCOLORS)], -1)
  441. mask = (regions.max(axis=2) != 0)
  442. display = small.copy()
  443. display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
  444. for j, cinfo in enumerate(cinfo_list):
  445. color = CCOLORS[j % len(CCOLORS)]
  446. color = tuple([old_div(c,4) for c in color])
  447. cv2.circle(display, fltp(cinfo.center), 3,
  448. (255, 255, 255), 1, cv2.LINE_AA)
  449. cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
  450. (255, 255, 255), 1, cv2.LINE_AA)
  451. debug_show(name, 1, 'contours', display)
  452. def visualize_spans(name, small, pagemask, spans):
  453. regions = np.zeros_like(small)
  454. for i, span in enumerate(spans):
  455. contours = [cinfo.contour for cinfo in span]
  456. cv2.drawContours(regions, contours, -1,
  457. CCOLORS[i*3 % len(CCOLORS)], -1)
  458. mask = (regions.max(axis=2) != 0)
  459. display = small.copy()
  460. display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
  461. display[pagemask == 0] //= 4
  462. debug_show(name, 2, 'spans', display)
  463. def visualize_span_points(name, small, span_points, corners):
  464. display = small.copy()
  465. for i, points in enumerate(span_points):
  466. points = norm2pix(small.shape, points, False)
  467. mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)),
  468. None,
  469. maxComponents=1)
  470. dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1)))
  471. dpm = np.dot(mean.flatten(), small_evec.flatten())
  472. point0 = mean + small_evec * (dps.min()-dpm)
  473. point1 = mean + small_evec * (dps.max()-dpm)
  474. for point in points:
  475. cv2.circle(display, fltp(point), 3,
  476. CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA)
  477. cv2.line(display, fltp(point0), fltp(point1),
  478. (255, 255, 255), 1, cv2.LINE_AA)
  479. cv2.polylines(display, [norm2pix(small.shape, corners, True)],
  480. True, (255, 255, 255))
  481. debug_show(name, 3, 'span points', display)
  482. def imgsize(img):
  483. height, width = img.shape[:2]
  484. return '{}x{}'.format(width, height)
  485. def make_keypoint_index(span_counts):
  486. nspans = len(span_counts)
  487. npts = sum(span_counts)
  488. keypoint_index = np.zeros((npts+1, 2), dtype=int)
  489. start = 1
  490. for i, count in enumerate(span_counts):
  491. end = start + count
  492. keypoint_index[start:start+end, 1] = 8+i
  493. start = end
  494. keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans
  495. return keypoint_index
  496. def optimize_params(name, small, dstpoints, span_counts, params):
  497. keypoint_index = make_keypoint_index(span_counts)
  498. def objective(pvec):
  499. ppts = project_keypoints(pvec, keypoint_index)
  500. return np.sum((dstpoints - ppts)**2)
  501. print(' initial objective is', objective(params))
  502. if DEBUG_LEVEL >= 1:
  503. projpts = project_keypoints(params, keypoint_index)
  504. display = draw_correspondences(small, dstpoints, projpts)
  505. debug_show(name, 4, 'keypoints before', display)
  506. print(' optimizing', len(params), 'parameters...')
  507. start = datetime.datetime.now()
  508. res = scipy.optimize.minimize(objective, params,
  509. method='Powell')
  510. end = datetime.datetime.now()
  511. print(' optimization took', round((end-start).total_seconds(), 2), 'sec.')
  512. print(' final objective is', res.fun)
  513. params = res.x
  514. if DEBUG_LEVEL >= 1:
  515. projpts = project_keypoints(params, keypoint_index)
  516. display = draw_correspondences(small, dstpoints, projpts)
  517. debug_show(name, 5, 'keypoints after', display)
  518. return params
  519. def get_page_dims(corners, rough_dims, params):
  520. dst_br = corners[2].flatten()
  521. dims = np.array(rough_dims)
  522. def objective(dims):
  523. proj_br = project_xy(dims, params)
  524. return np.sum((dst_br - proj_br.flatten())**2)
  525. res = scipy.optimize.minimize(objective, dims, method='Powell')
  526. dims = res.x
  527. print(' got page dims', dims[0], 'x', dims[1])
  528. return dims
  529. def remap_image(name, dirname, img, small, page_dims, params):
  530. height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
  531. height = round_nearest_multiple(height, REMAP_DECIMATE)
  532. width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]),
  533. REMAP_DECIMATE)
  534. print(' output will be {}x{}'.format(width, height))
  535. height_small = old_div(height, REMAP_DECIMATE)
  536. width_small = old_div(width, REMAP_DECIMATE)
  537. page_x_range = np.linspace(0, page_dims[0], width_small)
  538. page_y_range = np.linspace(0, page_dims[1], height_small)
  539. page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)
  540. page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
  541. page_y_coords.flatten().reshape((-1, 1))))
  542. page_xy_coords = page_xy_coords.astype(np.float32)
  543. image_points = project_xy(page_xy_coords, params)
  544. image_points = norm2pix(img.shape, image_points, False)
  545. image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
  546. image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)
  547. image_x_coords = cv2.resize(image_x_coords, (width, height),
  548. interpolation=cv2.INTER_CUBIC)
  549. image_y_coords = cv2.resize(image_y_coords, (width, height),
  550. interpolation=cv2.INTER_CUBIC)
  551. img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
  552. remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
  553. cv2.INTER_CUBIC,
  554. None, cv2.BORDER_REPLICATE)
  555. thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  556. cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)
  557. pil_image = Image.fromarray(thresh)
  558. pil_image = pil_image.convert('1')
  559. threshfile = name + '_thresh.png'
  560. pil_image.save(dirname + '/' + threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI))
  561. if DEBUG_LEVEL >= 1:
  562. height = small.shape[0]
  563. width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
  564. display = cv2.resize(thresh, (width, height),
  565. interpolation=cv2.INTER_AREA)
  566. debug_show(name, 6, 'output', display)
  567. return threshfile
  568. def main():
  569. if len(sys.argv) < 2:
  570. print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
  571. sys.exit(0)
  572. if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
  573. cv2.namedWindow(WINDOW_NAME)
  574. outfiles = []
  575. for imgfile in sys.argv[1:]:
  576. img = cv2.imread(imgfile)
  577. small = resize_to_screen(img)
  578. basename = os.path.basename(imgfile)
  579. dirname = os.path.dirname(imgfile)
  580. name, _ = os.path.splitext(basename)
  581. print('loaded', basename, 'with size', imgsize(img), end=' ')
  582. print('and resized to', imgsize(small))
  583. if DEBUG_LEVEL >= 3:
  584. debug_show(name, 0.0, 'original', small)
  585. pagemask, page_outline = get_page_extents(small)
  586. cinfo_list = get_contours(name, small, pagemask, 'text')
  587. spans = assemble_spans(name, small, pagemask, cinfo_list)
  588. if len(spans) < 3:
  589. print(' detecting lines because only', len(spans), 'text spans')
  590. cinfo_list = get_contours(name, small, pagemask, 'line')
  591. spans2 = assemble_spans(name, small, pagemask, cinfo_list)
  592. if len(spans2) > len(spans):
  593. spans = spans2
  594. if len(spans) < 1:
  595. print('skipping', name, 'because only', len(spans), 'spans')
  596. continue
  597. span_points = sample_spans(small.shape, spans)
  598. print(' got', len(spans), 'spans', end=' ')
  599. print('with', sum([len(pts) for pts in span_points]), 'points.')
  600. corners, ycoords, xcoords = keypoints_from_samples(name, small,
  601. pagemask,
  602. page_outline,
  603. span_points)
  604. rough_dims, span_counts, params = get_default_params(corners,
  605. ycoords, xcoords)
  606. dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
  607. tuple(span_points))
  608. params = optimize_params(name, small,
  609. dstpoints,
  610. span_counts, params)
  611. page_dims = get_page_dims(corners, rough_dims, params)
  612. outfile = remap_image(name, dirname, img, small, page_dims, params)
  613. outfiles.append(outfile)
  614. print(' wrote', outfile)
  615. print()
  616. print('to convert to PDF (requires ImageMagick):')
  617. print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')
  618. if __name__ == '__main__':
  619. main()