Installation for S&D
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

923 lines
26 KiB

  1. #!/usr/bin/env python
  2. ######################################################################
  3. # page_dewarp.py - Proof-of-concept of page-dewarping based on a
  4. # "cubic sheet" model. Requires OpenCV (version 3 or greater),
  5. # PIL/Pillow, and scipy.optimize.
  6. ######################################################################
  7. # Author: Matt Zucker
  8. # Date: July 2016
  9. # License: MIT License (see LICENSE.txt)
  10. ######################################################################
  11. from __future__ import division
  12. from __future__ import print_function
  13. from builtins import zip
  14. from builtins import str
  15. from builtins import range
  16. from builtins import object
  17. from past.utils import old_div
  18. import os
  19. import sys
  20. import datetime
  21. import cv2
  22. from PIL import Image
  23. import numpy as np
  24. import scipy.optimize
  25. # for some reason pylint complains about cv2 members being undefined :(
  26. # pylint: disable=E1101
  27. PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge
  28. PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge
  29. OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image
  30. OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance
  31. REMAP_DECIMATE = 16 # downscaling factor for remapping image
  32. ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px
  33. TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour
  34. TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour
  35. TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio
  36. TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour
  37. EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span
  38. EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours
  39. EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length)
  40. EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours
  41. RVEC_IDX = slice(0, 3) # index of rvec in params vector
  42. TVEC_IDX = slice(3, 6) # index of tvec in params vector
  43. CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector
  44. SPAN_MIN_WIDTH = 30 # minimum reduced px width for span
  45. SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans
  46. FOCAL_LENGTH = 1.2 # normalized focal length of camera
  47. DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all
  48. DEBUG_OUTPUT = 'file' # file, screen, both
  49. WINDOW_NAME = 'Dewarp' # Window name for visualization
  50. # nice color palette for visualizing contours, etc.
  51. CCOLORS = [
  52. (255, 0, 0),
  53. (255, 63, 0),
  54. (255, 127, 0),
  55. (255, 191, 0),
  56. (255, 255, 0),
  57. (191, 255, 0),
  58. (127, 255, 0),
  59. (63, 255, 0),
  60. (0, 255, 0),
  61. (0, 255, 63),
  62. (0, 255, 127),
  63. (0, 255, 191),
  64. (0, 255, 255),
  65. (0, 191, 255),
  66. (0, 127, 255),
  67. (0, 63, 255),
  68. (0, 0, 255),
  69. (63, 0, 255),
  70. (127, 0, 255),
  71. (191, 0, 255),
  72. (255, 0, 255),
  73. (255, 0, 191),
  74. (255, 0, 127),
  75. (255, 0, 63),
  76. ]
  77. # default intrinsic parameter matrix
  78. K = np.array([
  79. [FOCAL_LENGTH, 0, 0],
  80. [0, FOCAL_LENGTH, 0],
  81. [0, 0, 1]], dtype=np.float32)
  82. def debug_show(name, step, text, display):
  83. if DEBUG_OUTPUT != 'screen':
  84. filetext = text.replace(' ', '_')
  85. outfile = name + '_debug_' + str(step) + '_' + filetext + '.png'
  86. cv2.imwrite(outfile, display)
  87. if DEBUG_OUTPUT != 'file':
  88. image = display.copy()
  89. height = image.shape[0]
  90. cv2.putText(image, text, (16, height-16),
  91. cv2.FONT_HERSHEY_SIMPLEX, 1.0,
  92. (0, 0, 0), 3, cv2.LINE_AA)
  93. cv2.putText(image, text, (16, height-16),
  94. cv2.FONT_HERSHEY_SIMPLEX, 1.0,
  95. (255, 255, 255), 1, cv2.LINE_AA)
  96. cv2.imshow(WINDOW_NAME, image)
  97. while cv2.waitKey(5) < 0:
  98. pass
  99. def round_nearest_multiple(i, factor):
  100. i = int(i)
  101. rem = i % factor
  102. if not rem:
  103. return i
  104. else:
  105. return i + factor - rem
  106. def pix2norm(shape, pts):
  107. height, width = shape[:2]
  108. scl = 2.0/(max(height, width))
  109. offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5
  110. return (pts - offset) * scl
  111. def norm2pix(shape, pts, as_integer):
  112. height, width = shape[:2]
  113. scl = max(height, width)*0.5
  114. offset = np.array([0.5*width, 0.5*height],
  115. dtype=pts.dtype).reshape((-1, 1, 2))
  116. rval = pts * scl + offset
  117. if as_integer:
  118. return (rval + 0.5).astype(int)
  119. else:
  120. return rval
  121. def fltp(point):
  122. return tuple(point.astype(int).flatten())
  123. def draw_correspondences(img, dstpoints, projpts):
  124. display = img.copy()
  125. dstpoints = norm2pix(img.shape, dstpoints, True)
  126. projpts = norm2pix(img.shape, projpts, True)
  127. for pts, color in [(projpts, (255, 0, 0)),
  128. (dstpoints, (0, 0, 255))]:
  129. for point in pts:
  130. cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA)
  131. for point_a, point_b in zip(projpts, dstpoints):
  132. cv2.line(display, fltp(point_a), fltp(point_b),
  133. (255, 255, 255), 1, cv2.LINE_AA)
  134. return display
  135. def get_default_params(corners, ycoords, xcoords):
  136. # page width and height
  137. page_width = np.linalg.norm(corners[1] - corners[0])
  138. page_height = np.linalg.norm(corners[-1] - corners[0])
  139. rough_dims = (page_width, page_height)
  140. # our initial guess for the cubic has no slope
  141. cubic_slopes = [0.0, 0.0]
  142. # object points of flat page in 3D coordinates
  143. corners_object3d = np.array([
  144. [0, 0, 0],
  145. [page_width, 0, 0],
  146. [page_width, page_height, 0],
  147. [0, page_height, 0]])
  148. # estimate rotation and translation from four 2D-to-3D point
  149. # correspondences
  150. _, rvec, tvec = cv2.solvePnP(corners_object3d,
  151. corners, K, np.zeros(5))
  152. span_counts = [len(xc) for xc in xcoords]
  153. params = np.hstack((np.array(rvec).flatten(),
  154. np.array(tvec).flatten(),
  155. np.array(cubic_slopes).flatten(),
  156. ycoords.flatten()) +
  157. tuple(xcoords))
  158. return rough_dims, span_counts, params
  159. def project_xy(xy_coords, pvec):
  160. # get cubic polynomial coefficients given
  161. #
  162. # f(0) = 0, f'(0) = alpha
  163. # f(1) = 0, f'(1) = beta
  164. alpha, beta = tuple(pvec[CUBIC_IDX])
  165. poly = np.array([
  166. alpha + beta,
  167. -2*alpha - beta,
  168. alpha,
  169. 0])
  170. xy_coords = xy_coords.reshape((-1, 2))
  171. z_coords = np.polyval(poly, xy_coords[:, 0])
  172. objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1))))
  173. image_points, _ = cv2.projectPoints(objpoints,
  174. pvec[RVEC_IDX],
  175. pvec[TVEC_IDX],
  176. K, np.zeros(5))
  177. return image_points
  178. def project_keypoints(pvec, keypoint_index):
  179. xy_coords = pvec[keypoint_index]
  180. xy_coords[0, :] = 0
  181. return project_xy(xy_coords, pvec)
  182. def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
  183. height, width = src.shape[:2]
  184. scl_x = float(width)/maxw
  185. scl_y = float(height)/maxh
  186. scl = int(np.ceil(max(scl_x, scl_y)))
  187. if scl > 1.0:
  188. inv_scl = 1.0/scl
  189. img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA)
  190. elif copy:
  191. img = src.copy()
  192. else:
  193. img = src
  194. return img
  195. def box(width, height):
  196. return np.ones((height, width), dtype=np.uint8)
  197. def get_page_extents(small):
  198. height, width = small.shape[:2]
  199. xmin = PAGE_MARGIN_X
  200. ymin = PAGE_MARGIN_Y
  201. xmax = width-PAGE_MARGIN_X
  202. ymax = height-PAGE_MARGIN_Y
  203. page = np.zeros((height, width), dtype=np.uint8)
  204. cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1)
  205. outline = np.array([
  206. [xmin, ymin],
  207. [xmin, ymax],
  208. [xmax, ymax],
  209. [xmax, ymin]])
  210. return page, outline
  211. def get_mask(name, small, pagemask, masktype):
  212. sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
  213. if masktype == 'text':
  214. mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  215. cv2.THRESH_BINARY_INV,
  216. ADAPTIVE_WINSZ,
  217. 25)
  218. if DEBUG_LEVEL >= 3:
  219. debug_show(name, 0.1, 'thresholded', mask)
  220. mask = cv2.dilate(mask, box(9, 1))
  221. if DEBUG_LEVEL >= 3:
  222. debug_show(name, 0.2, 'dilated', mask)
  223. mask = cv2.erode(mask, box(1, 3))
  224. if DEBUG_LEVEL >= 3:
  225. debug_show(name, 0.3, 'eroded', mask)
  226. else:
  227. mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  228. cv2.THRESH_BINARY_INV,
  229. ADAPTIVE_WINSZ,
  230. 7)
  231. if DEBUG_LEVEL >= 3:
  232. debug_show(name, 0.4, 'thresholded', mask)
  233. mask = cv2.erode(mask, box(3, 1), iterations=3)
  234. if DEBUG_LEVEL >= 3:
  235. debug_show(name, 0.5, 'eroded', mask)
  236. mask = cv2.dilate(mask, box(8, 2))
  237. if DEBUG_LEVEL >= 3:
  238. debug_show(name, 0.6, 'dilated', mask)
  239. return np.minimum(mask, pagemask)
  240. def interval_measure_overlap(int_a, int_b):
  241. return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0])
  242. def angle_dist(angle_b, angle_a):
  243. diff = angle_b - angle_a
  244. while diff > np.pi:
  245. diff -= 2*np.pi
  246. while diff < -np.pi:
  247. diff += 2*np.pi
  248. return np.abs(diff)
  249. def blob_mean_and_tangent(contour):
  250. moments = cv2.moments(contour)
  251. area = moments['m00']
  252. mean_x = old_div(moments['m10'], area)
  253. mean_y = old_div(moments['m01'], area)
  254. moments_matrix = old_div(np.array([
  255. [moments['mu20'], moments['mu11']],
  256. [moments['mu11'], moments['mu02']]
  257. ]), area)
  258. _, svd_u, _ = cv2.SVDecomp(moments_matrix)
  259. center = np.array([mean_x, mean_y])
  260. tangent = svd_u[:, 0].flatten().copy()
  261. return center, tangent
  262. class ContourInfo(object):
  263. def __init__(self, contour, rect, mask):
  264. self.contour = contour
  265. self.rect = rect
  266. self.mask = mask
  267. self.center, self.tangent = blob_mean_and_tangent(contour)
  268. self.angle = np.arctan2(self.tangent[1], self.tangent[0])
  269. clx = [self.proj_x(point) for point in contour]
  270. lxmin = min(clx)
  271. lxmax = max(clx)
  272. self.local_xrng = (lxmin, lxmax)
  273. self.point0 = self.center + self.tangent * lxmin
  274. self.point1 = self.center + self.tangent * lxmax
  275. self.pred = None
  276. self.succ = None
  277. def proj_x(self, point):
  278. return np.dot(self.tangent, point.flatten()-self.center)
  279. def local_overlap(self, other):
  280. xmin = self.proj_x(other.point0)
  281. xmax = self.proj_x(other.point1)
  282. return interval_measure_overlap(self.local_xrng, (xmin, xmax))
  283. def generate_candidate_edge(cinfo_a, cinfo_b):
  284. # we want a left of b (so a's successor will be b and b's
  285. # predecessor will be a) make sure right endpoint of b is to the
  286. # right of left endpoint of a.
  287. if cinfo_a.point0[0] > cinfo_b.point1[0]:
  288. tmp = cinfo_a
  289. cinfo_a = cinfo_b
  290. cinfo_b = tmp
  291. x_overlap_a = cinfo_a.local_overlap(cinfo_b)
  292. x_overlap_b = cinfo_b.local_overlap(cinfo_a)
  293. overall_tangent = cinfo_b.center - cinfo_a.center
  294. overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0])
  295. delta_angle = old_div(max(angle_dist(cinfo_a.angle, overall_angle),
  296. angle_dist(cinfo_b.angle, overall_angle)) * 180,np.pi)
  297. # we want the largest overlap in x to be small
  298. x_overlap = max(x_overlap_a, x_overlap_b)
  299. dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)
  300. if (dist > EDGE_MAX_LENGTH or
  301. x_overlap > EDGE_MAX_OVERLAP or
  302. delta_angle > EDGE_MAX_ANGLE):
  303. return None
  304. else:
  305. score = dist + delta_angle*EDGE_ANGLE_COST
  306. return (score, cinfo_a, cinfo_b)
  307. def make_tight_mask(contour, xmin, ymin, width, height):
  308. tight_mask = np.zeros((height, width), dtype=np.uint8)
  309. tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2))
  310. cv2.drawContours(tight_mask, [tight_contour], 0,
  311. (1, 1, 1), -1)
  312. return tight_mask
  313. def get_contours(name, small, pagemask, masktype):
  314. mask = get_mask(name, small, pagemask, masktype)
  315. contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
  316. cv2.CHAIN_APPROX_NONE)
  317. contours_out = []
  318. for contour in contours:
  319. rect = cv2.boundingRect(contour)
  320. xmin, ymin, width, height = rect
  321. if (width < TEXT_MIN_WIDTH or
  322. height < TEXT_MIN_HEIGHT or
  323. width < TEXT_MIN_ASPECT*height):
  324. continue
  325. tight_mask = make_tight_mask(contour, xmin, ymin, width, height)
  326. if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
  327. continue
  328. contours_out.append(ContourInfo(contour, rect, tight_mask))
  329. if DEBUG_LEVEL >= 2:
  330. visualize_contours(name, small, contours_out)
  331. return contours_out
  332. def assemble_spans(name, small, pagemask, cinfo_list):
  333. # sort list
  334. cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])
  335. # generate all candidate edges
  336. candidate_edges = []
  337. for i, cinfo_i in enumerate(cinfo_list):
  338. for j in range(i):
  339. # note e is of the form (score, left_cinfo, right_cinfo)
  340. edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
  341. if edge is not None:
  342. candidate_edges.append(edge)
  343. # sort candidate edges by score (lower is better)
  344. candidate_edges.sort()
  345. # for each candidate edge
  346. for _, cinfo_a, cinfo_b in candidate_edges:
  347. # if left and right are unassigned, join them
  348. if cinfo_a.succ is None and cinfo_b.pred is None:
  349. cinfo_a.succ = cinfo_b
  350. cinfo_b.pred = cinfo_a
  351. # generate list of spans as output
  352. spans = []
  353. # until we have removed everything from the list
  354. while cinfo_list:
  355. # get the first on the list
  356. cinfo = cinfo_list[0]
  357. # keep following predecessors until none exists
  358. while cinfo.pred:
  359. cinfo = cinfo.pred
  360. # start a new span
  361. cur_span = []
  362. width = 0.0
  363. # follow successors til end of span
  364. while cinfo:
  365. # remove from list (sadly making this loop *also* O(n^2)
  366. cinfo_list.remove(cinfo)
  367. # add to span
  368. cur_span.append(cinfo)
  369. width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
  370. # set successor
  371. cinfo = cinfo.succ
  372. # add if long enough
  373. if width > SPAN_MIN_WIDTH:
  374. spans.append(cur_span)
  375. if DEBUG_LEVEL >= 2:
  376. visualize_spans(name, small, pagemask, spans)
  377. return spans
  378. def sample_spans(shape, spans):
  379. span_points = []
  380. for span in spans:
  381. contour_points = []
  382. for cinfo in span:
  383. yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
  384. totals = (yvals * cinfo.mask).sum(axis=0)
  385. means = old_div(totals, cinfo.mask.sum(axis=0))
  386. xmin, ymin = cinfo.rect[:2]
  387. step = SPAN_PX_PER_STEP
  388. start = old_div(((len(means)-1) % step), 2)
  389. contour_points += [(x+xmin, means[x]+ymin)
  390. for x in range(start, len(means), step)]
  391. contour_points = np.array(contour_points,
  392. dtype=np.float32).reshape((-1, 1, 2))
  393. contour_points = pix2norm(shape, contour_points)
  394. span_points.append(contour_points)
  395. return span_points
  396. def keypoints_from_samples(name, small, pagemask, page_outline,
  397. span_points):
  398. all_evecs = np.array([[0.0, 0.0]])
  399. all_weights = 0
  400. for points in span_points:
  401. _, evec = cv2.PCACompute(points.reshape((-1, 2)),
  402. None, maxComponents=1)
  403. weight = np.linalg.norm(points[-1] - points[0])
  404. all_evecs += evec * weight
  405. all_weights += weight
  406. evec = old_div(all_evecs, all_weights)
  407. x_dir = evec.flatten()
  408. if x_dir[0] < 0:
  409. x_dir = -x_dir
  410. y_dir = np.array([-x_dir[1], x_dir[0]])
  411. pagecoords = cv2.convexHull(page_outline)
  412. pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2)))
  413. pagecoords = pagecoords.reshape((-1, 2))
  414. px_coords = np.dot(pagecoords, x_dir)
  415. py_coords = np.dot(pagecoords, y_dir)
  416. px0 = px_coords.min()
  417. px1 = px_coords.max()
  418. py0 = py_coords.min()
  419. py1 = py_coords.max()
  420. p00 = px0 * x_dir + py0 * y_dir
  421. p10 = px1 * x_dir + py0 * y_dir
  422. p11 = px1 * x_dir + py1 * y_dir
  423. p01 = px0 * x_dir + py1 * y_dir
  424. corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))
  425. ycoords = []
  426. xcoords = []
  427. for points in span_points:
  428. pts = points.reshape((-1, 2))
  429. px_coords = np.dot(pts, x_dir)
  430. py_coords = np.dot(pts, y_dir)
  431. ycoords.append(py_coords.mean() - py0)
  432. xcoords.append(px_coords - px0)
  433. if DEBUG_LEVEL >= 2:
  434. visualize_span_points(name, small, span_points, corners)
  435. return corners, np.array(ycoords), xcoords
  436. def visualize_contours(name, small, cinfo_list):
  437. regions = np.zeros_like(small)
  438. for j, cinfo in enumerate(cinfo_list):
  439. cv2.drawContours(regions, [cinfo.contour], 0,
  440. CCOLORS[j % len(CCOLORS)], -1)
  441. mask = (regions.max(axis=2) != 0)
  442. display = small.copy()
  443. display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
  444. for j, cinfo in enumerate(cinfo_list):
  445. color = CCOLORS[j % len(CCOLORS)]
  446. color = tuple([old_div(c,4) for c in color])
  447. cv2.circle(display, fltp(cinfo.center), 3,
  448. (255, 255, 255), 1, cv2.LINE_AA)
  449. cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
  450. (255, 255, 255), 1, cv2.LINE_AA)
  451. debug_show(name, 1, 'contours', display)
  452. def visualize_spans(name, small, pagemask, spans):
  453. regions = np.zeros_like(small)
  454. for i, span in enumerate(spans):
  455. contours = [cinfo.contour for cinfo in span]
  456. cv2.drawContours(regions, contours, -1,
  457. CCOLORS[i*3 % len(CCOLORS)], -1)
  458. mask = (regions.max(axis=2) != 0)
  459. display = small.copy()
  460. display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
  461. display[pagemask == 0] //= 4
  462. debug_show(name, 2, 'spans', display)
  463. def visualize_span_points(name, small, span_points, corners):
  464. display = small.copy()
  465. for i, points in enumerate(span_points):
  466. points = norm2pix(small.shape, points, False)
  467. mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)),
  468. None,
  469. maxComponents=1)
  470. dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1)))
  471. dpm = np.dot(mean.flatten(), small_evec.flatten())
  472. point0 = mean + small_evec * (dps.min()-dpm)
  473. point1 = mean + small_evec * (dps.max()-dpm)
  474. for point in points:
  475. cv2.circle(display, fltp(point), 3,
  476. CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA)
  477. cv2.line(display, fltp(point0), fltp(point1),
  478. (255, 255, 255), 1, cv2.LINE_AA)
  479. cv2.polylines(display, [norm2pix(small.shape, corners, True)],
  480. True, (255, 255, 255))
  481. debug_show(name, 3, 'span points', display)
  482. def imgsize(img):
  483. height, width = img.shape[:2]
  484. return '{}x{}'.format(width, height)
  485. def make_keypoint_index(span_counts):
  486. nspans = len(span_counts)
  487. npts = sum(span_counts)
  488. keypoint_index = np.zeros((npts+1, 2), dtype=int)
  489. start = 1
  490. for i, count in enumerate(span_counts):
  491. end = start + count
  492. keypoint_index[start:start+end, 1] = 8+i
  493. start = end
  494. keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans
  495. return keypoint_index
  496. def optimize_params(name, small, dstpoints, span_counts, params):
  497. keypoint_index = make_keypoint_index(span_counts)
  498. def objective(pvec):
  499. ppts = project_keypoints(pvec, keypoint_index)
  500. return np.sum((dstpoints - ppts)**2)
  501. print(' initial objective is', objective(params))
  502. if DEBUG_LEVEL >= 1:
  503. projpts = project_keypoints(params, keypoint_index)
  504. display = draw_correspondences(small, dstpoints, projpts)
  505. debug_show(name, 4, 'keypoints before', display)
  506. print(' optimizing', len(params), 'parameters...')
  507. start = datetime.datetime.now()
  508. res = scipy.optimize.minimize(objective, params,
  509. method='Powell')
  510. end = datetime.datetime.now()
  511. print(' optimization took', round((end-start).total_seconds(), 2), 'sec.')
  512. print(' final objective is', res.fun)
  513. params = res.x
  514. if DEBUG_LEVEL >= 1:
  515. projpts = project_keypoints(params, keypoint_index)
  516. display = draw_correspondences(small, dstpoints, projpts)
  517. debug_show(name, 5, 'keypoints after', display)
  518. return params
  519. def get_page_dims(corners, rough_dims, params):
  520. dst_br = corners[2].flatten()
  521. dims = np.array(rough_dims)
  522. def objective(dims):
  523. proj_br = project_xy(dims, params)
  524. return np.sum((dst_br - proj_br.flatten())**2)
  525. res = scipy.optimize.minimize(objective, dims, method='Powell')
  526. dims = res.x
  527. print(' got page dims', dims[0], 'x', dims[1])
  528. return dims
  529. def remap_image(name, dirname, img, small, page_dims, params):
  530. height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
  531. height = round_nearest_multiple(height, REMAP_DECIMATE)
  532. width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]),
  533. REMAP_DECIMATE)
  534. print(' output will be {}x{}'.format(width, height))
  535. height_small = old_div(height, REMAP_DECIMATE)
  536. width_small = old_div(width, REMAP_DECIMATE)
  537. page_x_range = np.linspace(0, page_dims[0], width_small)
  538. page_y_range = np.linspace(0, page_dims[1], height_small)
  539. page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)
  540. page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
  541. page_y_coords.flatten().reshape((-1, 1))))
  542. page_xy_coords = page_xy_coords.astype(np.float32)
  543. image_points = project_xy(page_xy_coords, params)
  544. image_points = norm2pix(img.shape, image_points, False)
  545. image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
  546. image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)
  547. image_x_coords = cv2.resize(image_x_coords, (width, height),
  548. interpolation=cv2.INTER_CUBIC)
  549. image_y_coords = cv2.resize(image_y_coords, (width, height),
  550. interpolation=cv2.INTER_CUBIC)
  551. img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
  552. remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
  553. cv2.INTER_CUBIC,
  554. None, cv2.BORDER_REPLICATE)
  555. thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
  556. cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)
  557. pil_image = Image.fromarray(thresh)
  558. pil_image = pil_image.convert('1')
  559. threshfile = name + '_thresh.png'
  560. pil_image.save(dirname + '/' + threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI))
  561. if DEBUG_LEVEL >= 1:
  562. height = small.shape[0]
  563. width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
  564. display = cv2.resize(thresh, (width, height),
  565. interpolation=cv2.INTER_AREA)
  566. debug_show(name, 6, 'output', display)
  567. return threshfile
  568. def main():
  569. if len(sys.argv) < 2:
  570. print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
  571. sys.exit(0)
  572. if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
  573. cv2.namedWindow(WINDOW_NAME)
  574. outfiles = []
  575. for imgfile in sys.argv[1:]:
  576. img = cv2.imread(imgfile)
  577. small = resize_to_screen(img)
  578. basename = os.path.basename(imgfile)
  579. dirname = os.path.dirname(imgfile)
  580. name, _ = os.path.splitext(basename)
  581. print('loaded', basename, 'with size', imgsize(img), end=' ')
  582. print('and resized to', imgsize(small))
  583. if DEBUG_LEVEL >= 3:
  584. debug_show(name, 0.0, 'original', small)
  585. pagemask, page_outline = get_page_extents(small)
  586. cinfo_list = get_contours(name, small, pagemask, 'text')
  587. spans = assemble_spans(name, small, pagemask, cinfo_list)
  588. if len(spans) < 3:
  589. print(' detecting lines because only', len(spans), 'text spans')
  590. cinfo_list = get_contours(name, small, pagemask, 'line')
  591. spans2 = assemble_spans(name, small, pagemask, cinfo_list)
  592. if len(spans2) > len(spans):
  593. spans = spans2
  594. if len(spans) < 1:
  595. print('skipping', name, 'because only', len(spans), 'spans')
  596. continue
  597. span_points = sample_spans(small.shape, spans)
  598. print(' got', len(spans), 'spans', end=' ')
  599. print('with', sum([len(pts) for pts in span_points]), 'points.')
  600. corners, ycoords, xcoords = keypoints_from_samples(name, small,
  601. pagemask,
  602. page_outline,
  603. span_points)
  604. rough_dims, span_counts, params = get_default_params(corners,
  605. ycoords, xcoords)
  606. dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
  607. tuple(span_points))
  608. params = optimize_params(name, small,
  609. dstpoints,
  610. span_counts, params)
  611. page_dims = get_page_dims(corners, rough_dims, params)
  612. outfile = remap_image(name, dirname, img, small, page_dims, params)
  613. outfiles.append(outfile)
  614. print(' wrote', outfile)
  615. print()
  616. print('to convert to PDF (requires ImageMagick):')
  617. print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')
  618. if __name__ == '__main__':
  619. main()