diff --git a/resources/python/dewarp b/resources/python/dewarp deleted file mode 160000 index c40293a..0000000 --- a/resources/python/dewarp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c40293a606194a322fa116ba5a7bec38148e07ff diff --git a/resources/python/dewarp_2/LICENSE.txt b/resources/python/dewarp_2/LICENSE.txt new file mode 100644 index 0000000..e35410a --- /dev/null +++ b/resources/python/dewarp_2/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016, Matt Zucker + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/resources/python/dewarp_2/README.md b/resources/python/dewarp_2/README.md new file mode 100644 index 0000000..7ea5f8c --- /dev/null +++ b/resources/python/dewarp_2/README.md @@ -0,0 +1,14 @@ +page_dewarp +=========== + +Page dewarping and thresholding using a "cubic sheet" model - see full writeup at + +Requirements: + + - scipy + - OpenCV 3.0 or greater + - Image module from PIL or Pillow + +Usage: + + page_dewarp.py IMAGE1 [IMAGE2 ...] diff --git a/resources/python/dewarp_2/derive_cubic.py b/resources/python/dewarp_2/derive_cubic.py new file mode 100644 index 0000000..7b4c993 --- /dev/null +++ b/resources/python/dewarp_2/derive_cubic.py @@ -0,0 +1,46 @@ +from __future__ import print_function +import matplotlib.pyplot as plt +import numpy as np +import sympy + +# create a bunch of symbols +a, b, c, d, x, alpha, beta = sympy.symbols('a b c d x alpha beta') + +# create a polynomial function f(x) +f = a*x**3 + b*x**2 + c*x + d + +# get its derivative f'(x) +fp = f.diff(x) + +# evaluate both at x=0 and x=1 +f0 = f.subs(x, 0) +f1 = f.subs(x, 1) +fp0 = fp.subs(x, 0) +fp1 = fp.subs(x, 1) + +# we want a, b, c, d such that the following conditions hold: +# +# f(0) = 0 +# f(1) = 0 +# f'(0) = alpha +# f'(1) = beta + +S = sympy.solve([f0, f1, fp0-alpha, fp1-beta], [a, b, c, d]) + +# print the analytic solution and plot a graphical example +coeffs = [] + +num_alpha = 0.3 +num_beta = 0.03 + +for key in [a, b, c, d]: + print(key, '=', S[key]) + coeffs.append(S[key].subs(dict(alpha=num_alpha, + beta=num_beta))) + +xvals = np.linspace(0, 1, 101) +yvals = np.polyval(coeffs, xvals) + +plt.plot(xvals, yvals) +plt.show() + diff --git a/resources/python/dewarp_2/page_dewarp.py b/resources/python/dewarp_2/page_dewarp.py new file mode 100755 index 0000000..222df32 --- /dev/null +++ b/resources/python/dewarp_2/page_dewarp.py @@ -0,0 +1,922 @@ +#!/usr/bin/env python +###################################################################### +# page_dewarp.py - Proof-of-concept of page-dewarping based on a +# "cubic sheet" model. Requires OpenCV (version 3 or greater), +# PIL/Pillow, and scipy.optimize. +###################################################################### +# Author: Matt Zucker +# Date: July 2016 +# License: MIT License (see LICENSE.txt) +###################################################################### + +from __future__ import division +from __future__ import print_function +from builtins import zip +from builtins import str +from builtins import range +from builtins import object +from past.utils import old_div +import os +import sys +import datetime +import cv2 +from PIL import Image +import numpy as np +import scipy.optimize + +# for some reason pylint complains about cv2 members being undefined :( +# pylint: disable=E1101 + +PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge +PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge + +OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image +OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance +REMAP_DECIMATE = 16 # downscaling factor for remapping image + +ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px + +TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour +TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour +TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio +TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour + +EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span +EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours +EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length) +EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours + +RVEC_IDX = slice(0, 3) # index of rvec in params vector +TVEC_IDX = slice(3, 6) # index of tvec in params vector +CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector + +SPAN_MIN_WIDTH = 30 # minimum reduced px width for span +SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans +FOCAL_LENGTH = 1.2 # normalized focal length of camera + +DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all +DEBUG_OUTPUT = 'file' # file, screen, both + +WINDOW_NAME = 'Dewarp' # Window name for visualization + +# nice color palette for visualizing contours, etc. +CCOLORS = [ + (255, 0, 0), + (255, 63, 0), + (255, 127, 0), + (255, 191, 0), + (255, 255, 0), + (191, 255, 0), + (127, 255, 0), + (63, 255, 0), + (0, 255, 0), + (0, 255, 63), + (0, 255, 127), + (0, 255, 191), + (0, 255, 255), + (0, 191, 255), + (0, 127, 255), + (0, 63, 255), + (0, 0, 255), + (63, 0, 255), + (127, 0, 255), + (191, 0, 255), + (255, 0, 255), + (255, 0, 191), + (255, 0, 127), + (255, 0, 63), +] + +# default intrinsic parameter matrix +K = np.array([ + [FOCAL_LENGTH, 0, 0], + [0, FOCAL_LENGTH, 0], + [0, 0, 1]], dtype=np.float32) + + +def debug_show(name, step, text, display): + + if DEBUG_OUTPUT != 'screen': + filetext = text.replace(' ', '_') + outfile = name + '_debug_' + str(step) + '_' + filetext + '.png' + cv2.imwrite(outfile, display) + + if DEBUG_OUTPUT != 'file': + + image = display.copy() + height = image.shape[0] + + cv2.putText(image, text, (16, height-16), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, + (0, 0, 0), 3, cv2.LINE_AA) + + cv2.putText(image, text, (16, height-16), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.imshow(WINDOW_NAME, image) + + while cv2.waitKey(5) < 0: + pass + + +def round_nearest_multiple(i, factor): + i = int(i) + rem = i % factor + if not rem: + return i + else: + return i + factor - rem + + +def pix2norm(shape, pts): + height, width = shape[:2] + scl = 2.0/(max(height, width)) + offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5 + return (pts - offset) * scl + + +def norm2pix(shape, pts, as_integer): + height, width = shape[:2] + scl = max(height, width)*0.5 + offset = np.array([0.5*width, 0.5*height], + dtype=pts.dtype).reshape((-1, 1, 2)) + rval = pts * scl + offset + if as_integer: + return (rval + 0.5).astype(int) + else: + return rval + + +def fltp(point): + return tuple(point.astype(int).flatten()) + + +def draw_correspondences(img, dstpoints, projpts): + + display = img.copy() + dstpoints = norm2pix(img.shape, dstpoints, True) + projpts = norm2pix(img.shape, projpts, True) + + for pts, color in [(projpts, (255, 0, 0)), + (dstpoints, (0, 0, 255))]: + + for point in pts: + cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA) + + for point_a, point_b in zip(projpts, dstpoints): + cv2.line(display, fltp(point_a), fltp(point_b), + (255, 255, 255), 1, cv2.LINE_AA) + + return display + + +def get_default_params(corners, ycoords, xcoords): + + # page width and height + page_width = np.linalg.norm(corners[1] - corners[0]) + page_height = np.linalg.norm(corners[-1] - corners[0]) + rough_dims = (page_width, page_height) + + # our initial guess for the cubic has no slope + cubic_slopes = [0.0, 0.0] + + # object points of flat page in 3D coordinates + corners_object3d = np.array([ + [0, 0, 0], + [page_width, 0, 0], + [page_width, page_height, 0], + [0, page_height, 0]]) + + # estimate rotation and translation from four 2D-to-3D point + # correspondences + _, rvec, tvec = cv2.solvePnP(corners_object3d, + corners, K, np.zeros(5)) + + span_counts = [len(xc) for xc in xcoords] + + params = np.hstack((np.array(rvec).flatten(), + np.array(tvec).flatten(), + np.array(cubic_slopes).flatten(), + ycoords.flatten()) + + tuple(xcoords)) + + return rough_dims, span_counts, params + + +def project_xy(xy_coords, pvec): + + # get cubic polynomial coefficients given + # + # f(0) = 0, f'(0) = alpha + # f(1) = 0, f'(1) = beta + + alpha, beta = tuple(pvec[CUBIC_IDX]) + + poly = np.array([ + alpha + beta, + -2*alpha - beta, + alpha, + 0]) + + xy_coords = xy_coords.reshape((-1, 2)) + z_coords = np.polyval(poly, xy_coords[:, 0]) + + objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1)))) + + image_points, _ = cv2.projectPoints(objpoints, + pvec[RVEC_IDX], + pvec[TVEC_IDX], + K, np.zeros(5)) + + return image_points + + +def project_keypoints(pvec, keypoint_index): + + xy_coords = pvec[keypoint_index] + xy_coords[0, :] = 0 + + return project_xy(xy_coords, pvec) + + +def resize_to_screen(src, maxw=1280, maxh=700, copy=False): + + height, width = src.shape[:2] + + scl_x = float(width)/maxw + scl_y = float(height)/maxh + + scl = int(np.ceil(max(scl_x, scl_y))) + + if scl > 1.0: + inv_scl = 1.0/scl + img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA) + elif copy: + img = src.copy() + else: + img = src + + return img + + +def box(width, height): + return np.ones((height, width), dtype=np.uint8) + + +def get_page_extents(small): + + height, width = small.shape[:2] + + xmin = PAGE_MARGIN_X + ymin = PAGE_MARGIN_Y + xmax = width-PAGE_MARGIN_X + ymax = height-PAGE_MARGIN_Y + + page = np.zeros((height, width), dtype=np.uint8) + cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1) + + outline = np.array([ + [xmin, ymin], + [xmin, ymax], + [xmax, ymax], + [xmax, ymin]]) + + return page, outline + + +def get_mask(name, small, pagemask, masktype): + + sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY) + + if masktype == 'text': + + mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, + ADAPTIVE_WINSZ, + 25) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.1, 'thresholded', mask) + + mask = cv2.dilate(mask, box(9, 1)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.2, 'dilated', mask) + + mask = cv2.erode(mask, box(1, 3)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.3, 'eroded', mask) + + else: + + mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, + ADAPTIVE_WINSZ, + 7) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.4, 'thresholded', mask) + + mask = cv2.erode(mask, box(3, 1), iterations=3) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.5, 'eroded', mask) + + mask = cv2.dilate(mask, box(8, 2)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.6, 'dilated', mask) + + return np.minimum(mask, pagemask) + + +def interval_measure_overlap(int_a, int_b): + return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0]) + + +def angle_dist(angle_b, angle_a): + + diff = angle_b - angle_a + + while diff > np.pi: + diff -= 2*np.pi + + while diff < -np.pi: + diff += 2*np.pi + + return np.abs(diff) + + +def blob_mean_and_tangent(contour): + + moments = cv2.moments(contour) + + area = moments['m00'] + + mean_x = old_div(moments['m10'], area) + mean_y = old_div(moments['m01'], area) + + moments_matrix = old_div(np.array([ + [moments['mu20'], moments['mu11']], + [moments['mu11'], moments['mu02']] + ]), area) + + _, svd_u, _ = cv2.SVDecomp(moments_matrix) + + center = np.array([mean_x, mean_y]) + tangent = svd_u[:, 0].flatten().copy() + + return center, tangent + + +class ContourInfo(object): + + def __init__(self, contour, rect, mask): + + self.contour = contour + self.rect = rect + self.mask = mask + + self.center, self.tangent = blob_mean_and_tangent(contour) + + self.angle = np.arctan2(self.tangent[1], self.tangent[0]) + + clx = [self.proj_x(point) for point in contour] + + lxmin = min(clx) + lxmax = max(clx) + + self.local_xrng = (lxmin, lxmax) + + self.point0 = self.center + self.tangent * lxmin + self.point1 = self.center + self.tangent * lxmax + + self.pred = None + self.succ = None + + def proj_x(self, point): + return np.dot(self.tangent, point.flatten()-self.center) + + def local_overlap(self, other): + xmin = self.proj_x(other.point0) + xmax = self.proj_x(other.point1) + return interval_measure_overlap(self.local_xrng, (xmin, xmax)) + + +def generate_candidate_edge(cinfo_a, cinfo_b): + + # we want a left of b (so a's successor will be b and b's + # predecessor will be a) make sure right endpoint of b is to the + # right of left endpoint of a. + if cinfo_a.point0[0] > cinfo_b.point1[0]: + tmp = cinfo_a + cinfo_a = cinfo_b + cinfo_b = tmp + + x_overlap_a = cinfo_a.local_overlap(cinfo_b) + x_overlap_b = cinfo_b.local_overlap(cinfo_a) + + overall_tangent = cinfo_b.center - cinfo_a.center + overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0]) + + delta_angle = old_div(max(angle_dist(cinfo_a.angle, overall_angle), + angle_dist(cinfo_b.angle, overall_angle)) * 180,np.pi) + + # we want the largest overlap in x to be small + x_overlap = max(x_overlap_a, x_overlap_b) + + dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1) + + if (dist > EDGE_MAX_LENGTH or + x_overlap > EDGE_MAX_OVERLAP or + delta_angle > EDGE_MAX_ANGLE): + return None + else: + score = dist + delta_angle*EDGE_ANGLE_COST + return (score, cinfo_a, cinfo_b) + + +def make_tight_mask(contour, xmin, ymin, width, height): + + tight_mask = np.zeros((height, width), dtype=np.uint8) + tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2)) + + cv2.drawContours(tight_mask, [tight_contour], 0, + (1, 1, 1), -1) + + return tight_mask + + +def get_contours(name, small, pagemask, masktype): + + mask = get_mask(name, small, pagemask, masktype) + + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_NONE) + + contours_out = [] + + for contour in contours: + + rect = cv2.boundingRect(contour) + xmin, ymin, width, height = rect + + if (width < TEXT_MIN_WIDTH or + height < TEXT_MIN_HEIGHT or + width < TEXT_MIN_ASPECT*height): + continue + + tight_mask = make_tight_mask(contour, xmin, ymin, width, height) + + if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS: + continue + + contours_out.append(ContourInfo(contour, rect, tight_mask)) + + if DEBUG_LEVEL >= 2: + visualize_contours(name, small, contours_out) + + return contours_out + + +def assemble_spans(name, small, pagemask, cinfo_list): + + # sort list + cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1]) + + # generate all candidate edges + candidate_edges = [] + + for i, cinfo_i in enumerate(cinfo_list): + for j in range(i): + # note e is of the form (score, left_cinfo, right_cinfo) + edge = generate_candidate_edge(cinfo_i, cinfo_list[j]) + if edge is not None: + candidate_edges.append(edge) + + # sort candidate edges by score (lower is better) + candidate_edges.sort() + + # for each candidate edge + for _, cinfo_a, cinfo_b in candidate_edges: + # if left and right are unassigned, join them + if cinfo_a.succ is None and cinfo_b.pred is None: + cinfo_a.succ = cinfo_b + cinfo_b.pred = cinfo_a + + # generate list of spans as output + spans = [] + + # until we have removed everything from the list + while cinfo_list: + + # get the first on the list + cinfo = cinfo_list[0] + + # keep following predecessors until none exists + while cinfo.pred: + cinfo = cinfo.pred + + # start a new span + cur_span = [] + + width = 0.0 + + # follow successors til end of span + while cinfo: + # remove from list (sadly making this loop *also* O(n^2) + cinfo_list.remove(cinfo) + # add to span + cur_span.append(cinfo) + width += cinfo.local_xrng[1] - cinfo.local_xrng[0] + # set successor + cinfo = cinfo.succ + + # add if long enough + if width > SPAN_MIN_WIDTH: + spans.append(cur_span) + + if DEBUG_LEVEL >= 2: + visualize_spans(name, small, pagemask, spans) + + return spans + + +def sample_spans(shape, spans): + + span_points = [] + + for span in spans: + + contour_points = [] + + for cinfo in span: + + yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1)) + totals = (yvals * cinfo.mask).sum(axis=0) + means = old_div(totals, cinfo.mask.sum(axis=0)) + + xmin, ymin = cinfo.rect[:2] + + step = SPAN_PX_PER_STEP + start = old_div(((len(means)-1) % step), 2) + + contour_points += [(x+xmin, means[x]+ymin) + for x in range(start, len(means), step)] + + contour_points = np.array(contour_points, + dtype=np.float32).reshape((-1, 1, 2)) + + contour_points = pix2norm(shape, contour_points) + + span_points.append(contour_points) + + return span_points + + +def keypoints_from_samples(name, small, pagemask, page_outline, + span_points): + + all_evecs = np.array([[0.0, 0.0]]) + all_weights = 0 + + for points in span_points: + + _, evec = cv2.PCACompute(points.reshape((-1, 2)), + None, maxComponents=1) + + weight = np.linalg.norm(points[-1] - points[0]) + + all_evecs += evec * weight + all_weights += weight + + evec = old_div(all_evecs, all_weights) + + x_dir = evec.flatten() + + if x_dir[0] < 0: + x_dir = -x_dir + + y_dir = np.array([-x_dir[1], x_dir[0]]) + + pagecoords = cv2.convexHull(page_outline) + pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2))) + pagecoords = pagecoords.reshape((-1, 2)) + + px_coords = np.dot(pagecoords, x_dir) + py_coords = np.dot(pagecoords, y_dir) + + px0 = px_coords.min() + px1 = px_coords.max() + + py0 = py_coords.min() + py1 = py_coords.max() + + p00 = px0 * x_dir + py0 * y_dir + p10 = px1 * x_dir + py0 * y_dir + p11 = px1 * x_dir + py1 * y_dir + p01 = px0 * x_dir + py1 * y_dir + + corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2)) + + ycoords = [] + xcoords = [] + + for points in span_points: + pts = points.reshape((-1, 2)) + px_coords = np.dot(pts, x_dir) + py_coords = np.dot(pts, y_dir) + ycoords.append(py_coords.mean() - py0) + xcoords.append(px_coords - px0) + + if DEBUG_LEVEL >= 2: + visualize_span_points(name, small, span_points, corners) + + return corners, np.array(ycoords), xcoords + + +def visualize_contours(name, small, cinfo_list): + + regions = np.zeros_like(small) + + for j, cinfo in enumerate(cinfo_list): + + cv2.drawContours(regions, [cinfo.contour], 0, + CCOLORS[j % len(CCOLORS)], -1) + + mask = (regions.max(axis=2) != 0) + + display = small.copy() + display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2)) + + for j, cinfo in enumerate(cinfo_list): + color = CCOLORS[j % len(CCOLORS)] + color = tuple([old_div(c,4) for c in color]) + + cv2.circle(display, fltp(cinfo.center), 3, + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1), + (255, 255, 255), 1, cv2.LINE_AA) + + debug_show(name, 1, 'contours', display) + + +def visualize_spans(name, small, pagemask, spans): + + regions = np.zeros_like(small) + + for i, span in enumerate(spans): + contours = [cinfo.contour for cinfo in span] + cv2.drawContours(regions, contours, -1, + CCOLORS[i*3 % len(CCOLORS)], -1) + + mask = (regions.max(axis=2) != 0) + + display = small.copy() + display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2)) + display[pagemask == 0] //= 4 + + debug_show(name, 2, 'spans', display) + + +def visualize_span_points(name, small, span_points, corners): + + display = small.copy() + + for i, points in enumerate(span_points): + + points = norm2pix(small.shape, points, False) + + mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)), + None, + maxComponents=1) + + dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1))) + dpm = np.dot(mean.flatten(), small_evec.flatten()) + + point0 = mean + small_evec * (dps.min()-dpm) + point1 = mean + small_evec * (dps.max()-dpm) + + for point in points: + cv2.circle(display, fltp(point), 3, + CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA) + + cv2.line(display, fltp(point0), fltp(point1), + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.polylines(display, [norm2pix(small.shape, corners, True)], + True, (255, 255, 255)) + + debug_show(name, 3, 'span points', display) + + +def imgsize(img): + height, width = img.shape[:2] + return '{}x{}'.format(width, height) + + +def make_keypoint_index(span_counts): + + nspans = len(span_counts) + npts = sum(span_counts) + keypoint_index = np.zeros((npts+1, 2), dtype=int) + start = 1 + + for i, count in enumerate(span_counts): + end = start + count + keypoint_index[start:start+end, 1] = 8+i + start = end + + keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans + + return keypoint_index + + +def optimize_params(name, small, dstpoints, span_counts, params): + + keypoint_index = make_keypoint_index(span_counts) + + def objective(pvec): + ppts = project_keypoints(pvec, keypoint_index) + return np.sum((dstpoints - ppts)**2) + + print(' initial objective is', objective(params)) + + if DEBUG_LEVEL >= 1: + projpts = project_keypoints(params, keypoint_index) + display = draw_correspondences(small, dstpoints, projpts) + debug_show(name, 4, 'keypoints before', display) + + print(' optimizing', len(params), 'parameters...') + start = datetime.datetime.now() + res = scipy.optimize.minimize(objective, params, + method='Powell') + end = datetime.datetime.now() + print(' optimization took', round((end-start).total_seconds(), 2), 'sec.') + print(' final objective is', res.fun) + params = res.x + + if DEBUG_LEVEL >= 1: + projpts = project_keypoints(params, keypoint_index) + display = draw_correspondences(small, dstpoints, projpts) + debug_show(name, 5, 'keypoints after', display) + + return params + + +def get_page_dims(corners, rough_dims, params): + + dst_br = corners[2].flatten() + + dims = np.array(rough_dims) + + def objective(dims): + proj_br = project_xy(dims, params) + return np.sum((dst_br - proj_br.flatten())**2) + + res = scipy.optimize.minimize(objective, dims, method='Powell') + dims = res.x + + print(' got page dims', dims[0], 'x', dims[1]) + + return dims + + +def remap_image(name, img, small, page_dims, params): + + height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0] + height = round_nearest_multiple(height, REMAP_DECIMATE) + + width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]), + REMAP_DECIMATE) + + print(' output will be {}x{}'.format(width, height)) + + height_small = old_div(height, REMAP_DECIMATE) + width_small = old_div(width, REMAP_DECIMATE) + + page_x_range = np.linspace(0, page_dims[0], width_small) + page_y_range = np.linspace(0, page_dims[1], height_small) + + page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range) + + page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)), + page_y_coords.flatten().reshape((-1, 1)))) + + page_xy_coords = page_xy_coords.astype(np.float32) + + image_points = project_xy(page_xy_coords, params) + image_points = norm2pix(img.shape, image_points, False) + + image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape) + image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape) + + image_x_coords = cv2.resize(image_x_coords, (width, height), + interpolation=cv2.INTER_CUBIC) + + image_y_coords = cv2.resize(image_y_coords, (width, height), + interpolation=cv2.INTER_CUBIC) + + img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + + remapped = cv2.remap(img_gray, image_x_coords, image_y_coords, + cv2.INTER_CUBIC, + None, cv2.BORDER_REPLICATE) + + thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25) + + pil_image = Image.fromarray(thresh) + pil_image = pil_image.convert('1') + + threshfile = name + '_thresh.png' + pil_image.save(threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI)) + + if DEBUG_LEVEL >= 1: + height = small.shape[0] + width = int(round(height * float(thresh.shape[1])/thresh.shape[0])) + display = cv2.resize(thresh, (width, height), + interpolation=cv2.INTER_AREA) + debug_show(name, 6, 'output', display) + + return threshfile + + +def main(): + + if len(sys.argv) < 2: + print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]') + sys.exit(0) + + if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file': + cv2.namedWindow(WINDOW_NAME) + + outfiles = [] + + for imgfile in sys.argv[1:]: + + img = cv2.imread(imgfile) + small = resize_to_screen(img) + basename = os.path.basename(imgfile) + name, _ = os.path.splitext(basename) + + print('loaded', basename, 'with size', imgsize(img), end=' ') + print('and resized to', imgsize(small)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.0, 'original', small) + + pagemask, page_outline = get_page_extents(small) + + cinfo_list = get_contours(name, small, pagemask, 'text') + spans = assemble_spans(name, small, pagemask, cinfo_list) + + if len(spans) < 3: + print(' detecting lines because only', len(spans), 'text spans') + cinfo_list = get_contours(name, small, pagemask, 'line') + spans2 = assemble_spans(name, small, pagemask, cinfo_list) + if len(spans2) > len(spans): + spans = spans2 + + if len(spans) < 1: + print('skipping', name, 'because only', len(spans), 'spans') + continue + + span_points = sample_spans(small.shape, spans) + + print(' got', len(spans), 'spans', end=' ') + print('with', sum([len(pts) for pts in span_points]), 'points.') + + corners, ycoords, xcoords = keypoints_from_samples(name, small, + pagemask, + page_outline, + span_points) + + rough_dims, span_counts, params = get_default_params(corners, + ycoords, xcoords) + + dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) + + tuple(span_points)) + + params = optimize_params(name, small, + dstpoints, + span_counts, params) + + page_dims = get_page_dims(corners, rough_dims, params) + + outfile = remap_image(name, img, small, page_dims, params) + + outfiles.append(outfile) + + print(' wrote', outfile) + print() + + print('to convert to PDF (requires ImageMagick):') + print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf') + + +if __name__ == '__main__': + main() diff --git a/resources/python/dewarp_2/requirements.txt b/resources/python/dewarp_2/requirements.txt new file mode 100644 index 0000000..c716cb0 --- /dev/null +++ b/resources/python/dewarp_2/requirements.txt @@ -0,0 +1,5 @@ +numpy +scipy +Pillow +opencv-python +future diff --git a/resources/python/unproject b/resources/python/unproject deleted file mode 160000 index 734d21a..0000000 --- a/resources/python/unproject +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 734d21ad596000cff276ccaf118954362b5eb37e diff --git a/resources/python/unproject_2/LICENSE.txt b/resources/python/unproject_2/LICENSE.txt new file mode 100644 index 0000000..df5f7c8 --- /dev/null +++ b/resources/python/unproject_2/LICENSE.txt @@ -0,0 +1,24 @@ +The following license agreement applies to ellipse.py and rectify_ellipse.py; +also see the licensing information at the top of moments_from_contour.py. + +MIT License + +Copyright (c) 2016, Matt Zucker + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/resources/python/unproject_2/README.md b/resources/python/unproject_2/README.md new file mode 100644 index 0000000..eae67d8 --- /dev/null +++ b/resources/python/unproject_2/README.md @@ -0,0 +1,16 @@ +# unproject_text +Perspective recovery of text using transformed ellipses. See full writeup at + +# Requirements + + - Python 2 or 3 + - NumPy + - SciPy + - cv2 (from OpenCV 3.0 or later) + - matplotlib + +# Usage + +Try running + + python unproject_text.py deskew0.jpg diff --git a/resources/python/unproject_2/ellipse.py b/resources/python/unproject_2/ellipse.py new file mode 100644 index 0000000..7563ac7 --- /dev/null +++ b/resources/python/unproject_2/ellipse.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +'''Functions for representing ellipses using various +parameterizations, and converting between them. There are three +parameterizations implemented by this module: + +Geometric parameters: +--------------------- + +The geometric parameters are + + (x₀, y₀, a, b, θ) + +The most simple parameterization of an ellipse is by its center point +(x0, y0), its semimajor and semiminor axes a and b, and its rotation +angle θ. + +Conic: +------ + +This parameterization consists of six parameters A-F which establish +the implicit equation for a general conic: + + AX² + BXY + CY² + DX + EY + F = 0 + +Note that this equation may not represent only ellipses (it also +includes hyperbolas and parabolas). + +Since multiplying the entire equation by any non-zero constant results +in the same ellipse, the six parameters are only described up to +scale, yielding five degrees of freedom. We can determine a canonical +scale factor k to scale this equation by such that + + A = a²(sin θ)² + b²(cos θ)² + B = 2(b² - a²) sin θ cos θ + C = a²(cos θ)² + b²(sin θ)² + D = -2Ax₀ - By₀ + E = -Bx₀ - 2Cy₀ + F = Ax₀² + Bx₀y₀ + Cy₀² - a²b² + +...in terms of the geometric parameters (x₀, y₀, a, b, θ). + +Shape moments: +-------------- + +The shape moment parameters are + + (m₀₀, m₁₀, m₀₁, mu₂₀, mu₁₁, mu₀₂) + +An ellipse may be completely specified by its shape moments up to +order 2. These include the area m₀₀, area-weighted center (m₁₀, m₀₁), +and the three second order central moments (mu₂₀, mu₁₁, mu₀₂). + +''' + +# pylint: disable=C0103 +# pylint: disable=R0914 +# pylint: disable=E1101 + +from __future__ import print_function + +import numpy + +def _params_str(names, params): + + '''Helper function for printing out the various parameters.''' + + return '({})'.format(', '.join('{}: {:g}'.format(n, p) + for (n, p) in zip(names, params))) + +###################################################################### + +GPARAMS_NAMES = ('x0', 'y0', 'a', 'b', 'theta') +GPARAMS_DISPLAY_NAMES = ('x₀', 'y₀', 'a', 'b', 'θ') + +def gparams_str(gparams): + '''Convert geometric parameters to nice printable string.''' + return _params_str(GPARAMS_DISPLAY_NAMES, gparams) + +def gparams_evaluate(gparams, phi): + + '''Evaluate the parametric formula for an ellipse at each angle +specified in the phi array. Returns two arrays x and y of the same +size as phi. + + ''' + + x0, y0, a, b, theta = tuple(gparams) + + c = numpy.cos(theta) + s = numpy.sin(theta) + + cp = numpy.cos(phi) + sp = numpy.sin(phi) + + x = a*cp*c - b*sp*s + x0 + y = a*cp*s + b*sp*c + y0 + + return x, y + +def gparams_from_conic(conic): + + '''Convert the given conic parameters to geometric ellipse parameters.''' + + k, ab = conic_scale(conic) + + if numpy.isinf(ab): + return None + + A, B, C, D, E, F = tuple(conic) + + T = B**2 - 4*A*C + + x0 = (2*C*D - B*E)/T + y0 = (2*A*E - B*D)/T + + S = A*E**2 + C*D**2 - B*D*E + (B**2 - 4*A*C)*F + U = numpy.sqrt((A - C)**2 + B**2) + + a = -numpy.sqrt(2*S*(A+C+U))/T + b = -numpy.sqrt(2*S*(A+C-U))/T + + theta = numpy.arctan2(C-A-U, B) + + return numpy.array((x0, y0, a, b, theta)) + +def _gparams_sincos_from_moments(m): + + '''Convert from moments to canonical parameters, except postpone the + final arctan until later. Formulas determined largely by trial and + error. + + ''' + + m00, m10, m01, mu20, mu11, mu02 = tuple(m) + + x0 = m10 / m00 + y0 = m01 / m00 + + A = 4*mu02/m00 + B = -8*mu11/m00 + C = 4*mu20/m00 + + U = numpy.sqrt((A - C)**2 + B**2) + T = B**2 - 4*A*C + S = 1.0 + + a = -numpy.sqrt(2*S*(A+C+U))/T + b = -numpy.sqrt(2*S*(A+C-U))/T + + # we want a * b * pi = m00 + # + # so if we are off by some factor, we should scale a and b by this factor + # + # we need to fix things up somehow because moments have 6 DOF and + # ellipse has only 5. + area = numpy.pi * a * b + scl = numpy.sqrt(m00 / area) + a *= scl + b *= scl + + sincos = numpy.array([C-A-U, B]) + sincos /= numpy.linalg.norm(sincos) + + s, c = sincos + + return numpy.array((x0, y0, a, b, s, c)) + +def gparams_from_moments(m): + + '''Convert the given moment parameters to geometric ellipse parameters. + Formula derived through trial and error.''' + + x0, y0, a, b, s, c = _gparams_sincos_from_moments(m) + + theta = numpy.arctan2(s, c) + + return numpy.array((x0, y0, a, b, theta)) + +###################################################################### + +CONIC_NAMES = ('A', 'B', 'C', 'D', 'E', 'F') +CONIC_DISPLAY_NAMES = ('A', 'B', 'C', 'D', 'E', 'F') + +def conic_str(conic): + + '''Convert conic parameters to nice printable string.''' + return _params_str(CONIC_DISPLAY_NAMES, conic) + +def conic_scale(conic): + + '''Returns a pair (k, ab) for the given conic parameters, where k is +the scale factor to divide all parameters by in order to normalize +them, and ab is the product of the semimajor and semiminor axis +(i.e. the ellipse's area, divided by pi). If the conic does not +describe an ellipse, then this returns infinity, infinity. + + ''' + + A, B, C, D, E, F = tuple(conic) + + T = 4*A*C - B*B + + if T < 0.0: + return numpy.inf, numpy.inf + + S = A*E**2 + B**2*F + C*D**2 - B*D*E - 4*A*C*F + + if not S: + return numpy.inf, numpy.inf + + + k = 0.25*T**2/S + ab = 2.0*S/(T*numpy.sqrt(T)) + + return k, ab + +def conic_from_points(x, y): + + '''Fits conic pararameters using homogeneous least squares. The +resulting fit is unlikely to be numerically robust when the x/y +coordinates given are far from the [-1,1] interval.''' + + x = x.reshape((-1, 1)) + y = y.reshape((-1, 1)) + + M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x))) + + _, _, v = numpy.linalg.svd(M) + + return v[5, :].copy() + +def conic_transform(conic, H): + + '''Returns the parameters of a conic after being transformed through a +3x3 homography H. This is straightforward since a conic can be +represented as a symmetric matrix (see +https://en.wikipedia.org/wiki/Matrix_representation_of_conic_sections). + + ''' + + A, B, C, D, E, F = tuple(conic) + + M = numpy.array([[A, 0.5*B, 0.5*D], + [0.5*B, C, 0.5*E], + [0.5*D, 0.5*E, F]]) + + Hinv = numpy.linalg.inv(H) + + M = numpy.dot(Hinv.T, numpy.dot(M, Hinv)) + + A = M[0, 0] + B = M[0, 1]*2 + C = M[1, 1] + D = M[0, 2]*2 + E = M[1, 2]*2 + F = M[2, 2] + + return numpy.array((A, B, C, D, E, F)) + +def _conic_from_gparams_sincos(gparams_sincos): + + x0, y0, a, b, s, c = gparams_sincos + + A = a**2 * s**2 + b**2 * c**2 + B = 2*(b**2 - a**2) * s * c + C = a**2 * c**2 + b**2 * s**2 + D = -2*A*x0 - B*y0 + E = -B*x0 - 2*C*y0 + F = A*x0**2 + B*x0*y0 + C*y0**2 - a**2*b**2 + + return numpy.array((A, B, C, D, E, F)) + +def conic_from_gparams(gparams): + + '''Convert geometric parameters to conic parameters. Formulas from +https://en.wikipedia.org/wiki/Ellipse#General_ellipse. + + ''' + + x0, y0, a, b, theta = tuple(gparams) + c = numpy.cos(theta) + s = numpy.sin(theta) + + return _conic_from_gparams_sincos((x0, y0, a, b, s, c)) + +def conic_from_moments(moments): + + g = _gparams_sincos_from_moments(moments) + + return _conic_from_gparams_sincos(g) + +###################################################################### + +MOMENTS_NAMES = ('m00', 'm10', 'm01', 'mu20', 'mu11', 'mu02') +MOMENTS_DISPLAY_NAMES = ('m₀₀', 'm₁₀', 'm₀₁', 'mu₂₀', 'mu₁₁', 'mu₀₂') + +def moments_from_dict(m): + + '''Create shape moments tuple from a dictionary (i.e. returned by cv2.moments).''' + return numpy.array([m[n] for n in MOMENTS_NAMES]) + +def moments_str(m): + '''Convert shape moments to nice printable string.''' + return _params_str(MOMENTS_DISPLAY_NAMES, m) + + +def moments_from_gparams(gparams): + + '''Create shape moments from geometric parameters.''' + x0, y0, a, b, theta = tuple(gparams) + c = numpy.cos(theta) + s = numpy.sin(theta) + + m00 = a*b*numpy.pi + m10 = x0 * m00 + m01 = y0 * m00 + + mu20 = (a**2 * c**2 + b**2 * s**2) * m00 * 0.25 + mu11 = -(b**2-a**2) * s * c * m00 * 0.25 + mu02 = (a**2 * s**2 + b**2 * c**2) * m00 * 0.25 + + return numpy.array((m00, m10, m01, mu20, mu11, mu02)) + +def moments_from_conic(scaled_conic): + + '''Create shape moments from conic parameters.''' + + k, ab = conic_scale(scaled_conic) + + if numpy.isinf(ab): + return None + + conic = numpy.array(scaled_conic)/k + + A, B, C, D, E, _ = tuple(conic) + + x0 = (B*E - 2*C*D)/(4*A*C - B**2) + y0 = (-2*A*E + B*D)/(4*A*C - B**2) + + m00 = numpy.pi*ab + m10 = x0*m00 + m01 = y0*m00 + + mu20 = 0.25*C*m00 + mu11 = -0.125*B*m00 + mu02 = 0.25*A*m00 + + return numpy.array((m00, m10, m01, mu20, mu11, mu02)) + + +###################################################################### + +def _perspective_transform(pts, H): + + '''Used for testing only.''' + + assert len(pts.shape) == 3 + assert pts.shape[1:] == (1, 2) + + pts = numpy.hstack((pts.reshape((-1, 2)), + numpy.ones((len(pts), 1), dtype=pts.dtype))) + + pts = numpy.dot(pts, H.T) + + pts = pts[:, :2] / pts[:, 2].reshape((-1, 1)) + + return pts.reshape((-1, 1, 2)) + +def _test_moments(): + + # so I just realized that moments have actually 6 DOF but all + # ellipse parameterizations have 5, therefore information is lost + # when going back and forth. + + m = numpy.array([59495.5, 5.9232e+07, 1.84847e+07, 3.34079e+08, -1.94055e+08, 3.74633e+08]) + gp = gparams_from_moments(m) + + m2 = moments_from_gparams(gp) + gp2 = gparams_from_moments(m2) + + c = conic_from_moments(m) + m3 = moments_from_conic(c) + + assert numpy.allclose(gp, gp2) + assert numpy.allclose(m2, m3) + + print('here is the first thing:') + print(' {}'.format(moments_str(m))) + print() + print('the rest should all be equal pairs:') + print(' {}'.format(moments_str(m2))) + print(' {}'.format(moments_str(m3))) + print() + print(' {}'.format(gparams_str(gp))) + print(' {}'.format(gparams_str(gp2))) + print() + + +def _test_ellipse(): + + print('testing moments badness') + _test_moments() + print('pass') + + # test that we can go from conic to geometric and back + x0 = 450 + y0 = 320 + a = 300 + b = 200 + theta = -0.25 + + gparams = numpy.array((x0, y0, a, b, theta)) + + conic = conic_from_gparams(gparams) + k, ab = conic_scale(conic) + + # ensure conic created from geometric params has trivial scale + assert numpy.allclose((k, ab), (1.0, a*b)) + + # evaluate parametric curve at different angles phi + phi = numpy.linspace(0, 2*numpy.pi, 1001).reshape((-1, 1)) + x, y = gparams_evaluate(gparams, phi) + + # evaluate implicit conic formula at x,y pairs + M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x))) + implicit_output = numpy.dot(M, conic) + implicit_max = numpy.abs(implicit_output).max() + + # ensure implicit evaluates near 0 everywhere + print('max item from implicit: {} (should be close to 0)'.format(implicit_max)) + print() + + assert implicit_max < 1e-5 + + # ensure that scaled_conic has the scale we expect + k = 1e-3 + scaled_conic = conic*k + + k2, ab2 = conic_scale(scaled_conic) + + print('these should all be equal:') + print() + print(' k =', k) + print(' k2 =', k2) + assert numpy.allclose((k2, ab2), (k, a*b)) + print() + + # convert the scaled conic back to geometric parameters + gparams2 = gparams_from_conic(scaled_conic) + + print(' gparams =', gparams_str(gparams)) + + # ensure that converting back from scaled conic to geometric params is correct + print(' gparams2 =', gparams_str(gparams2)) + assert numpy.allclose(gparams, gparams2) + + # convert original geometric parameters to moments + m = moments_from_gparams(gparams) + # ...and back + gparams3 = gparams_from_moments(m) + + # ensure that converting back from moments to geometric params is correct + print(' gparams3 =', gparams_str(gparams3)) + print() + assert numpy.allclose(gparams, gparams3) + + # convert moments parameterization to conic + conic2 = conic_from_moments(m) + + # ensure that converting from moments to conics is correct + print(' conic =', conic_str(conic)) + print(' conic2 =', conic_str(conic2)) + assert numpy.allclose(conic, conic2) + + # create conic from homogeneous least squares fit of points + skip = len(x) / 10 + conic3 = conic_from_points(x[::skip], y[::skip]) + + # ensure that it has non-infinite area + k3, ab3 = conic_scale(conic3) + assert not numpy.isinf(ab3) + + # normalize + conic3 /= k3 + + # ensure that conic from HLS fit is same as other 2 + print(' conic3 =', conic_str(conic3)) + print() + assert numpy.allclose(conic, conic3) + + # convert from conic to moments + m2 = moments_from_conic(scaled_conic) + + print(' m =', moments_str(m)) + + # ensure that conics->moments yields the same result as geometric + # params -> moments. + print(' m2 =', moments_str(m2)) + assert numpy.allclose(m, m2) + + from moments_from_contour import moments_from_contour + + # create moments from contour + pts = numpy.hstack((x, y)).reshape((-1, 1, 2)) + m3 = moments_from_contour(pts) + + # ensure that moments from contour is reasonably close to moments + # from geometric params. + print(' m3 =', moments_str(m3)) + print() + assert numpy.allclose(m3, m, 1e-4, 1e-4) + + # create a homography H to map the ellipse through + hx = 0.001 + hy = 0.0015 + + H = numpy.array([ + [1, -0.2, 0], + [0, 0.7, 0], + [hx, hy, 1]]) + + T = numpy.array([ + [1, 0, 400], + [0, 1, 300], + [0, 0, 1]]) + + H = numpy.dot(T, numpy.dot(H, numpy.linalg.inv(T))) + + # transform the original points thru H + Hpts = _perspective_transform(pts, H) + + # transform the conic parameters directly thru H + Hconic = conic_transform(conic, H) + + # get the HLS fit of the conic corresponding to the transformed points + Hconic2 = conic_from_points(Hpts[::skip, :, 0], Hpts[::skip, :, 1]) + + # normalize the two conics + Hk, Hab = conic_scale(Hconic) + Hk2, Hab2 = conic_scale(Hconic2) + assert not numpy.isinf(Hab) and not numpy.isinf(Hab2) + + Hconic /= Hk + Hconic2 /= Hk2 + + # ensure that the two conics are equal + print(' Hconic =', conic_str(Hconic)) + print(' Hconic2 =', conic_str(Hconic2)) + print() + assert numpy.allclose(Hconic, Hconic2) + + # get the moments from Hconic + Hm = moments_from_conic(Hconic) + + # get the moments from the transformed points + Hm2 = moments_from_contour(Hpts) + + # ensure that the two moments are close enough + print(' Hm =', moments_str(Hm)) + print(' Hm2 =', moments_str(Hm2)) + print() + assert numpy.allclose(Hm, Hm2, 1e-4, 1e-4) + + # tests complete, now visualize + print('all tests passed!') + + try: + import cv2 + print('visualizing results...') + except ImportError: + import sys + print('not visualizing results since module cv2 not found') + sys.exit(0) + + shift = 3 + pow2 = 2**shift + + p0 = numpy.array([x0, y0], dtype=numpy.float32) + p1 = _perspective_transform(p0.reshape((-1, 1, 2)), H).flatten() + + Hgparams = gparams_from_conic(Hconic) + Hp0 = Hgparams[:2] + + skip = len(pts)/100 + + display = numpy.zeros((600, 800, 3), numpy.uint8) + + def _asint(x, as_tuple=True): + x = x*pow2 + 0.5 + x = x.astype(int) + if as_tuple: + return tuple(x) + else: + return x + + for (a, b) in zip(pts.reshape((-1, 2))[::skip], + Hpts.reshape((-1, 2))[::skip]): + + cv2.line(display, _asint(a), _asint(b), + (255, 0, 255), 1, cv2.LINE_AA, shift) + + cv2.polylines(display, [_asint(pts, False)], True, + (0, 255, 0), 1, cv2.LINE_AA, shift) + + cv2.polylines(display, [_asint(Hpts, False)], True, + (0, 0, 255), 1, cv2.LINE_AA, shift) + + r = 3.0 + + cv2.circle(display, _asint(p0), int(r*pow2+0.5), + (0, 255, 0), 1, cv2.LINE_AA, shift) + + cv2.circle(display, _asint(p1), int(r*pow2+0.5), + (255, 0, 255), 1, cv2.LINE_AA, shift) + + cv2.circle(display, _asint(Hp0), int(r*pow2+0.5), + (0, 0, 255), 1, cv2.LINE_AA, shift) + + cv2.imshow('win', display) + + print('click in the display window & hit any key to quit.') + + while cv2.waitKey(5) < 0: + pass + +if __name__ == '__main__': + + _test_ellipse() diff --git a/resources/python/unproject_2/moments_from_contour.py b/resources/python/unproject_2/moments_from_contour.py new file mode 100644 index 0000000..6dbee44 --- /dev/null +++ b/resources/python/unproject_2/moments_from_contour.py @@ -0,0 +1,123 @@ +'''The function below is ported from the OpenCV project's +contourMoments function in opencv/modules/imgproc/src/moments.cpp, +licensed as follows: + +---------------------------------------------------------------------- + +By downloading, copying, installing or using the software you agree to +this license. If you do not agree to this license, do not download, +install, copy or use the software. + + + License Agreement + For Open Source Computer Vision Library + (3-clause BSD License) + +Copyright (C) 2000-2016, Intel Corporation, all rights reserved. +Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +Copyright (C) 2009-2016, NVIDIA Corporation, all rights reserved. +Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. +Copyright (C) 2015-2016, OpenCV Foundation, all rights reserved. +Copyright (C) 2015-2016, Itseez Inc., all rights reserved. +Third party copyrights are property of their respective owners. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the names of the copyright holders nor the names of the + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +This software is provided by the copyright holders and contributors +"as is" and any express or implied warranties, including, but not +limited to, the implied warranties of merchantability and fitness for +a particular purpose are disclaimed. In no event shall copyright +holders or contributors be liable for any direct, indirect, +incidental, special, exemplary, or consequential damages (including, +but not limited to, procurement of substitute goods or services; loss +of use, data, or profits; or business interruption) however caused and +on any theory of liability, whether in contract, strict liability, or +tort (including negligence or otherwise) arising in any way out of the +use of this software, even if advised of the possibility of such +damage. + +''' + + +def moments_from_contour(xypoints): + + '''Create shape moments from points sampled from the outline of an +ellipse (note this is numerically inaccurate even for arrays of 1000s +of points). Included in this project primarily for testing purposes. + + ''' + + assert len(xypoints.shape) == 3 + assert xypoints.shape[1:] == (1, 2) + + xypoints = xypoints.reshape((-1, 2)) + + a00 = 0 + a10 = 0 + a01 = 0 + a20 = 0 + a11 = 0 + a02 = 0 + + xi_1, yi_1 = xypoints[-1] + + for xy in xypoints: + + xi, yi = xy + xi2 = xi * xi + yi2 = yi * yi + dxy = xi_1 * yi - xi * yi_1 + xii_1 = xi_1 + xi + yii_1 = yi_1 + yi + + a00 += dxy + a10 += dxy * xii_1 + a01 += dxy * yii_1 + a20 += dxy * (xi_1 * xii_1 + xi2) + a11 += dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi)) + a02 += dxy * (yi_1 * yii_1 + yi2) + + xi_1 = xi + yi_1 = yi + + if a00 > 0: + db1_2 = 0.5 + db1_6 = 0.16666666666666666666666666666667 + db1_12 = 0.083333333333333333333333333333333 + db1_24 = 0.041666666666666666666666666666667 + else: + db1_2 = -0.5 + db1_6 = -0.16666666666666666666666666666667 + db1_12 = -0.083333333333333333333333333333333 + db1_24 = -0.041666666666666666666666666666667 + + m00 = a00 * db1_2 + m10 = a10 * db1_6 + m01 = a01 * db1_6 + m20 = a20 * db1_12 + m11 = a11 * db1_24 + m02 = a02 * db1_12 + + inv_m00 = 1. / m00 + cx = m10 * inv_m00 + cy = m01 * inv_m00 + + mu20 = m20 - m10 * cx + mu11 = m11 - m10 * cy + mu02 = m02 - m01 * cy + + return m00, m10, m01, mu20, mu11, mu02 diff --git a/resources/python/unproject_2/requirements.txt b/resources/python/unproject_2/requirements.txt new file mode 100644 index 0000000..a739100 --- /dev/null +++ b/resources/python/unproject_2/requirements.txt @@ -0,0 +1,5 @@ +unproject_text +cv2 +numpy +scipy +matplotlib diff --git a/resources/python/unproject_2/unproject_text.py b/resources/python/unproject_2/unproject_text.py new file mode 100644 index 0000000..65cd98e --- /dev/null +++ b/resources/python/unproject_2/unproject_text.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals, print_function + +import sys +import numpy as np +import scipy.optimize +import matplotlib.pyplot as plt +import cv2 +import ellipse + +DEBUG_IMAGES = [] + +def debug_show(name, src): + + global DEBUG_IMAGES + + filename = 'debug{:02d}_{}.png'.format(len(DEBUG_IMAGES), name) + cv2.imwrite(filename, src) + + h, w = src.shape[:2] + + fx = w/1280.0 + fy = h/700.0 + + f = 1.0/np.ceil(max(fx, fy)) + + if f < 1.0: + img = cv2.resize(src, (0, 0), None, f, f, cv2.INTER_AREA) + else: + img = src.copy() + + DEBUG_IMAGES.append(img) + +def translation(x, y): + return np.array([[1, 0, x], [0, 1, y], [0, 0, 1]], dtype=float) + +def rotation(theta): + c = np.cos(theta) + s = np.sin(theta) + return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=float) + +def perspective_warp(a, b): + return np.array([[1, 0, 0], [0, 1, 0], [a, b, 1]], dtype=float) + +def slant(sx): + return np.array([[1, sx, 0], [0, 1, 0], [0, 0, 1]], dtype=float) + +def softmax(x, k=1.0): + b = x.max() + return np.log( np.exp(k*(x-b)).sum() ) / k + b + +def skewed_widths(contours, H): + xvals = [] + for c in contours: + pts = cv2.perspectiveTransform(c, H) + x = pts[:,:,0] + xvals.append( x.max() - x.min() ) + xvals = np.array(xvals) + return softmax(xvals, 0.1) + +def centered_warp(u0, v0, a, b): + return np.dot(translation(u0, v0), + np.dot(perspective_warp(a, b), + translation(-u0, -v0))) + +def warp_containing_points(img, pts, H, border=4, shape_only=False): + + ''' + display = img.copy() + for pt in pts.reshape((-1,2)).astype(int): + cv2.circle(display, tuple(pt), 4, (255, 0, 0), + -1, cv2.LINE_AA) + debug_show('warp', display) + ''' + + pts2 = cv2.perspectiveTransform(pts, H) + x0, y0, w, h = cv2.boundingRect(pts2) + print('got bounding rect', x0, y0, w, h) + T = translation(-x0+border, -y0+border) + TH = np.dot(T, H) + + if shape_only: + return (h+2*border, w+2*border), TH + else: + dst = cv2.warpPerspective(img, TH, (w+2*border, h+2*border), + borderMode=cv2.BORDER_REPLICATE) + return dst, TH + +def conic_area_discrepancy(conics, x, H, opt_results=None): + + areas = [] + + for conic in conics: + cx = ellipse.conic_transform(conic, H) + k, ab = ellipse.conic_scale(cx) + if np.isinf(ab): + areas.append(1e20) + else: + areas.append(ab) + + areas = np.array(areas) + + areas /= areas.mean() # rescale so mean is 1.0 + areas -= 1 # subtract off mean + + rval = 0.5*np.dot(areas, areas) + + if opt_results is not None: + if not opt_results or rval < opt_results[-1][-1]: + opt_results.append( (x, H, rval) ) + + return rval + +def threshold(img): + + if len(img.shape) > 2: + img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + + mean = img.mean() + if mean < 100: + img = 255-img + + return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, 101, 21) + +def get_contours(img): + + work = threshold(img) + + debug_show('threshold', work) + + contours, hierarchy = cv2.findContours(work, cv2.RETR_CCOMP, + cv2.CHAIN_APPROX_NONE) + + return contours, hierarchy + +def get_conics(img, contours, hierarchy, + abs_area_cutoff=0.0001, mean_area_cutoff=0.15): + + hierarchy = hierarchy.reshape((-1, 4)) + + conics = [] + used_contours = [] + areas = [] + okcontours = [] + allchildren = [] + pts = np.empty((0,1,2), dtype='float32') + centroid_accum = np.zeros(2) + total_area = 0.0 + + centroids = [] + + abs_area_cutoff *= img.shape[0] * img.shape[1] + print('abs_area_cutoff = ',abs_area_cutoff) + + for i, (c, h) in enumerate(zip(contours, hierarchy.reshape((-1, 4)))): + + next_idx, prev_idx, child_idx, parent_idx = h + + if parent_idx >= 0: + continue + + m = ellipse.moments_from_dict(cv2.moments(c)) + + if m[0] <= abs_area_cutoff: + continue + + children = [] + + while child_idx >= 0: + child_contour = contours[child_idx] + cm = cv2.moments(child_contour) + if cm['m00'] > abs_area_cutoff: + children.append(child_contour) + allchildren.append(child_contour) + child_idx = hierarchy[child_idx][0] + + if children: + work = np.zeros(img.shape[:2], dtype=np.uint8) + cv2.drawContours(work, contours, i, (1,1,1), -1) + cv2.drawContours(work, children, -1, (0,0,0), -1) + m = ellipse.moments_from_dict(cv2.moments(work, True)) + + centroids.append(m[1:3]/m[0]) + centroid_accum += m[1:3] + total_area += m[0] + pts = np.vstack((pts, c.astype('float32'))) + conic = ellipse.conic_from_moments(m) + okcontours.append(c) + conics.append(conic) + areas.append(m[0]) + + display = img.copy() + cv2.drawContours(display, okcontours+allchildren, + -1, (0, 255, 0), + 6, cv2.LINE_AA) + + debug_show('contours_only', display) + + for c, a in zip(okcontours, areas): + + x, y, w, h = cv2.boundingRect(c) + + + s = str('{:,d}'.format(int(a))) + #ctr = (x + w/2 - 15*len(s), y+h/2+10) + ctr = (x, y+h+20) + + cv2.putText(display, s, ctr, + cv2.FONT_HERSHEY_SIMPLEX, 2.0, + (0, 0, 0), 12, cv2.LINE_AA) + + cv2.putText(display, s, ctr, + cv2.FONT_HERSHEY_SIMPLEX, 2.0, + (0, 255, 0), 6, cv2.LINE_AA) + + debug_show('contours', display) + + areas = np.array(areas) + amean = areas.mean() + + print('got {} contours with {} small.'.format( + len(areas), (areas < mean_area_cutoff*amean).sum())) + + idx = np.where(areas > mean_area_cutoff*amean)[0] + + conics = np.array(conics) + conics = conics[idx] + centroid_accum /= total_area + + display = img.copy() + for conic in conics: + x0, y0, a, b, theta = ellipse.gparams_from_conic(conic) + cv2.ellipse(display, (int(x0), int(y0)), (int(a), int(b)), + theta*180/np.pi, 0, 360, (0,0,255), 6, cv2.LINE_AA) + + debug_show('conics', display) + + contours = [okcontours[i].astype('float32') for i in idx] + + if 0: + + centroids = np.array([centroids[i] for i in idx]) + areas = areas[idx] + + def polyfit(x, y): + coeffs = np.polyfit(x, y, deg=1) + ypred = np.polyval(coeffs, x) + ymean = np.mean(y) + sstot = np.sum((y - ymean)**2) + ssres = np.sum((y.flatten() - ypred.flatten())**2) + r2 = 1 - ssres/sstot + return coeffs, r2 + + xfit, xr2 = polyfit(centroids[:,0], areas) + yfit, yr2 = polyfit(centroids[:,1], areas) + + xlabel = 'X coordinate (r²={:.2f})'.format(xr2) + ylabel = 'Y coordinate (r²={:.2f})'.format(yr2) + + plt.plot(centroids[:,0], areas, 'b.', zorder=1) + plt.plot(centroids[:,1], areas, 'r.', zorder=1) + plt.gca().autoscale(False) + plt.plot([0, 3000], np.polyval(xfit, [0,3000]), 'b--', + zorder=0, label=xlabel) + plt.plot([0, 3000], np.polyval(yfit, [0,3000]), 'r--', + zorder=0, label=ylabel) + plt.legend(loc='upper right') + plt.xlabel('X/Y coordinate (px)') + plt.ylabel('Contour area (px²)') + plt.savefig('position-vs-area.pdf') + + + + return conics, contours, centroid_accum + +def optimize_conics(conics, p0): + + x0 = np.array([0.0, 0.0]) + + hfunc = lambda x: centered_warp(p0[0], p0[1], x[0], x[1]) + + opt_results = [] + + f = lambda x: conic_area_discrepancy(conics, x, hfunc(x), opt_results) + + res = scipy.optimize.minimize(f, x0, method='Powell') + + H = hfunc(res.x) + + rects = [] + + if 0: + + phi = np.linspace(0, 2*np.pi, 16, endpoint=False) + width, height = 0, 0 + for x, H, fval in opt_results: + allxy = [] + for conic in conics: + Hconic = ellipse.conic_transform(conic, H) + gparams = ellipse.gparams_from_conic(Hconic) + x, y = ellipse.gparams_evaluate(gparams, phi) + xy = np.dstack((x.reshape((-1, 1, 1)), y.reshape((-1, 1, 1)))) + allxy.append(xy) + allxy = np.vstack(tuple(allxy)).astype(np.float32) + rect = cv2.boundingRect(allxy) + rects.append(rect) + x, y, w, h = rect + width = max(width, w) + height = max(height, h) + border = int(0.05 * min(width, height)) + width += border + height += border + aspect = float(width)/height + if aspect < 2.0: + width = 2*height + else: + height = width/2 + + for i, (rect, (x, H, fval)) in enumerate(zip(rects, opt_results)): + display = np.zeros((height, width), dtype=np.uint8) + x, y, w, h = rect + xoffs = width/2 - (x+w/2) + yoffs = height/2 - (y+h/2) + for conic in conics: + Hconic = ellipse.conic_transform(conic, H) + x0, y0, a, b, theta = ellipse.gparams_from_conic(Hconic) + cv2.ellipse(display, (int(x0+xoffs), int(y0+yoffs)), (int(a), int(b)), + theta*180/np.pi, 0, 360, (255,255,255), 6, cv2.LINE_AA) + cv2.putText(display, 'Area discrepancy: {:.3f}'.format(fval), + (16, height-24), cv2.FONT_HERSHEY_SIMPLEX, 2.0, + (255,255,255), 6, cv2.LINE_AA) + cv2.imwrite('frame{:04d}.png'.format(i), display) + + return H + +def orientation_detect(img, contours, H, rho=8.0, ntheta=512): + + # ignore this, just deal with edge-detected text + + pts = np.vstack(tuple(contours)) + + shape, TH = warp_containing_points(img, pts, H, shape_only=True) + + text_edges = np.zeros(shape, dtype=np.uint8) + + for contour in contours: + contour = cv2.perspectiveTransform(contour.astype(np.float32), TH) + cv2.drawContours(text_edges, [contour.astype(int)], 0, (255,255,255)) + + debug_show('edges', text_edges) + + # generate a linspace of thetas + thetas = np.linspace(-0.5*np.pi, 0.5*np.pi, ntheta, endpoint=False) + + # rho is pixels per r bin in polar (theta, r) histogram + # irho is bins per pixel + irho = 1.0/rho + + # get height and width + h, w = text_edges.shape + + # maximum bin index is given by hypotenuse of (w, h) divided by pixels per bin + bin_max = int(np.ceil(np.hypot(w, h)*irho)) + + # initialize zeroed histogram height bin_max and width num theta + hist = np.zeros((bin_max, ntheta)) + + # let u and v be x and y coordinates (respectively) of non-zero + # pixels in edge map + v, u = np.mgrid[0:h, 0:w] + v = v[text_edges.view(bool)] + u = u[text_edges.view(bool)] + + # get center coordinates + u0 = w*0.5 + v0 = h*0.5 + + # for each i and theta = thetas[i] + for i, theta in enumerate(thetas): + + # for each nonzero edge pixel, compute bin in r direction from + # pixel location and cos/sin of theta + bin_idx = ( (-(u-u0)*np.sin(theta) # x term + + (v-v0)*np.cos(theta))*irho # y term, both + # divided by pixels + # per bin + + 0.5*bin_max ) # offset for center pixel + + assert( bin_idx.min() >= 0 and bin_idx.max() < bin_max ) + + # 0.5 is for correct rounding here + # + # e.g. np.bincount([1, 1, 0, 3]) = [1, 2, 0, 1] + # returns count of each integer in the array + + bc = np.bincount((bin_idx + 0.5).astype(int)) + + # push this into the histogram + hist[:len(bc),i] = bc + + # number of zero pixels in each column + num_zero = (hist == 0).sum(axis=0) + + # find the maximum number of zero pixels + best_theta_idx = num_zero.argmax() + + # actual detected theta - could just return this now + theta = thetas[best_theta_idx] + + # compose with previous homography + RH = np.dot(rotation(-theta), H) + + if 1: # just debug visualization + + debug_hist = (255*hist/hist.max()).astype('uint8') + debug_hist = cv2.cvtColor(debug_hist, cv2.COLOR_GRAY2RGB) + + cv2.line(debug_hist, + (best_theta_idx, 0), + (best_theta_idx, bin_max), (255,0,0), + 1, cv2.LINE_AA) + + debug_show('histogram', debug_hist) + + p0 = np.array((u0, v0)) + t = np.array((np.cos(theta), np.sin(theta))) + + warped = cv2.warpPerspective(img, TH, (shape[1], shape[0]), + borderMode=cv2.BORDER_REPLICATE) + + + debug_show('prerotate_noline', warped) + + cv2.line(warped, + tuple(map(int, p0 - rho*bin_max*t)), + tuple(map(int, p0 + rho*bin_max*t)), + (255, 0, 0), + 6, cv2.LINE_AA) + + debug_show('prerotate', warped) + + warped, _ = warp_containing_points(img, pts, RH) + debug_show('preskew', warped) + + return RH + + +def skew_detect(img, contours, RH): + + hulls = [cv2.convexHull(c) for c in contours] + pts = np.vstack(tuple(hulls)) + + + + display, TRH = warp_containing_points(img, pts, RH) + + for h in hulls: + h = cv2.perspectiveTransform(h, TRH).astype(int) + cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA) + + debug_show('convex_hulls_before', display) + + f = lambda x: skewed_widths(contours, np.dot(slant(x), RH)) + + res = scipy.optimize.minimize_scalar(f, (-2.0, 0.0, 2.0)) + + SRH = np.dot(slant(res.x), RH) + warped, Hfinal = warp_containing_points(img, pts, SRH) + + display = warped.copy() + + for h in hulls: + h = cv2.perspectiveTransform(h, Hfinal).astype(int) + cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA) + + debug_show('convex_hulls_after', display) + + debug_show('final', warped) + + return SRH + +def main(): + + img = cv2.imread(sys.argv[1]) + debug_show('input', img) + + contours, hierarchy = get_contours(img) + + conics, contours, centroid = get_conics(img, contours, hierarchy) + H = optimize_conics(conics, centroid) + RH = orientation_detect(img, contours, H) + SRH = skew_detect(img, contours, RH) + + for img in DEBUG_IMAGES: + cv2.imshow('Debug', img) + while cv2.waitKey(5) < 0: + pass + +if __name__ == '__main__': + main() +