Browse Source

Fix issue

hidden_tags_with_bookmarks
Orzu Ionut 3 years ago
parent
commit
656d14dd68
  1. 1
      resources/python/dewarp
  2. 21
      resources/python/dewarp_2/LICENSE.txt
  3. 14
      resources/python/dewarp_2/README.md
  4. 46
      resources/python/dewarp_2/derive_cubic.py
  5. 922
      resources/python/dewarp_2/page_dewarp.py
  6. 5
      resources/python/dewarp_2/requirements.txt
  7. 1
      resources/python/unproject
  8. 24
      resources/python/unproject_2/LICENSE.txt
  9. 16
      resources/python/unproject_2/README.md
  10. 630
      resources/python/unproject_2/ellipse.py
  11. 123
      resources/python/unproject_2/moments_from_contour.py
  12. 5
      resources/python/unproject_2/requirements.txt
  13. 504
      resources/python/unproject_2/unproject_text.py

1
resources/python/dewarp

@ -1 +0,0 @@
Subproject commit c40293a606194a322fa116ba5a7bec38148e07ff

21
resources/python/dewarp_2/LICENSE.txt

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016, Matt Zucker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

14
resources/python/dewarp_2/README.md

@ -0,0 +1,14 @@
page_dewarp
===========
Page dewarping and thresholding using a "cubic sheet" model - see full writeup at <https://mzucker.github.io/2016/08/15/page-dewarping.html>
Requirements:
- scipy
- OpenCV 3.0 or greater
- Image module from PIL or Pillow
Usage:
page_dewarp.py IMAGE1 [IMAGE2 ...]

46
resources/python/dewarp_2/derive_cubic.py

@ -0,0 +1,46 @@
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import sympy
# create a bunch of symbols
a, b, c, d, x, alpha, beta = sympy.symbols('a b c d x alpha beta')
# create a polynomial function f(x)
f = a*x**3 + b*x**2 + c*x + d
# get its derivative f'(x)
fp = f.diff(x)
# evaluate both at x=0 and x=1
f0 = f.subs(x, 0)
f1 = f.subs(x, 1)
fp0 = fp.subs(x, 0)
fp1 = fp.subs(x, 1)
# we want a, b, c, d such that the following conditions hold:
#
# f(0) = 0
# f(1) = 0
# f'(0) = alpha
# f'(1) = beta
S = sympy.solve([f0, f1, fp0-alpha, fp1-beta], [a, b, c, d])
# print the analytic solution and plot a graphical example
coeffs = []
num_alpha = 0.3
num_beta = 0.03
for key in [a, b, c, d]:
print(key, '=', S[key])
coeffs.append(S[key].subs(dict(alpha=num_alpha,
beta=num_beta)))
xvals = np.linspace(0, 1, 101)
yvals = np.polyval(coeffs, xvals)
plt.plot(xvals, yvals)
plt.show()

922
resources/python/dewarp_2/page_dewarp.py

@ -0,0 +1,922 @@
#!/usr/bin/env python
######################################################################
# page_dewarp.py - Proof-of-concept of page-dewarping based on a
# "cubic sheet" model. Requires OpenCV (version 3 or greater),
# PIL/Pillow, and scipy.optimize.
######################################################################
# Author: Matt Zucker
# Date: July 2016
# License: MIT License (see LICENSE.txt)
######################################################################
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import str
from builtins import range
from builtins import object
from past.utils import old_div
import os
import sys
import datetime
import cv2
from PIL import Image
import numpy as np
import scipy.optimize
# for some reason pylint complains about cv2 members being undefined :(
# pylint: disable=E1101
PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge
PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge
OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image
OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance
REMAP_DECIMATE = 16 # downscaling factor for remapping image
ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px
TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour
TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour
TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio
TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour
EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span
EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours
EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length)
EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours
RVEC_IDX = slice(0, 3) # index of rvec in params vector
TVEC_IDX = slice(3, 6) # index of tvec in params vector
CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector
SPAN_MIN_WIDTH = 30 # minimum reduced px width for span
SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans
FOCAL_LENGTH = 1.2 # normalized focal length of camera
DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all
DEBUG_OUTPUT = 'file' # file, screen, both
WINDOW_NAME = 'Dewarp' # Window name for visualization
# nice color palette for visualizing contours, etc.
CCOLORS = [
(255, 0, 0),
(255, 63, 0),
(255, 127, 0),
(255, 191, 0),
(255, 255, 0),
(191, 255, 0),
(127, 255, 0),
(63, 255, 0),
(0, 255, 0),
(0, 255, 63),
(0, 255, 127),
(0, 255, 191),
(0, 255, 255),
(0, 191, 255),
(0, 127, 255),
(0, 63, 255),
(0, 0, 255),
(63, 0, 255),
(127, 0, 255),
(191, 0, 255),
(255, 0, 255),
(255, 0, 191),
(255, 0, 127),
(255, 0, 63),
]
# default intrinsic parameter matrix
K = np.array([
[FOCAL_LENGTH, 0, 0],
[0, FOCAL_LENGTH, 0],
[0, 0, 1]], dtype=np.float32)
def debug_show(name, step, text, display):
if DEBUG_OUTPUT != 'screen':
filetext = text.replace(' ', '_')
outfile = name + '_debug_' + str(step) + '_' + filetext + '.png'
cv2.imwrite(outfile, display)
if DEBUG_OUTPUT != 'file':
image = display.copy()
height = image.shape[0]
cv2.putText(image, text, (16, height-16),
cv2.FONT_HERSHEY_SIMPLEX, 1.0,
(0, 0, 0), 3, cv2.LINE_AA)
cv2.putText(image, text, (16, height-16),
cv2.FONT_HERSHEY_SIMPLEX, 1.0,
(255, 255, 255), 1, cv2.LINE_AA)
cv2.imshow(WINDOW_NAME, image)
while cv2.waitKey(5) < 0:
pass
def round_nearest_multiple(i, factor):
i = int(i)
rem = i % factor
if not rem:
return i
else:
return i + factor - rem
def pix2norm(shape, pts):
height, width = shape[:2]
scl = 2.0/(max(height, width))
offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5
return (pts - offset) * scl
def norm2pix(shape, pts, as_integer):
height, width = shape[:2]
scl = max(height, width)*0.5
offset = np.array([0.5*width, 0.5*height],
dtype=pts.dtype).reshape((-1, 1, 2))
rval = pts * scl + offset
if as_integer:
return (rval + 0.5).astype(int)
else:
return rval
def fltp(point):
return tuple(point.astype(int).flatten())
def draw_correspondences(img, dstpoints, projpts):
display = img.copy()
dstpoints = norm2pix(img.shape, dstpoints, True)
projpts = norm2pix(img.shape, projpts, True)
for pts, color in [(projpts, (255, 0, 0)),
(dstpoints, (0, 0, 255))]:
for point in pts:
cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA)
for point_a, point_b in zip(projpts, dstpoints):
cv2.line(display, fltp(point_a), fltp(point_b),
(255, 255, 255), 1, cv2.LINE_AA)
return display
def get_default_params(corners, ycoords, xcoords):
# page width and height
page_width = np.linalg.norm(corners[1] - corners[0])
page_height = np.linalg.norm(corners[-1] - corners[0])
rough_dims = (page_width, page_height)
# our initial guess for the cubic has no slope
cubic_slopes = [0.0, 0.0]
# object points of flat page in 3D coordinates
corners_object3d = np.array([
[0, 0, 0],
[page_width, 0, 0],
[page_width, page_height, 0],
[0, page_height, 0]])
# estimate rotation and translation from four 2D-to-3D point
# correspondences
_, rvec, tvec = cv2.solvePnP(corners_object3d,
corners, K, np.zeros(5))
span_counts = [len(xc) for xc in xcoords]
params = np.hstack((np.array(rvec).flatten(),
np.array(tvec).flatten(),
np.array(cubic_slopes).flatten(),
ycoords.flatten()) +
tuple(xcoords))
return rough_dims, span_counts, params
def project_xy(xy_coords, pvec):
# get cubic polynomial coefficients given
#
# f(0) = 0, f'(0) = alpha
# f(1) = 0, f'(1) = beta
alpha, beta = tuple(pvec[CUBIC_IDX])
poly = np.array([
alpha + beta,
-2*alpha - beta,
alpha,
0])
xy_coords = xy_coords.reshape((-1, 2))
z_coords = np.polyval(poly, xy_coords[:, 0])
objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1))))
image_points, _ = cv2.projectPoints(objpoints,
pvec[RVEC_IDX],
pvec[TVEC_IDX],
K, np.zeros(5))
return image_points
def project_keypoints(pvec, keypoint_index):
xy_coords = pvec[keypoint_index]
xy_coords[0, :] = 0
return project_xy(xy_coords, pvec)
def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
height, width = src.shape[:2]
scl_x = float(width)/maxw
scl_y = float(height)/maxh
scl = int(np.ceil(max(scl_x, scl_y)))
if scl > 1.0:
inv_scl = 1.0/scl
img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA)
elif copy:
img = src.copy()
else:
img = src
return img
def box(width, height):
return np.ones((height, width), dtype=np.uint8)
def get_page_extents(small):
height, width = small.shape[:2]
xmin = PAGE_MARGIN_X
ymin = PAGE_MARGIN_Y
xmax = width-PAGE_MARGIN_X
ymax = height-PAGE_MARGIN_Y
page = np.zeros((height, width), dtype=np.uint8)
cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1)
outline = np.array([
[xmin, ymin],
[xmin, ymax],
[xmax, ymax],
[xmax, ymin]])
return page, outline
def get_mask(name, small, pagemask, masktype):
sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
if masktype == 'text':
mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV,
ADAPTIVE_WINSZ,
25)
if DEBUG_LEVEL >= 3:
debug_show(name, 0.1, 'thresholded', mask)
mask = cv2.dilate(mask, box(9, 1))
if DEBUG_LEVEL >= 3:
debug_show(name, 0.2, 'dilated', mask)
mask = cv2.erode(mask, box(1, 3))
if DEBUG_LEVEL >= 3:
debug_show(name, 0.3, 'eroded', mask)
else:
mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV,
ADAPTIVE_WINSZ,
7)
if DEBUG_LEVEL >= 3:
debug_show(name, 0.4, 'thresholded', mask)
mask = cv2.erode(mask, box(3, 1), iterations=3)
if DEBUG_LEVEL >= 3:
debug_show(name, 0.5, 'eroded', mask)
mask = cv2.dilate(mask, box(8, 2))
if DEBUG_LEVEL >= 3:
debug_show(name, 0.6, 'dilated', mask)
return np.minimum(mask, pagemask)
def interval_measure_overlap(int_a, int_b):
return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0])
def angle_dist(angle_b, angle_a):
diff = angle_b - angle_a
while diff > np.pi:
diff -= 2*np.pi
while diff < -np.pi:
diff += 2*np.pi
return np.abs(diff)
def blob_mean_and_tangent(contour):
moments = cv2.moments(contour)
area = moments['m00']
mean_x = old_div(moments['m10'], area)
mean_y = old_div(moments['m01'], area)
moments_matrix = old_div(np.array([
[moments['mu20'], moments['mu11']],
[moments['mu11'], moments['mu02']]
]), area)
_, svd_u, _ = cv2.SVDecomp(moments_matrix)
center = np.array([mean_x, mean_y])
tangent = svd_u[:, 0].flatten().copy()
return center, tangent
class ContourInfo(object):
def __init__(self, contour, rect, mask):
self.contour = contour
self.rect = rect
self.mask = mask
self.center, self.tangent = blob_mean_and_tangent(contour)
self.angle = np.arctan2(self.tangent[1], self.tangent[0])
clx = [self.proj_x(point) for point in contour]
lxmin = min(clx)
lxmax = max(clx)
self.local_xrng = (lxmin, lxmax)
self.point0 = self.center + self.tangent * lxmin
self.point1 = self.center + self.tangent * lxmax
self.pred = None
self.succ = None
def proj_x(self, point):
return np.dot(self.tangent, point.flatten()-self.center)
def local_overlap(self, other):
xmin = self.proj_x(other.point0)
xmax = self.proj_x(other.point1)
return interval_measure_overlap(self.local_xrng, (xmin, xmax))
def generate_candidate_edge(cinfo_a, cinfo_b):
# we want a left of b (so a's successor will be b and b's
# predecessor will be a) make sure right endpoint of b is to the
# right of left endpoint of a.
if cinfo_a.point0[0] > cinfo_b.point1[0]:
tmp = cinfo_a
cinfo_a = cinfo_b
cinfo_b = tmp
x_overlap_a = cinfo_a.local_overlap(cinfo_b)
x_overlap_b = cinfo_b.local_overlap(cinfo_a)
overall_tangent = cinfo_b.center - cinfo_a.center
overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0])
delta_angle = old_div(max(angle_dist(cinfo_a.angle, overall_angle),
angle_dist(cinfo_b.angle, overall_angle)) * 180,np.pi)
# we want the largest overlap in x to be small
x_overlap = max(x_overlap_a, x_overlap_b)
dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)
if (dist > EDGE_MAX_LENGTH or
x_overlap > EDGE_MAX_OVERLAP or
delta_angle > EDGE_MAX_ANGLE):
return None
else:
score = dist + delta_angle*EDGE_ANGLE_COST
return (score, cinfo_a, cinfo_b)
def make_tight_mask(contour, xmin, ymin, width, height):
tight_mask = np.zeros((height, width), dtype=np.uint8)
tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2))
cv2.drawContours(tight_mask, [tight_contour], 0,
(1, 1, 1), -1)
return tight_mask
def get_contours(name, small, pagemask, masktype):
mask = get_mask(name, small, pagemask, masktype)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_NONE)
contours_out = []
for contour in contours:
rect = cv2.boundingRect(contour)
xmin, ymin, width, height = rect
if (width < TEXT_MIN_WIDTH or
height < TEXT_MIN_HEIGHT or
width < TEXT_MIN_ASPECT*height):
continue
tight_mask = make_tight_mask(contour, xmin, ymin, width, height)
if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
continue
contours_out.append(ContourInfo(contour, rect, tight_mask))
if DEBUG_LEVEL >= 2:
visualize_contours(name, small, contours_out)
return contours_out
def assemble_spans(name, small, pagemask, cinfo_list):
# sort list
cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])
# generate all candidate edges
candidate_edges = []
for i, cinfo_i in enumerate(cinfo_list):
for j in range(i):
# note e is of the form (score, left_cinfo, right_cinfo)
edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
if edge is not None:
candidate_edges.append(edge)
# sort candidate edges by score (lower is better)
candidate_edges.sort()
# for each candidate edge
for _, cinfo_a, cinfo_b in candidate_edges:
# if left and right are unassigned, join them
if cinfo_a.succ is None and cinfo_b.pred is None:
cinfo_a.succ = cinfo_b
cinfo_b.pred = cinfo_a
# generate list of spans as output
spans = []
# until we have removed everything from the list
while cinfo_list:
# get the first on the list
cinfo = cinfo_list[0]
# keep following predecessors until none exists
while cinfo.pred:
cinfo = cinfo.pred
# start a new span
cur_span = []
width = 0.0
# follow successors til end of span
while cinfo:
# remove from list (sadly making this loop *also* O(n^2)
cinfo_list.remove(cinfo)
# add to span
cur_span.append(cinfo)
width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
# set successor
cinfo = cinfo.succ
# add if long enough
if width > SPAN_MIN_WIDTH:
spans.append(cur_span)
if DEBUG_LEVEL >= 2:
visualize_spans(name, small, pagemask, spans)
return spans
def sample_spans(shape, spans):
span_points = []
for span in spans:
contour_points = []
for cinfo in span:
yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
totals = (yvals * cinfo.mask).sum(axis=0)
means = old_div(totals, cinfo.mask.sum(axis=0))
xmin, ymin = cinfo.rect[:2]
step = SPAN_PX_PER_STEP
start = old_div(((len(means)-1) % step), 2)
contour_points += [(x+xmin, means[x]+ymin)
for x in range(start, len(means), step)]
contour_points = np.array(contour_points,
dtype=np.float32).reshape((-1, 1, 2))
contour_points = pix2norm(shape, contour_points)
span_points.append(contour_points)
return span_points
def keypoints_from_samples(name, small, pagemask, page_outline,
span_points):
all_evecs = np.array([[0.0, 0.0]])
all_weights = 0
for points in span_points:
_, evec = cv2.PCACompute(points.reshape((-1, 2)),
None, maxComponents=1)
weight = np.linalg.norm(points[-1] - points[0])
all_evecs += evec * weight
all_weights += weight
evec = old_div(all_evecs, all_weights)
x_dir = evec.flatten()
if x_dir[0] < 0:
x_dir = -x_dir
y_dir = np.array([-x_dir[1], x_dir[0]])
pagecoords = cv2.convexHull(page_outline)
pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2)))
pagecoords = pagecoords.reshape((-1, 2))
px_coords = np.dot(pagecoords, x_dir)
py_coords = np.dot(pagecoords, y_dir)
px0 = px_coords.min()
px1 = px_coords.max()
py0 = py_coords.min()
py1 = py_coords.max()
p00 = px0 * x_dir + py0 * y_dir
p10 = px1 * x_dir + py0 * y_dir
p11 = px1 * x_dir + py1 * y_dir
p01 = px0 * x_dir + py1 * y_dir
corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))
ycoords = []
xcoords = []
for points in span_points:
pts = points.reshape((-1, 2))
px_coords = np.dot(pts, x_dir)
py_coords = np.dot(pts, y_dir)
ycoords.append(py_coords.mean() - py0)
xcoords.append(px_coords - px0)
if DEBUG_LEVEL >= 2:
visualize_span_points(name, small, span_points, corners)
return corners, np.array(ycoords), xcoords
def visualize_contours(name, small, cinfo_list):
regions = np.zeros_like(small)
for j, cinfo in enumerate(cinfo_list):
cv2.drawContours(regions, [cinfo.contour], 0,
CCOLORS[j % len(CCOLORS)], -1)
mask = (regions.max(axis=2) != 0)
display = small.copy()
display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
for j, cinfo in enumerate(cinfo_list):
color = CCOLORS[j % len(CCOLORS)]
color = tuple([old_div(c,4) for c in color])
cv2.circle(display, fltp(cinfo.center), 3,
(255, 255, 255), 1, cv2.LINE_AA)
cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
(255, 255, 255), 1, cv2.LINE_AA)
debug_show(name, 1, 'contours', display)
def visualize_spans(name, small, pagemask, spans):
regions = np.zeros_like(small)
for i, span in enumerate(spans):
contours = [cinfo.contour for cinfo in span]
cv2.drawContours(regions, contours, -1,
CCOLORS[i*3 % len(CCOLORS)], -1)
mask = (regions.max(axis=2) != 0)
display = small.copy()
display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2))
display[pagemask == 0] //= 4
debug_show(name, 2, 'spans', display)
def visualize_span_points(name, small, span_points, corners):
display = small.copy()
for i, points in enumerate(span_points):
points = norm2pix(small.shape, points, False)
mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)),
None,
maxComponents=1)
dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1)))
dpm = np.dot(mean.flatten(), small_evec.flatten())
point0 = mean + small_evec * (dps.min()-dpm)
point1 = mean + small_evec * (dps.max()-dpm)
for point in points:
cv2.circle(display, fltp(point), 3,
CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA)
cv2.line(display, fltp(point0), fltp(point1),
(255, 255, 255), 1, cv2.LINE_AA)
cv2.polylines(display, [norm2pix(small.shape, corners, True)],
True, (255, 255, 255))
debug_show(name, 3, 'span points', display)
def imgsize(img):
height, width = img.shape[:2]
return '{}x{}'.format(width, height)
def make_keypoint_index(span_counts):
nspans = len(span_counts)
npts = sum(span_counts)
keypoint_index = np.zeros((npts+1, 2), dtype=int)
start = 1
for i, count in enumerate(span_counts):
end = start + count
keypoint_index[start:start+end, 1] = 8+i
start = end
keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans
return keypoint_index
def optimize_params(name, small, dstpoints, span_counts, params):
keypoint_index = make_keypoint_index(span_counts)
def objective(pvec):
ppts = project_keypoints(pvec, keypoint_index)
return np.sum((dstpoints - ppts)**2)
print(' initial objective is', objective(params))
if DEBUG_LEVEL >= 1:
projpts = project_keypoints(params, keypoint_index)
display = draw_correspondences(small, dstpoints, projpts)
debug_show(name, 4, 'keypoints before', display)
print(' optimizing', len(params), 'parameters...')
start = datetime.datetime.now()
res = scipy.optimize.minimize(objective, params,
method='Powell')
end = datetime.datetime.now()
print(' optimization took', round((end-start).total_seconds(), 2), 'sec.')
print(' final objective is', res.fun)
params = res.x
if DEBUG_LEVEL >= 1:
projpts = project_keypoints(params, keypoint_index)
display = draw_correspondences(small, dstpoints, projpts)
debug_show(name, 5, 'keypoints after', display)
return params
def get_page_dims(corners, rough_dims, params):
dst_br = corners[2].flatten()
dims = np.array(rough_dims)
def objective(dims):
proj_br = project_xy(dims, params)
return np.sum((dst_br - proj_br.flatten())**2)
res = scipy.optimize.minimize(objective, dims, method='Powell')
dims = res.x
print(' got page dims', dims[0], 'x', dims[1])
return dims
def remap_image(name, img, small, page_dims, params):
height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
height = round_nearest_multiple(height, REMAP_DECIMATE)
width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]),
REMAP_DECIMATE)
print(' output will be {}x{}'.format(width, height))
height_small = old_div(height, REMAP_DECIMATE)
width_small = old_div(width, REMAP_DECIMATE)
page_x_range = np.linspace(0, page_dims[0], width_small)
page_y_range = np.linspace(0, page_dims[1], height_small)
page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)
page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
page_y_coords.flatten().reshape((-1, 1))))
page_xy_coords = page_xy_coords.astype(np.float32)
image_points = project_xy(page_xy_coords, params)
image_points = norm2pix(img.shape, image_points, False)
image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)
image_x_coords = cv2.resize(image_x_coords, (width, height),
interpolation=cv2.INTER_CUBIC)
image_y_coords = cv2.resize(image_y_coords, (width, height),
interpolation=cv2.INTER_CUBIC)
img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
cv2.INTER_CUBIC,
None, cv2.BORDER_REPLICATE)
thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)
pil_image = Image.fromarray(thresh)
pil_image = pil_image.convert('1')
threshfile = name + '_thresh.png'
pil_image.save(threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI))
if DEBUG_LEVEL >= 1:
height = small.shape[0]
width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
display = cv2.resize(thresh, (width, height),
interpolation=cv2.INTER_AREA)
debug_show(name, 6, 'output', display)
return threshfile
def main():
if len(sys.argv) < 2:
print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
sys.exit(0)
if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
cv2.namedWindow(WINDOW_NAME)
outfiles = []
for imgfile in sys.argv[1:]:
img = cv2.imread(imgfile)
small = resize_to_screen(img)
basename = os.path.basename(imgfile)
name, _ = os.path.splitext(basename)
print('loaded', basename, 'with size', imgsize(img), end=' ')
print('and resized to', imgsize(small))
if DEBUG_LEVEL >= 3:
debug_show(name, 0.0, 'original', small)
pagemask, page_outline = get_page_extents(small)
cinfo_list = get_contours(name, small, pagemask, 'text')
spans = assemble_spans(name, small, pagemask, cinfo_list)
if len(spans) < 3:
print(' detecting lines because only', len(spans), 'text spans')
cinfo_list = get_contours(name, small, pagemask, 'line')
spans2 = assemble_spans(name, small, pagemask, cinfo_list)
if len(spans2) > len(spans):
spans = spans2
if len(spans) < 1:
print('skipping', name, 'because only', len(spans), 'spans')
continue
span_points = sample_spans(small.shape, spans)
print(' got', len(spans), 'spans', end=' ')
print('with', sum([len(pts) for pts in span_points]), 'points.')
corners, ycoords, xcoords = keypoints_from_samples(name, small,
pagemask,
page_outline,
span_points)
rough_dims, span_counts, params = get_default_params(corners,
ycoords, xcoords)
dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
tuple(span_points))
params = optimize_params(name, small,
dstpoints,
span_counts, params)
page_dims = get_page_dims(corners, rough_dims, params)
outfile = remap_image(name, img, small, page_dims, params)
outfiles.append(outfile)
print(' wrote', outfile)
print()
print('to convert to PDF (requires ImageMagick):')
print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')
if __name__ == '__main__':
main()

5
resources/python/dewarp_2/requirements.txt

@ -0,0 +1,5 @@
numpy
scipy
Pillow
opencv-python
future

1
resources/python/unproject

@ -1 +0,0 @@
Subproject commit 734d21ad596000cff276ccaf118954362b5eb37e

24
resources/python/unproject_2/LICENSE.txt

@ -0,0 +1,24 @@
The following license agreement applies to ellipse.py and rectify_ellipse.py;
also see the licensing information at the top of moments_from_contour.py.
MIT License
Copyright (c) 2016, Matt Zucker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

16
resources/python/unproject_2/README.md

@ -0,0 +1,16 @@
# unproject_text
Perspective recovery of text using transformed ellipses. See full writeup at <https://mzucker.github.io/2016/10/11/unprojecting-text-with-ellipses.html>
# Requirements
- Python 2 or 3
- NumPy
- SciPy
- cv2 (from OpenCV 3.0 or later)
- matplotlib
# Usage
Try running
python unproject_text.py deskew0.jpg

630
resources/python/unproject_2/ellipse.py

@ -0,0 +1,630 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''Functions for representing ellipses using various
parameterizations, and converting between them. There are three
parameterizations implemented by this module:
Geometric parameters:
---------------------
The geometric parameters are
(x, y, a, b, θ)
The most simple parameterization of an ellipse is by its center point
(x0, y0), its semimajor and semiminor axes a and b, and its rotation
angle θ.
Conic:
------
This parameterization consists of six parameters A-F which establish
the implicit equation for a general conic:
AX² + BXY + CY² + DX + EY + F = 0
Note that this equation may not represent only ellipses (it also
includes hyperbolas and parabolas).
Since multiplying the entire equation by any non-zero constant results
in the same ellipse, the six parameters are only described up to
scale, yielding five degrees of freedom. We can determine a canonical
scale factor k to scale this equation by such that
A = a²(sin θ)² + b²(cos θ)²
B = 2(b² - a²) sin θ cos θ
C = a²(cos θ)² + b²(sin θ)²
D = -2Ax - By
E = -Bx - 2Cy
F = Ax² + Bxy + Cy² - a²b²
...in terms of the geometric parameters (x, y, a, b, θ).
Shape moments:
--------------
The shape moment parameters are
(m, m, m, mu, mu, mu)
An ellipse may be completely specified by its shape moments up to
order 2. These include the area m, area-weighted center (m, m),
and the three second order central moments (mu, mu, mu).
'''
# pylint: disable=C0103
# pylint: disable=R0914
# pylint: disable=E1101
from __future__ import print_function
import numpy
def _params_str(names, params):
'''Helper function for printing out the various parameters.'''
return '({})'.format(', '.join('{}: {:g}'.format(n, p)
for (n, p) in zip(names, params)))
######################################################################
GPARAMS_NAMES = ('x0', 'y0', 'a', 'b', 'theta')
GPARAMS_DISPLAY_NAMES = ('x₀', 'y₀', 'a', 'b', 'θ')
def gparams_str(gparams):
'''Convert geometric parameters to nice printable string.'''
return _params_str(GPARAMS_DISPLAY_NAMES, gparams)
def gparams_evaluate(gparams, phi):
'''Evaluate the parametric formula for an ellipse at each angle
specified in the phi array. Returns two arrays x and y of the same
size as phi.
'''
x0, y0, a, b, theta = tuple(gparams)
c = numpy.cos(theta)
s = numpy.sin(theta)
cp = numpy.cos(phi)
sp = numpy.sin(phi)
x = a*cp*c - b*sp*s + x0
y = a*cp*s + b*sp*c + y0
return x, y
def gparams_from_conic(conic):
'''Convert the given conic parameters to geometric ellipse parameters.'''
k, ab = conic_scale(conic)
if numpy.isinf(ab):
return None
A, B, C, D, E, F = tuple(conic)
T = B**2 - 4*A*C
x0 = (2*C*D - B*E)/T
y0 = (2*A*E - B*D)/T
S = A*E**2 + C*D**2 - B*D*E + (B**2 - 4*A*C)*F
U = numpy.sqrt((A - C)**2 + B**2)
a = -numpy.sqrt(2*S*(A+C+U))/T
b = -numpy.sqrt(2*S*(A+C-U))/T
theta = numpy.arctan2(C-A-U, B)
return numpy.array((x0, y0, a, b, theta))
def _gparams_sincos_from_moments(m):
'''Convert from moments to canonical parameters, except postpone the
final arctan until later. Formulas determined largely by trial and
error.
'''
m00, m10, m01, mu20, mu11, mu02 = tuple(m)
x0 = m10 / m00
y0 = m01 / m00
A = 4*mu02/m00
B = -8*mu11/m00
C = 4*mu20/m00
U = numpy.sqrt((A - C)**2 + B**2)
T = B**2 - 4*A*C
S = 1.0
a = -numpy.sqrt(2*S*(A+C+U))/T
b = -numpy.sqrt(2*S*(A+C-U))/T
# we want a * b * pi = m00
#
# so if we are off by some factor, we should scale a and b by this factor
#
# we need to fix things up somehow because moments have 6 DOF and
# ellipse has only 5.
area = numpy.pi * a * b
scl = numpy.sqrt(m00 / area)
a *= scl
b *= scl
sincos = numpy.array([C-A-U, B])
sincos /= numpy.linalg.norm(sincos)
s, c = sincos
return numpy.array((x0, y0, a, b, s, c))
def gparams_from_moments(m):
'''Convert the given moment parameters to geometric ellipse parameters.
Formula derived through trial and error.'''
x0, y0, a, b, s, c = _gparams_sincos_from_moments(m)
theta = numpy.arctan2(s, c)
return numpy.array((x0, y0, a, b, theta))
######################################################################
CONIC_NAMES = ('A', 'B', 'C', 'D', 'E', 'F')
CONIC_DISPLAY_NAMES = ('A', 'B', 'C', 'D', 'E', 'F')
def conic_str(conic):
'''Convert conic parameters to nice printable string.'''
return _params_str(CONIC_DISPLAY_NAMES, conic)
def conic_scale(conic):
'''Returns a pair (k, ab) for the given conic parameters, where k is
the scale factor to divide all parameters by in order to normalize
them, and ab is the product of the semimajor and semiminor axis
(i.e. the ellipse's area, divided by pi). If the conic does not
describe an ellipse, then this returns infinity, infinity.
'''
A, B, C, D, E, F = tuple(conic)
T = 4*A*C - B*B
if T < 0.0:
return numpy.inf, numpy.inf
S = A*E**2 + B**2*F + C*D**2 - B*D*E - 4*A*C*F
if not S:
return numpy.inf, numpy.inf
k = 0.25*T**2/S
ab = 2.0*S/(T*numpy.sqrt(T))
return k, ab
def conic_from_points(x, y):
'''Fits conic pararameters using homogeneous least squares. The
resulting fit is unlikely to be numerically robust when the x/y
coordinates given are far from the [-1,1] interval.'''
x = x.reshape((-1, 1))
y = y.reshape((-1, 1))
M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x)))
_, _, v = numpy.linalg.svd(M)
return v[5, :].copy()
def conic_transform(conic, H):
'''Returns the parameters of a conic after being transformed through a
3x3 homography H. This is straightforward since a conic can be
represented as a symmetric matrix (see
https://en.wikipedia.org/wiki/Matrix_representation_of_conic_sections).
'''
A, B, C, D, E, F = tuple(conic)
M = numpy.array([[A, 0.5*B, 0.5*D],
[0.5*B, C, 0.5*E],
[0.5*D, 0.5*E, F]])
Hinv = numpy.linalg.inv(H)
M = numpy.dot(Hinv.T, numpy.dot(M, Hinv))
A = M[0, 0]
B = M[0, 1]*2
C = M[1, 1]
D = M[0, 2]*2
E = M[1, 2]*2
F = M[2, 2]
return numpy.array((A, B, C, D, E, F))
def _conic_from_gparams_sincos(gparams_sincos):
x0, y0, a, b, s, c = gparams_sincos
A = a**2 * s**2 + b**2 * c**2
B = 2*(b**2 - a**2) * s * c
C = a**2 * c**2 + b**2 * s**2
D = -2*A*x0 - B*y0
E = -B*x0 - 2*C*y0
F = A*x0**2 + B*x0*y0 + C*y0**2 - a**2*b**2
return numpy.array((A, B, C, D, E, F))
def conic_from_gparams(gparams):
'''Convert geometric parameters to conic parameters. Formulas from
https://en.wikipedia.org/wiki/Ellipse#General_ellipse.
'''
x0, y0, a, b, theta = tuple(gparams)
c = numpy.cos(theta)
s = numpy.sin(theta)
return _conic_from_gparams_sincos((x0, y0, a, b, s, c))
def conic_from_moments(moments):
g = _gparams_sincos_from_moments(moments)
return _conic_from_gparams_sincos(g)
######################################################################
MOMENTS_NAMES = ('m00', 'm10', 'm01', 'mu20', 'mu11', 'mu02')
MOMENTS_DISPLAY_NAMES = ('m₀₀', 'm₁₀', 'm₀₁', 'mu₂₀', 'mu₁₁', 'mu₀₂')
def moments_from_dict(m):
'''Create shape moments tuple from a dictionary (i.e. returned by cv2.moments).'''
return numpy.array([m[n] for n in MOMENTS_NAMES])
def moments_str(m):
'''Convert shape moments to nice printable string.'''
return _params_str(MOMENTS_DISPLAY_NAMES, m)
def moments_from_gparams(gparams):
'''Create shape moments from geometric parameters.'''
x0, y0, a, b, theta = tuple(gparams)
c = numpy.cos(theta)
s = numpy.sin(theta)
m00 = a*b*numpy.pi
m10 = x0 * m00
m01 = y0 * m00
mu20 = (a**2 * c**2 + b**2 * s**2) * m00 * 0.25
mu11 = -(b**2-a**2) * s * c * m00 * 0.25
mu02 = (a**2 * s**2 + b**2 * c**2) * m00 * 0.25
return numpy.array((m00, m10, m01, mu20, mu11, mu02))
def moments_from_conic(scaled_conic):
'''Create shape moments from conic parameters.'''
k, ab = conic_scale(scaled_conic)
if numpy.isinf(ab):
return None
conic = numpy.array(scaled_conic)/k
A, B, C, D, E, _ = tuple(conic)
x0 = (B*E - 2*C*D)/(4*A*C - B**2)
y0 = (-2*A*E + B*D)/(4*A*C - B**2)
m00 = numpy.pi*ab
m10 = x0*m00
m01 = y0*m00
mu20 = 0.25*C*m00
mu11 = -0.125*B*m00
mu02 = 0.25*A*m00
return numpy.array((m00, m10, m01, mu20, mu11, mu02))
######################################################################
def _perspective_transform(pts, H):
'''Used for testing only.'''
assert len(pts.shape) == 3
assert pts.shape[1:] == (1, 2)
pts = numpy.hstack((pts.reshape((-1, 2)),
numpy.ones((len(pts), 1), dtype=pts.dtype)))
pts = numpy.dot(pts, H.T)
pts = pts[:, :2] / pts[:, 2].reshape((-1, 1))
return pts.reshape((-1, 1, 2))
def _test_moments():
# so I just realized that moments have actually 6 DOF but all
# ellipse parameterizations have 5, therefore information is lost
# when going back and forth.
m = numpy.array([59495.5, 5.9232e+07, 1.84847e+07, 3.34079e+08, -1.94055e+08, 3.74633e+08])
gp = gparams_from_moments(m)
m2 = moments_from_gparams(gp)
gp2 = gparams_from_moments(m2)
c = conic_from_moments(m)
m3 = moments_from_conic(c)
assert numpy.allclose(gp, gp2)
assert numpy.allclose(m2, m3)
print('here is the first thing:')
print(' {}'.format(moments_str(m)))
print()
print('the rest should all be equal pairs:')
print(' {}'.format(moments_str(m2)))
print(' {}'.format(moments_str(m3)))
print()
print(' {}'.format(gparams_str(gp)))
print(' {}'.format(gparams_str(gp2)))
print()
def _test_ellipse():
print('testing moments badness')
_test_moments()
print('pass')
# test that we can go from conic to geometric and back
x0 = 450
y0 = 320
a = 300
b = 200
theta = -0.25
gparams = numpy.array((x0, y0, a, b, theta))
conic = conic_from_gparams(gparams)
k, ab = conic_scale(conic)
# ensure conic created from geometric params has trivial scale
assert numpy.allclose((k, ab), (1.0, a*b))
# evaluate parametric curve at different angles phi
phi = numpy.linspace(0, 2*numpy.pi, 1001).reshape((-1, 1))
x, y = gparams_evaluate(gparams, phi)
# evaluate implicit conic formula at x,y pairs
M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x)))
implicit_output = numpy.dot(M, conic)
implicit_max = numpy.abs(implicit_output).max()
# ensure implicit evaluates near 0 everywhere
print('max item from implicit: {} (should be close to 0)'.format(implicit_max))
print()
assert implicit_max < 1e-5
# ensure that scaled_conic has the scale we expect
k = 1e-3
scaled_conic = conic*k
k2, ab2 = conic_scale(scaled_conic)
print('these should all be equal:')
print()
print(' k =', k)
print(' k2 =', k2)
assert numpy.allclose((k2, ab2), (k, a*b))
print()
# convert the scaled conic back to geometric parameters
gparams2 = gparams_from_conic(scaled_conic)
print(' gparams =', gparams_str(gparams))
# ensure that converting back from scaled conic to geometric params is correct
print(' gparams2 =', gparams_str(gparams2))
assert numpy.allclose(gparams, gparams2)
# convert original geometric parameters to moments
m = moments_from_gparams(gparams)
# ...and back
gparams3 = gparams_from_moments(m)
# ensure that converting back from moments to geometric params is correct
print(' gparams3 =', gparams_str(gparams3))
print()
assert numpy.allclose(gparams, gparams3)
# convert moments parameterization to conic
conic2 = conic_from_moments(m)
# ensure that converting from moments to conics is correct
print(' conic =', conic_str(conic))
print(' conic2 =', conic_str(conic2))
assert numpy.allclose(conic, conic2)
# create conic from homogeneous least squares fit of points
skip = len(x) / 10
conic3 = conic_from_points(x[::skip], y[::skip])
# ensure that it has non-infinite area
k3, ab3 = conic_scale(conic3)
assert not numpy.isinf(ab3)
# normalize
conic3 /= k3
# ensure that conic from HLS fit is same as other 2
print(' conic3 =', conic_str(conic3))
print()
assert numpy.allclose(conic, conic3)
# convert from conic to moments
m2 = moments_from_conic(scaled_conic)
print(' m =', moments_str(m))
# ensure that conics->moments yields the same result as geometric
# params -> moments.
print(' m2 =', moments_str(m2))
assert numpy.allclose(m, m2)
from moments_from_contour import moments_from_contour
# create moments from contour
pts = numpy.hstack((x, y)).reshape((-1, 1, 2))
m3 = moments_from_contour(pts)
# ensure that moments from contour is reasonably close to moments
# from geometric params.
print(' m3 =', moments_str(m3))
print()
assert numpy.allclose(m3, m, 1e-4, 1e-4)
# create a homography H to map the ellipse through
hx = 0.001
hy = 0.0015
H = numpy.array([
[1, -0.2, 0],
[0, 0.7, 0],
[hx, hy, 1]])
T = numpy.array([
[1, 0, 400],
[0, 1, 300],
[0, 0, 1]])
H = numpy.dot(T, numpy.dot(H, numpy.linalg.inv(T)))
# transform the original points thru H
Hpts = _perspective_transform(pts, H)
# transform the conic parameters directly thru H
Hconic = conic_transform(conic, H)
# get the HLS fit of the conic corresponding to the transformed points
Hconic2 = conic_from_points(Hpts[::skip, :, 0], Hpts[::skip, :, 1])
# normalize the two conics
Hk, Hab = conic_scale(Hconic)
Hk2, Hab2 = conic_scale(Hconic2)
assert not numpy.isinf(Hab) and not numpy.isinf(Hab2)
Hconic /= Hk
Hconic2 /= Hk2
# ensure that the two conics are equal
print(' Hconic =', conic_str(Hconic))
print(' Hconic2 =', conic_str(Hconic2))
print()
assert numpy.allclose(Hconic, Hconic2)
# get the moments from Hconic
Hm = moments_from_conic(Hconic)
# get the moments from the transformed points
Hm2 = moments_from_contour(Hpts)
# ensure that the two moments are close enough
print(' Hm =', moments_str(Hm))
print(' Hm2 =', moments_str(Hm2))
print()
assert numpy.allclose(Hm, Hm2, 1e-4, 1e-4)
# tests complete, now visualize
print('all tests passed!')
try:
import cv2
print('visualizing results...')
except ImportError:
import sys
print('not visualizing results since module cv2 not found')
sys.exit(0)
shift = 3
pow2 = 2**shift
p0 = numpy.array([x0, y0], dtype=numpy.float32)
p1 = _perspective_transform(p0.reshape((-1, 1, 2)), H).flatten()
Hgparams = gparams_from_conic(Hconic)
Hp0 = Hgparams[:2]
skip = len(pts)/100
display = numpy.zeros((600, 800, 3), numpy.uint8)
def _asint(x, as_tuple=True):
x = x*pow2 + 0.5
x = x.astype(int)
if as_tuple:
return tuple(x)
else:
return x
for (a, b) in zip(pts.reshape((-1, 2))[::skip],
Hpts.reshape((-1, 2))[::skip]):
cv2.line(display, _asint(a), _asint(b),
(255, 0, 255), 1, cv2.LINE_AA, shift)
cv2.polylines(display, [_asint(pts, False)], True,
(0, 255, 0), 1, cv2.LINE_AA, shift)
cv2.polylines(display, [_asint(Hpts, False)], True,
(0, 0, 255), 1, cv2.LINE_AA, shift)
r = 3.0
cv2.circle(display, _asint(p0), int(r*pow2+0.5),
(0, 255, 0), 1, cv2.LINE_AA, shift)
cv2.circle(display, _asint(p1), int(r*pow2+0.5),
(255, 0, 255), 1, cv2.LINE_AA, shift)
cv2.circle(display, _asint(Hp0), int(r*pow2+0.5),
(0, 0, 255), 1, cv2.LINE_AA, shift)
cv2.imshow('win', display)
print('click in the display window & hit any key to quit.')
while cv2.waitKey(5) < 0:
pass
if __name__ == '__main__':
_test_ellipse()

123
resources/python/unproject_2/moments_from_contour.py

@ -0,0 +1,123 @@
'''The function below is ported from the OpenCV project's
contourMoments function in opencv/modules/imgproc/src/moments.cpp,
licensed as follows:
----------------------------------------------------------------------
By downloading, copying, installing or using the software you agree to
this license. If you do not agree to this license, do not download,
install, copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Copyright (C) 2000-2016, Intel Corporation, all rights reserved.
Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
Copyright (C) 2009-2016, NVIDIA Corporation, all rights reserved.
Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
Copyright (C) 2015-2016, OpenCV Foundation, all rights reserved.
Copyright (C) 2015-2016, Itseez Inc., all rights reserved.
Third party copyrights are property of their respective owners.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the names of the copyright holders nor the names of the
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
This software is provided by the copyright holders and contributors
"as is" and any express or implied warranties, including, but not
limited to, the implied warranties of merchantability and fitness for
a particular purpose are disclaimed. In no event shall copyright
holders or contributors be liable for any direct, indirect,
incidental, special, exemplary, or consequential damages (including,
but not limited to, procurement of substitute goods or services; loss
of use, data, or profits; or business interruption) however caused and
on any theory of liability, whether in contract, strict liability, or
tort (including negligence or otherwise) arising in any way out of the
use of this software, even if advised of the possibility of such
damage.
'''
def moments_from_contour(xypoints):
'''Create shape moments from points sampled from the outline of an
ellipse (note this is numerically inaccurate even for arrays of 1000s
of points). Included in this project primarily for testing purposes.
'''
assert len(xypoints.shape) == 3
assert xypoints.shape[1:] == (1, 2)
xypoints = xypoints.reshape((-1, 2))
a00 = 0
a10 = 0
a01 = 0
a20 = 0
a11 = 0
a02 = 0
xi_1, yi_1 = xypoints[-1]
for xy in xypoints:
xi, yi = xy
xi2 = xi * xi
yi2 = yi * yi
dxy = xi_1 * yi - xi * yi_1
xii_1 = xi_1 + xi
yii_1 = yi_1 + yi
a00 += dxy
a10 += dxy * xii_1
a01 += dxy * yii_1
a20 += dxy * (xi_1 * xii_1 + xi2)
a11 += dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi))
a02 += dxy * (yi_1 * yii_1 + yi2)
xi_1 = xi
yi_1 = yi
if a00 > 0:
db1_2 = 0.5
db1_6 = 0.16666666666666666666666666666667
db1_12 = 0.083333333333333333333333333333333
db1_24 = 0.041666666666666666666666666666667
else:
db1_2 = -0.5
db1_6 = -0.16666666666666666666666666666667
db1_12 = -0.083333333333333333333333333333333
db1_24 = -0.041666666666666666666666666666667
m00 = a00 * db1_2
m10 = a10 * db1_6
m01 = a01 * db1_6
m20 = a20 * db1_12
m11 = a11 * db1_24
m02 = a02 * db1_12
inv_m00 = 1. / m00
cx = m10 * inv_m00
cy = m01 * inv_m00
mu20 = m20 - m10 * cx
mu11 = m11 - m10 * cy
mu02 = m02 - m01 * cy
return m00, m10, m01, mu20, mu11, mu02

5
resources/python/unproject_2/requirements.txt

@ -0,0 +1,5 @@
unproject_text
cv2
numpy
scipy
matplotlib

504
resources/python/unproject_2/unproject_text.py

@ -0,0 +1,504 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import sys
import numpy as np
import scipy.optimize
import matplotlib.pyplot as plt
import cv2
import ellipse
DEBUG_IMAGES = []
def debug_show(name, src):
global DEBUG_IMAGES
filename = 'debug{:02d}_{}.png'.format(len(DEBUG_IMAGES), name)
cv2.imwrite(filename, src)
h, w = src.shape[:2]
fx = w/1280.0
fy = h/700.0
f = 1.0/np.ceil(max(fx, fy))
if f < 1.0:
img = cv2.resize(src, (0, 0), None, f, f, cv2.INTER_AREA)
else:
img = src.copy()
DEBUG_IMAGES.append(img)
def translation(x, y):
return np.array([[1, 0, x], [0, 1, y], [0, 0, 1]], dtype=float)
def rotation(theta):
c = np.cos(theta)
s = np.sin(theta)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]], dtype=float)
def perspective_warp(a, b):
return np.array([[1, 0, 0], [0, 1, 0], [a, b, 1]], dtype=float)
def slant(sx):
return np.array([[1, sx, 0], [0, 1, 0], [0, 0, 1]], dtype=float)
def softmax(x, k=1.0):
b = x.max()
return np.log( np.exp(k*(x-b)).sum() ) / k + b
def skewed_widths(contours, H):
xvals = []
for c in contours:
pts = cv2.perspectiveTransform(c, H)
x = pts[:,:,0]
xvals.append( x.max() - x.min() )
xvals = np.array(xvals)
return softmax(xvals, 0.1)
def centered_warp(u0, v0, a, b):
return np.dot(translation(u0, v0),
np.dot(perspective_warp(a, b),
translation(-u0, -v0)))
def warp_containing_points(img, pts, H, border=4, shape_only=False):
'''
display = img.copy()
for pt in pts.reshape((-1,2)).astype(int):
cv2.circle(display, tuple(pt), 4, (255, 0, 0),
-1, cv2.LINE_AA)
debug_show('warp', display)
'''
pts2 = cv2.perspectiveTransform(pts, H)
x0, y0, w, h = cv2.boundingRect(pts2)
print('got bounding rect', x0, y0, w, h)
T = translation(-x0+border, -y0+border)
TH = np.dot(T, H)
if shape_only:
return (h+2*border, w+2*border), TH
else:
dst = cv2.warpPerspective(img, TH, (w+2*border, h+2*border),
borderMode=cv2.BORDER_REPLICATE)
return dst, TH
def conic_area_discrepancy(conics, x, H, opt_results=None):
areas = []
for conic in conics:
cx = ellipse.conic_transform(conic, H)
k, ab = ellipse.conic_scale(cx)
if np.isinf(ab):
areas.append(1e20)
else:
areas.append(ab)
areas = np.array(areas)
areas /= areas.mean() # rescale so mean is 1.0
areas -= 1 # subtract off mean
rval = 0.5*np.dot(areas, areas)
if opt_results is not None:
if not opt_results or rval < opt_results[-1][-1]:
opt_results.append( (x, H, rval) )
return rval
def threshold(img):
if len(img.shape) > 2:
img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
mean = img.mean()
if mean < 100:
img = 255-img
return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV, 101, 21)
def get_contours(img):
work = threshold(img)
debug_show('threshold', work)
contours, hierarchy = cv2.findContours(work, cv2.RETR_CCOMP,
cv2.CHAIN_APPROX_NONE)
return contours, hierarchy
def get_conics(img, contours, hierarchy,
abs_area_cutoff=0.0001, mean_area_cutoff=0.15):
hierarchy = hierarchy.reshape((-1, 4))
conics = []
used_contours = []
areas = []
okcontours = []
allchildren = []
pts = np.empty((0,1,2), dtype='float32')
centroid_accum = np.zeros(2)
total_area = 0.0
centroids = []
abs_area_cutoff *= img.shape[0] * img.shape[1]
print('abs_area_cutoff = ',abs_area_cutoff)
for i, (c, h) in enumerate(zip(contours, hierarchy.reshape((-1, 4)))):
next_idx, prev_idx, child_idx, parent_idx = h
if parent_idx >= 0:
continue
m = ellipse.moments_from_dict(cv2.moments(c))
if m[0] <= abs_area_cutoff:
continue
children = []
while child_idx >= 0:
child_contour = contours[child_idx]
cm = cv2.moments(child_contour)
if cm['m00'] > abs_area_cutoff:
children.append(child_contour)
allchildren.append(child_contour)
child_idx = hierarchy[child_idx][0]
if children:
work = np.zeros(img.shape[:2], dtype=np.uint8)
cv2.drawContours(work, contours, i, (1,1,1), -1)
cv2.drawContours(work, children, -1, (0,0,0), -1)
m = ellipse.moments_from_dict(cv2.moments(work, True))
centroids.append(m[1:3]/m[0])
centroid_accum += m[1:3]
total_area += m[0]
pts = np.vstack((pts, c.astype('float32')))
conic = ellipse.conic_from_moments(m)
okcontours.append(c)
conics.append(conic)
areas.append(m[0])
display = img.copy()
cv2.drawContours(display, okcontours+allchildren,
-1, (0, 255, 0),
6, cv2.LINE_AA)
debug_show('contours_only', display)
for c, a in zip(okcontours, areas):
x, y, w, h = cv2.boundingRect(c)
s = str('{:,d}'.format(int(a)))
#ctr = (x + w/2 - 15*len(s), y+h/2+10)
ctr = (x, y+h+20)
cv2.putText(display, s, ctr,
cv2.FONT_HERSHEY_SIMPLEX, 2.0,
(0, 0, 0), 12, cv2.LINE_AA)
cv2.putText(display, s, ctr,
cv2.FONT_HERSHEY_SIMPLEX, 2.0,
(0, 255, 0), 6, cv2.LINE_AA)
debug_show('contours', display)
areas = np.array(areas)
amean = areas.mean()
print('got {} contours with {} small.'.format(
len(areas), (areas < mean_area_cutoff*amean).sum()))
idx = np.where(areas > mean_area_cutoff*amean)[0]
conics = np.array(conics)
conics = conics[idx]
centroid_accum /= total_area
display = img.copy()
for conic in conics:
x0, y0, a, b, theta = ellipse.gparams_from_conic(conic)
cv2.ellipse(display, (int(x0), int(y0)), (int(a), int(b)),
theta*180/np.pi, 0, 360, (0,0,255), 6, cv2.LINE_AA)
debug_show('conics', display)
contours = [okcontours[i].astype('float32') for i in idx]
if 0:
centroids = np.array([centroids[i] for i in idx])
areas = areas[idx]
def polyfit(x, y):
coeffs = np.polyfit(x, y, deg=1)
ypred = np.polyval(coeffs, x)
ymean = np.mean(y)
sstot = np.sum((y - ymean)**2)
ssres = np.sum((y.flatten() - ypred.flatten())**2)
r2 = 1 - ssres/sstot
return coeffs, r2
xfit, xr2 = polyfit(centroids[:,0], areas)
yfit, yr2 = polyfit(centroids[:,1], areas)
xlabel = 'X coordinate (r²={:.2f})'.format(xr2)
ylabel = 'Y coordinate (r²={:.2f})'.format(yr2)
plt.plot(centroids[:,0], areas, 'b.', zorder=1)
plt.plot(centroids[:,1], areas, 'r.', zorder=1)
plt.gca().autoscale(False)
plt.plot([0, 3000], np.polyval(xfit, [0,3000]), 'b--',
zorder=0, label=xlabel)
plt.plot([0, 3000], np.polyval(yfit, [0,3000]), 'r--',
zorder=0, label=ylabel)
plt.legend(loc='upper right')
plt.xlabel('X/Y coordinate (px)')
plt.ylabel('Contour area (px²)')
plt.savefig('position-vs-area.pdf')
return conics, contours, centroid_accum
def optimize_conics(conics, p0):
x0 = np.array([0.0, 0.0])
hfunc = lambda x: centered_warp(p0[0], p0[1], x[0], x[1])
opt_results = []
f = lambda x: conic_area_discrepancy(conics, x, hfunc(x), opt_results)
res = scipy.optimize.minimize(f, x0, method='Powell')
H = hfunc(res.x)
rects = []
if 0:
phi = np.linspace(0, 2*np.pi, 16, endpoint=False)
width, height = 0, 0
for x, H, fval in opt_results:
allxy = []
for conic in conics:
Hconic = ellipse.conic_transform(conic, H)
gparams = ellipse.gparams_from_conic(Hconic)
x, y = ellipse.gparams_evaluate(gparams, phi)
xy = np.dstack((x.reshape((-1, 1, 1)), y.reshape((-1, 1, 1))))
allxy.append(xy)
allxy = np.vstack(tuple(allxy)).astype(np.float32)
rect = cv2.boundingRect(allxy)
rects.append(rect)
x, y, w, h = rect
width = max(width, w)
height = max(height, h)
border = int(0.05 * min(width, height))
width += border
height += border
aspect = float(width)/height
if aspect < 2.0:
width = 2*height
else:
height = width/2
for i, (rect, (x, H, fval)) in enumerate(zip(rects, opt_results)):
display = np.zeros((height, width), dtype=np.uint8)
x, y, w, h = rect
xoffs = width/2 - (x+w/2)
yoffs = height/2 - (y+h/2)
for conic in conics:
Hconic = ellipse.conic_transform(conic, H)
x0, y0, a, b, theta = ellipse.gparams_from_conic(Hconic)
cv2.ellipse(display, (int(x0+xoffs), int(y0+yoffs)), (int(a), int(b)),
theta*180/np.pi, 0, 360, (255,255,255), 6, cv2.LINE_AA)
cv2.putText(display, 'Area discrepancy: {:.3f}'.format(fval),
(16, height-24), cv2.FONT_HERSHEY_SIMPLEX, 2.0,
(255,255,255), 6, cv2.LINE_AA)
cv2.imwrite('frame{:04d}.png'.format(i), display)
return H
def orientation_detect(img, contours, H, rho=8.0, ntheta=512):
# ignore this, just deal with edge-detected text
pts = np.vstack(tuple(contours))
shape, TH = warp_containing_points(img, pts, H, shape_only=True)
text_edges = np.zeros(shape, dtype=np.uint8)
for contour in contours:
contour = cv2.perspectiveTransform(contour.astype(np.float32), TH)
cv2.drawContours(text_edges, [contour.astype(int)], 0, (255,255,255))
debug_show('edges', text_edges)
# generate a linspace of thetas
thetas = np.linspace(-0.5*np.pi, 0.5*np.pi, ntheta, endpoint=False)
# rho is pixels per r bin in polar (theta, r) histogram
# irho is bins per pixel
irho = 1.0/rho
# get height and width
h, w = text_edges.shape
# maximum bin index is given by hypotenuse of (w, h) divided by pixels per bin
bin_max = int(np.ceil(np.hypot(w, h)*irho))
# initialize zeroed histogram height bin_max and width num theta
hist = np.zeros((bin_max, ntheta))
# let u and v be x and y coordinates (respectively) of non-zero
# pixels in edge map
v, u = np.mgrid[0:h, 0:w]
v = v[text_edges.view(bool)]
u = u[text_edges.view(bool)]
# get center coordinates
u0 = w*0.5
v0 = h*0.5
# for each i and theta = thetas[i]
for i, theta in enumerate(thetas):
# for each nonzero edge pixel, compute bin in r direction from
# pixel location and cos/sin of theta
bin_idx = ( (-(u-u0)*np.sin(theta) # x term
+ (v-v0)*np.cos(theta))*irho # y term, both
# divided by pixels
# per bin
+ 0.5*bin_max ) # offset for center pixel
assert( bin_idx.min() >= 0 and bin_idx.max() < bin_max )
# 0.5 is for correct rounding here
#
# e.g. np.bincount([1, 1, 0, 3]) = [1, 2, 0, 1]
# returns count of each integer in the array
bc = np.bincount((bin_idx + 0.5).astype(int))
# push this into the histogram
hist[:len(bc),i] = bc
# number of zero pixels in each column
num_zero = (hist == 0).sum(axis=0)
# find the maximum number of zero pixels
best_theta_idx = num_zero.argmax()
# actual detected theta - could just return this now
theta = thetas[best_theta_idx]
# compose with previous homography
RH = np.dot(rotation(-theta), H)
if 1: # just debug visualization
debug_hist = (255*hist/hist.max()).astype('uint8')
debug_hist = cv2.cvtColor(debug_hist, cv2.COLOR_GRAY2RGB)
cv2.line(debug_hist,
(best_theta_idx, 0),
(best_theta_idx, bin_max), (255,0,0),
1, cv2.LINE_AA)
debug_show('histogram', debug_hist)
p0 = np.array((u0, v0))
t = np.array((np.cos(theta), np.sin(theta)))
warped = cv2.warpPerspective(img, TH, (shape[1], shape[0]),
borderMode=cv2.BORDER_REPLICATE)
debug_show('prerotate_noline', warped)
cv2.line(warped,
tuple(map(int, p0 - rho*bin_max*t)),
tuple(map(int, p0 + rho*bin_max*t)),
(255, 0, 0),
6, cv2.LINE_AA)
debug_show('prerotate', warped)
warped, _ = warp_containing_points(img, pts, RH)
debug_show('preskew', warped)
return RH
def skew_detect(img, contours, RH):
hulls = [cv2.convexHull(c) for c in contours]
pts = np.vstack(tuple(hulls))
display, TRH = warp_containing_points(img, pts, RH)
for h in hulls:
h = cv2.perspectiveTransform(h, TRH).astype(int)
cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA)
debug_show('convex_hulls_before', display)
f = lambda x: skewed_widths(contours, np.dot(slant(x), RH))
res = scipy.optimize.minimize_scalar(f, (-2.0, 0.0, 2.0))
SRH = np.dot(slant(res.x), RH)
warped, Hfinal = warp_containing_points(img, pts, SRH)
display = warped.copy()
for h in hulls:
h = cv2.perspectiveTransform(h, Hfinal).astype(int)
cv2.drawContours(display, [h], 0, (255, 0, 255), 6, cv2.LINE_AA)
debug_show('convex_hulls_after', display)
debug_show('final', warped)
return SRH
def main():
img = cv2.imread(sys.argv[1])
debug_show('input', img)
contours, hierarchy = get_contours(img)
conics, contours, centroid = get_conics(img, contours, hierarchy)
H = optimize_conics(conics, centroid)
RH = orientation_detect(img, contours, H)
SRH = skew_detect(img, contours, RH)
for img in DEBUG_IMAGES:
cv2.imshow('Debug', img)
while cv2.waitKey(5) < 0:
pass
if __name__ == '__main__':
main()
Loading…
Cancel
Save