Repository for the search-and-displace ingest module, which takes ODF, DOCX and PDF files and converts them into Markdown (.md) for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

923 lines
26 KiB

#!/usr/bin/env python
######################################################################
# page_dewarp.py - Proof-of-concept of page-dewarping based on a
# "cubic sheet" model. Requires OpenCV (version 3 or greater),
# PIL/Pillow, and scipy.optimize.
######################################################################
# Author: Matt Zucker
# Date: July 2016
# License: MIT License (see LICENSE.txt)
######################################################################
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import str
from builtins import range
from builtins import object
from past.utils import old_div
import os
import sys
import datetime
import cv2
from PIL import Image
import numpy as np
import scipy.optimize
# for some reason pylint complains about cv2 members being undefined :(
# pylint: disable=E1101
# ---- page mask ------------------------------------------------------
# reduced px to ignore near L/R edge
PAGE_MARGIN_X = 50
# reduced px to ignore near T/B edge
PAGE_MARGIN_Y = 20

# ---- output ---------------------------------------------------------
# how much to zoom output relative to *original* image
OUTPUT_ZOOM = 1.0
# just affects stated DPI of PNG, not appearance
OUTPUT_DPI = 300
# downscaling factor for remapping image
REMAP_DECIMATE = 16

# ---- thresholding and text contour filtering ------------------------
# window size for adaptive threshold in reduced px
ADAPTIVE_WINSZ = 55
# min reduced px width of detected text contour
TEXT_MIN_WIDTH = 15
# min reduced px height of detected text contour
TEXT_MIN_HEIGHT = 2
# filter out text contours below this w/h ratio
TEXT_MIN_ASPECT = 1.5
# max reduced px thickness of detected text contour
TEXT_MAX_THICKNESS = 10

# ---- linking contours into spans ------------------------------------
# max reduced px horiz. overlap of contours in span
EDGE_MAX_OVERLAP = 1.0
# max reduced px length of edge connecting contours
EDGE_MAX_LENGTH = 100.0
# cost of angles in edges (tradeoff vs. length)
EDGE_ANGLE_COST = 10.0
# maximum change in angle allowed between contours
EDGE_MAX_ANGLE = 7.5
# minimum reduced px width for span
SPAN_MIN_WIDTH = 30
# reduced px spacing for sampling along spans
SPAN_PX_PER_STEP = 20

# ---- layout of the optimisation parameter vector --------------------
# index of rvec in params vector
RVEC_IDX = slice(0, 3)
# index of tvec in params vector
TVEC_IDX = slice(3, 6)
# index of cubic slopes in params vector
CUBIC_IDX = slice(6, 8)

# ---- camera ---------------------------------------------------------
# normalized focal length of camera
FOCAL_LENGTH = 1.2

# ---- debugging ------------------------------------------------------
# 0=none, 1=some, 2=lots, 3=all
DEBUG_LEVEL = 0
# file, screen, both
DEBUG_OUTPUT = 'file'
# Window name for visualization
WINDOW_NAME = 'Dewarp'
def _build_palette():
    """Return the 24-entry colour wheel used for debug visualisations.

    The wheel is six runs of four colours. In each run one channel is
    pinned at 255, one at 0, and the third steps up (0..191) or down
    (255..63), so consecutive entries differ in a single channel.
    """
    rising = (0, 63, 127, 191)
    falling = (255, 191, 127, 63)
    palette = []
    palette.extend((255, level, 0) for level in rising)
    palette.extend((level, 255, 0) for level in falling)
    palette.extend((0, 255, level) for level in rising)
    palette.extend((0, level, 255) for level in falling)
    palette.extend((level, 0, 255) for level in rising)
    palette.extend((255, 0, level) for level in falling)
    return palette


# nice color palette for visualizing contours, etc.
CCOLORS = _build_palette()
# default intrinsic parameter matrix: FOCAL_LENGTH on the first two
# diagonal entries, 1 on the last, zeros elsewhere (float32)
K = np.diag([FOCAL_LENGTH, FOCAL_LENGTH, 1]).astype(np.float32)
def debug_show(name, step, text, display):
    """Emit a debug image to a PNG file and/or an on-screen window.

    DEBUG_OUTPUT selects the destination: any value other than 'screen'
    writes ``<name>_debug_<step>_<text>.png`` (spaces in *text* become
    underscores); any value other than 'file' shows a captioned copy of
    *display* in WINDOW_NAME and waits for a key press.
    """
    write_file = DEBUG_OUTPUT != 'screen'
    show_window = DEBUG_OUTPUT != 'file'

    if write_file:
        outfile = (name + '_debug_' + str(step) + '_' +
                   text.replace(' ', '_') + '.png')
        cv2.imwrite(outfile, display)

    if not show_window:
        return

    # caption a copy so the caller's image is left untouched
    canvas = display.copy()
    anchor = (16, canvas.shape[0] - 16)

    # thick black pass first, thin white pass on top: outlined caption
    for color, thickness in (((0, 0, 0), 3), ((255, 255, 255), 1)):
        cv2.putText(canvas, text, anchor,
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                    color, thickness, cv2.LINE_AA)

    cv2.imshow(WINDOW_NAME, canvas)

    # keep polling until waitKey reports a key
    while cv2.waitKey(5) < 0:
        pass
def round_nearest_multiple(i, factor):
    """Truncate *i* to an int, then round it UP to a multiple of *factor*.

    Despite the name, the result is never smaller than ``int(i)``: values
    that are already a multiple are returned unchanged, everything else
    moves to the next multiple above.
    """
    value = int(i)
    remainder = value % factor
    return value + factor - remainder if remainder else value
def pix2norm(shape, pts):
    """Map pixel coordinates to normalized, image-centred coordinates.

    *shape* is an image shape (rows, cols, ...). Points are shifted so the
    image centre is the origin and scaled by ``2 / max(rows, cols)``, so
    the longer image side spans [-1, 1]. Inverse of norm2pix (without its
    optional integer rounding).
    """
    rows, cols = shape[:2]
    scale = 2.0 / max(rows, cols)
    # image centre as (x, y), broadcastable against (N, 1, 2) point arrays
    center = 0.5 * np.array([cols, rows],
                            dtype=pts.dtype).reshape((-1, 1, 2))
    return scale * (pts - center)
def norm2pix(shape, pts, as_integer):
    """Map normalized, image-centred coordinates back to pixel coordinates.

    Points are scaled by ``max(rows, cols) / 2`` and shifted by the image
    centre. With *as_integer* true the result has 0.5 added and is cast
    to int; otherwise the floating-point array is returned.
    """
    rows, cols = shape[:2]
    scale = 0.5 * max(rows, cols)
    center = np.array([0.5*cols, 0.5*rows],
                      dtype=pts.dtype).reshape((-1, 1, 2))
    pixels = pts * scale + center
    if not as_integer:
        return pixels
    return (pixels + 0.5).astype(int)
def fltp(point):
    """Return *point* as a flat tuple of integers.

    The array is cast with ``astype(int)`` (truncation, not rounding) and
    flattened; the callers in this file pass the result to OpenCV drawing
    functions as a pixel position.
    """
    as_ints = point.astype(int)
    return tuple(as_ints.ravel())
def draw_correspondences(img, dstpoints, projpts):
    """Draw target and projected keypoints, joined pairwise, on a copy of *img*.

    Both point sets are given in normalized coordinates and converted to
    integer pixels with norm2pix. Projected points are drawn first with
    colour (255, 0, 0), target points next with (0, 0, 255), and finally a
    white line joins each projected point to its target.
    """
    canvas = img.copy()
    target_px = norm2pix(img.shape, dstpoints, True)
    projected_px = norm2pix(img.shape, projpts, True)

    # filled dots: projected set first, target set on top
    marker_sets = ((projected_px, (255, 0, 0)),
                   (target_px, (0, 0, 255)))
    for point_set, color in marker_sets:
        for point in point_set:
            cv2.circle(canvas, fltp(point), 3, color, -1, cv2.LINE_AA)

    # residual segments between corresponding points
    for projected, target in zip(projected_px, target_px):
        cv2.line(canvas, fltp(projected), fltp(target),
                 (255, 255, 255), 1, cv2.LINE_AA)

    return canvas
def get_default_params(corners, ycoords, xcoords):
    """Build the initial parameter vector for the optimiser.

    Returns ``(rough_dims, span_counts, params)`` where

    * rough_dims is (page_width, page_height) measured between corners,
    * span_counts lists the number of x coordinates in each span,
    * params is the flat vector [rvec(3), tvec(3), cubic slopes(2),
      one y per span, then every span's x coordinates].
    """
    origin = corners[0]

    # page width and height: distances from corner 0 to its neighbours
    page_width = np.linalg.norm(corners[1] - origin)
    page_height = np.linalg.norm(corners[-1] - origin)

    # object points of the flat (z = 0) page in 3D coordinates
    flat_page = np.array([
        [0, 0, 0],
        [page_width, 0, 0],
        [page_width, page_height, 0],
        [0, page_height, 0]])

    # estimate rotation and translation from the four 2D-to-3D point
    # correspondences (zero distortion coefficients)
    _, rvec, tvec = cv2.solvePnP(flat_page, corners, K, np.zeros(5))

    # the initial guess for the cubic has no slope at either end
    pieces = [np.array(rvec).flatten(),
              np.array(tvec).flatten(),
              np.array([0.0, 0.0]).flatten(),
              ycoords.flatten()]
    pieces.extend(xcoords)
    params = np.hstack(pieces)

    span_counts = [len(xc) for xc in xcoords]
    rough_dims = (page_width, page_height)

    return rough_dims, span_counts, params
def project_xy(xy_coords, pvec):
    """Project page-plane (x, y) points into the image using *pvec*.

    The page height z is a cubic in x whose coefficients are chosen so
    that

        f(0) = 0, f'(0) = alpha
        f(1) = 0, f'(1) = beta

    with (alpha, beta) read from pvec[CUBIC_IDX]. The resulting 3D points
    are projected with the rotation/translation stored in pvec, the
    intrinsic matrix K and zero distortion. Returns the image points as
    produced by cv2.projectPoints.
    """
    alpha, beta = tuple(pvec[CUBIC_IDX])

    # highest power first, as np.polyval expects
    cubic = np.array([
        alpha + beta,
        -2*alpha - beta,
        alpha,
        0])

    flat_xy = xy_coords.reshape((-1, 2))
    heights = np.polyval(cubic, flat_xy[:, 0]).reshape((-1, 1))
    points_3d = np.hstack((flat_xy, heights))

    projected, _ = cv2.projectPoints(points_3d,
                                     pvec[RVEC_IDX],
                                     pvec[TVEC_IDX],
                                     K, np.zeros(5))
    return projected
def project_keypoints(pvec, keypoint_index):
    """Project every keypoint described by *keypoint_index* using *pvec*.

    ``pvec[keypoint_index]`` gathers an (x, y) pair per row. Row 0 is then
    forced to (0, 0) so the first projected point is the page origin.
    """
    # with the integer index array built by make_keypoint_index this
    # lookup yields a new array, so the write below leaves pvec untouched
    gathered = pvec[keypoint_index]
    gathered[0] = 0
    return project_xy(gathered, pvec)
def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
    """Shrink *src* by an integer factor so it fits in maxw x maxh.

    The factor is the ceiling of the larger of width/maxw and
    height/maxh. If it exceeds 1 the image is resized with INTER_AREA;
    otherwise *src* itself is returned, or a copy when *copy* is true.
    """
    height, width = src.shape[:2]
    shrink = int(np.ceil(max(float(width)/maxw, float(height)/maxh)))

    if shrink > 1.0:
        inv_shrink = 1.0/shrink
        return cv2.resize(src, (0, 0), None,
                          inv_shrink, inv_shrink, cv2.INTER_AREA)

    return src.copy() if copy else src
def box(width, height):
    """Return an all-ones uint8 array of *height* rows by *width* columns.

    Used in this file as the structuring element for dilate/erode.
    """
    kernel_shape = (height, width)
    return np.ones(kernel_shape, dtype=np.uint8)
def get_page_extents(small):
    """Return the page mask and its rectangular outline for image *small*.

    The page is the image area inset by PAGE_MARGIN_X / PAGE_MARGIN_Y.
    Returns ``(page, outline)``: *page* is a uint8 mask with the inset
    rectangle filled with 255, *outline* holds the rectangle's four
    (x, y) corners.
    """
    height, width = small.shape[:2]

    left, top = PAGE_MARGIN_X, PAGE_MARGIN_Y
    right, bottom = width - PAGE_MARGIN_X, height - PAGE_MARGIN_Y

    page = np.zeros((height, width), dtype=np.uint8)
    cv2.rectangle(page, (left, top), (right, bottom), (255, 255, 255), -1)

    outline = np.array([
        [left, top],
        [left, bottom],
        [right, bottom],
        [right, top]])

    return page, outline
def get_mask(name, small, pagemask, masktype):
    """Threshold *small* into a binary mask of text (or line) pixels.

    masktype 'text' uses threshold constant 25 followed by a horizontal
    dilate and a vertical erode; any other value uses constant 7 followed
    by a three-iteration erode and a dilate. The result is clipped to
    *pagemask* with an element-wise minimum. Intermediate masks are sent
    to debug_show when DEBUG_LEVEL >= 3.
    """
    # NOTE(review): main() loads images with cv2.imread, which OpenCV
    # documents as BGR channel order; COLOR_RGB2GRAY then weights red and
    # blue the other way round -- confirm whether this is intended.
    sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
    show_steps = DEBUG_LEVEL >= 3

    # each morphology step: (operation, kernel, extra kwargs, step, label)
    if masktype == 'text':
        thresh_const = 25
        thresh_step = 0.1
        morph_steps = (
            (cv2.dilate, box(9, 1), {}, 0.2, 'dilated'),
            (cv2.erode, box(1, 3), {}, 0.3, 'eroded'),
        )
    else:
        thresh_const = 7
        thresh_step = 0.4
        morph_steps = (
            (cv2.erode, box(3, 1), {'iterations': 3}, 0.5, 'eroded'),
            (cv2.dilate, box(8, 2), {}, 0.6, 'dilated'),
        )

    mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY_INV,
                                 ADAPTIVE_WINSZ,
                                 thresh_const)
    if show_steps:
        debug_show(name, thresh_step, 'thresholded', mask)

    for operation, kernel, extra, step, label in morph_steps:
        mask = operation(mask, kernel, **extra)
        if show_steps:
            debug_show(name, step, label, mask)

    return np.minimum(mask, pagemask)
def interval_measure_overlap(int_a, int_b):
    """Return the signed overlap length of two (lo, hi) intervals.

    Positive when the intervals overlap, negative (the gap size) when
    they are disjoint.
    """
    shared_lo = max(int_a[0], int_b[0])
    shared_hi = min(int_a[1], int_b[1])
    return shared_hi - shared_lo
def angle_dist(angle_b, angle_a):
    """Return the absolute difference between two angles in radians.

    The raw difference is wrapped into [-pi, pi] first, so the result is
    the shorter way round the circle.
    """
    full_turn = 2*np.pi
    delta = angle_b - angle_a

    while delta > np.pi:
        delta -= full_turn

    while delta < -np.pi:
        delta += full_turn

    return np.abs(delta)
def blob_mean_and_tangent(contour):
    """Return the centroid and dominant direction of *contour*.

    The centroid comes from the first-order moments divided by the area
    (m00). The tangent is the first column of U from an SVD of the
    area-normalized second central moment matrix.
    """
    moments = cv2.moments(contour)
    area = moments['m00']

    center = np.array([old_div(moments['m10'], area),
                       old_div(moments['m01'], area)])

    second_moments = np.array([
        [moments['mu20'], moments['mu11']],
        [moments['mu11'], moments['mu02']]
    ])
    _, svd_u, _ = cv2.SVDecomp(old_div(second_moments, area))

    # copy so the returned vector does not alias the SVD output
    tangent = svd_u[:, 0].flatten().copy()

    return center, tangent
class ContourInfo(object):
    """Geometry of one detected contour plus its links within a span.

    Attributes set by the constructor:
      contour, rect, mask -- stored exactly as passed in
      center, tangent     -- from blob_mean_and_tangent(contour)
      angle               -- arctan2 of the tangent's y and x components
      local_xrng          -- (min, max) of the contour points projected
                             onto the tangent, measured from center
      point0, point1      -- center moved along the tangent by those
                             two extremes
      pred, succ          -- neighbouring ContourInfo in a span, both
                             initially None
    """

    def __init__(self, contour, rect, mask):
        self.contour = contour
        self.rect = rect
        self.mask = mask

        # span links are filled in later by assemble_spans
        self.pred = None
        self.succ = None

        self.center, self.tangent = blob_mean_and_tangent(contour)
        self.angle = np.arctan2(self.tangent[1], self.tangent[0])

        # extent of the contour along its own tangent
        projections = [self.proj_x(point) for point in contour]
        self.local_xrng = (min(projections), max(projections))

        self.point0 = self.center + self.tangent * self.local_xrng[0]
        self.point1 = self.center + self.tangent * self.local_xrng[1]

    def proj_x(self, point):
        """Return the coordinate of *point* along this contour's tangent."""
        offset = point.flatten() - self.center
        return np.dot(self.tangent, offset)

    def local_overlap(self, other):
        """Return how far *other*'s extent overlaps ours along our tangent."""
        projected = (self.proj_x(other.point0), self.proj_x(other.point1))
        return interval_measure_overlap(self.local_xrng, projected)
def generate_candidate_edge(cinfo_a, cinfo_b):
    """Score a possible left-to-right link between two contours.

    Returns ``(score, left_cinfo, right_cinfo)``, or None when the link is
    too long, the contours overlap too much along their tangents, or the
    direction changes too sharply. Lower scores are better.
    """
    # we want a left of b (so a's successor will be b and b's
    # predecessor will be a): swap if a's left endpoint lies to the
    # right of b's right endpoint
    if cinfo_a.point0[0] > cinfo_b.point1[0]:
        cinfo_a, cinfo_b = cinfo_b, cinfo_a

    # the larger of the two overlaps is the one that must stay small
    x_overlap = max(cinfo_a.local_overlap(cinfo_b),
                    cinfo_b.local_overlap(cinfo_a))

    # direction of the line joining the two centres
    link = cinfo_b.center - cinfo_a.center
    link_angle = np.arctan2(link[1], link[0])

    # worst deviation of either contour from that direction, in degrees
    worst_deviation = max(angle_dist(cinfo_a.angle, link_angle),
                          angle_dist(cinfo_b.angle, link_angle))
    delta_angle = old_div(worst_deviation * 180, np.pi)

    # gap between a's right endpoint and b's left endpoint
    dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)

    rejected = (dist > EDGE_MAX_LENGTH or
                x_overlap > EDGE_MAX_OVERLAP or
                delta_angle > EDGE_MAX_ANGLE)
    if rejected:
        return None

    return (dist + delta_angle*EDGE_ANGLE_COST, cinfo_a, cinfo_b)
def make_tight_mask(contour, xmin, ymin, width, height):
    """Rasterize *contour* into a height x width uint8 mask.

    The contour is translated by (-xmin, -ymin) so it sits at the mask
    origin, then drawn filled with value 1.
    """
    canvas = np.zeros((height, width), dtype=np.uint8)
    origin = np.array((xmin, ymin)).reshape((-1, 1, 2))
    shifted = contour - origin
    cv2.drawContours(canvas, [shifted], 0, (1, 1, 1), -1)
    return canvas
def get_contours(name, small, pagemask, masktype):
    """Detect contours in *small* that look like pieces of text lines.

    A mask is built with get_mask, its external contours are extracted,
    and each contour is kept only if its bounding box is at least
    TEXT_MIN_WIDTH wide, TEXT_MIN_HEIGHT tall, at least TEXT_MIN_ASPECT
    times wider than tall, and no column of its filled mask exceeds
    TEXT_MAX_THICKNESS pixels. Returns a list of ContourInfo objects.
    """
    mask = get_mask(name, small, pagemask, masktype)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.4 and 4.x; the contour list is always
    # second from the end, so index it rather than unpack a fixed arity.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    contours_out = []

    for contour in contours:
        rect = cv2.boundingRect(contour)
        xmin, ymin, width, height = rect

        # reject boxes that are too small or not elongated enough
        if (width < TEXT_MIN_WIDTH or
                height < TEXT_MIN_HEIGHT or
                width < TEXT_MIN_ASPECT*height):
            continue

        tight_mask = make_tight_mask(contour, xmin, ymin, width, height)

        # column sums give the blob's thickness at each x position
        if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
            continue

        contours_out.append(ContourInfo(contour, rect, tight_mask))

    if DEBUG_LEVEL >= 2:
        visualize_contours(name, small, contours_out)

    return contours_out
def assemble_spans(name, small, pagemask, cinfo_list):
    """Chain contours into spans (runs of linked contours).

    Every pair of contours is scored with generate_candidate_edge; edges
    are accepted greedily, best score first, as long as the left contour
    has no successor and the right one has no predecessor yet. Each
    resulting chain whose summed contour width exceeds SPAN_MIN_WIDTH is
    returned as a list of ContourInfo, in chain order.
    """
    # sort contours by the y of their bounding rect
    cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])

    # generate all candidate edges; each edge is of the form
    # (score, left_cinfo, right_cinfo)
    candidate_edges = []

    for i, cinfo_i in enumerate(cinfo_list):
        for j in range(i):
            edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
            if edge is not None:
                candidate_edges.append(edge)

    # sort candidate edges by score (lower is better). Sort on the score
    # alone: comparing whole tuples falls through to comparing the
    # ContourInfo objects when two scores tie, which raises TypeError on
    # Python 3. The sort is stable, so ties keep generation order.
    candidate_edges.sort(key=lambda edge: edge[0])

    # greedily accept edges whose two endpoints are both still free
    for _, cinfo_a, cinfo_b in candidate_edges:
        if cinfo_a.succ is None and cinfo_b.pred is None:
            cinfo_a.succ = cinfo_b
            cinfo_b.pred = cinfo_a

    # generate list of spans as output
    spans = []

    # until we have removed everything from the list
    while cinfo_list:

        # start from the first remaining contour and walk back to the
        # head of its chain
        cinfo = cinfo_list[0]
        while cinfo.pred:
            cinfo = cinfo.pred

        # start a new span
        cur_span = []
        width = 0.0

        # follow successors to the end of the chain
        while cinfo:
            # remove from list (this makes the loop O(n^2) as well)
            cinfo_list.remove(cinfo)
            cur_span.append(cinfo)
            width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
            cinfo = cinfo.succ

        # keep the span only if it is long enough
        if width > SPAN_MIN_WIDTH:
            spans.append(cur_span)

    if DEBUG_LEVEL >= 2:
        visualize_spans(name, small, pagemask, spans)

    return spans
def sample_spans(shape, spans):
    """Sample keypoints along each span, in normalized coordinates.

    For every contour in a span, the mean row of its mask is computed per
    column, and every SPAN_PX_PER_STEP-th column is sampled (the starting
    column is chosen so the samples are centred across the contour).
    Returns one float32 array of shape (N, 1, 2) per span, converted with
    pix2norm using image *shape*.
    """
    step = SPAN_PX_PER_STEP
    span_points = []

    for span in spans:
        samples = []

        for cinfo in span:
            # per-column mean row index of the mask pixels
            row_index = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
            weighted_rows = (row_index * cinfo.mask).sum(axis=0)
            column_means = old_div(weighted_rows, cinfo.mask.sum(axis=0))

            xmin, ymin = cinfo.rect[:2]

            # split the leftover columns evenly between both ends
            first = old_div(((len(column_means)-1) % step), 2)

            for x in range(first, len(column_means), step):
                samples.append((x+xmin, column_means[x]+ymin))

        samples = np.array(samples, dtype=np.float32).reshape((-1, 1, 2))
        span_points.append(pix2norm(shape, samples))

    return span_points
def keypoints_from_samples(name, small, pagemask, page_outline,
                           span_points):
    """Derive page corners and per-span coordinates from sampled spans.

    The page x axis is the average of each span's first PCA component,
    weighted by the distance between the span's first and last sample;
    the y axis is perpendicular to it. Returns ``(corners, ycoords,
    xcoords)``: the four corners of the page outline's bounding box in
    that frame (normalized coordinates, shape (4, 1, 2)), one mean y per
    span, and a list with each span's x coordinates, all measured from
    the box's minimum corner.
    """
    # length-weighted average of the spans' principal directions
    weighted_sum = np.array([[0.0, 0.0]])
    total_weight = 0

    for points in span_points:
        _, span_dir = cv2.PCACompute(points.reshape((-1, 2)),
                                     None, maxComponents=1)
        span_length = np.linalg.norm(points[-1] - points[0])
        weighted_sum += span_dir * span_length
        total_weight += span_length

    x_dir = old_div(weighted_sum, total_weight).flatten()

    # make the x axis point towards +x in image space
    if x_dir[0] < 0:
        x_dir = -x_dir

    y_dir = np.array([-x_dir[1], x_dir[0]])

    # bounding box of the page outline in the (x_dir, y_dir) frame
    hull = cv2.convexHull(page_outline)
    hull = pix2norm(pagemask.shape, hull.reshape((-1, 1, 2)))
    hull = hull.reshape((-1, 2))

    hull_x = np.dot(hull, x_dir)
    hull_y = np.dot(hull, y_dir)

    px0, px1 = hull_x.min(), hull_x.max()
    py0, py1 = hull_y.min(), hull_y.max()

    # corner order: (x0, y0), (x1, y0), (x1, y1), (x0, y1)
    corner_extents = ((px0, py0), (px1, py0), (px1, py1), (px0, py1))
    corner_rows = [ext_x * x_dir + ext_y * y_dir
                   for ext_x, ext_y in corner_extents]
    corners = np.vstack(corner_rows).reshape((-1, 1, 2))

    ycoords = []
    xcoords = []

    for points in span_points:
        flat = points.reshape((-1, 2))
        xcoords.append(np.dot(flat, x_dir) - px0)
        ycoords.append(np.dot(flat, y_dir).mean() - py0)

    if DEBUG_LEVEL >= 2:
        visualize_span_points(name, small, span_points, corners)

    return corners, np.array(ycoords), xcoords
def visualize_contours(name, small, cinfo_list):
    """Show the detected contours blended over *small* (debug step 1).

    Each contour is filled with a palette colour and averaged with the
    underlying image; a white circle marks its centre and a white line
    joins its two tangent endpoints.
    """
    regions = np.zeros_like(small)

    for j, cinfo in enumerate(cinfo_list):
        cv2.drawContours(regions, [cinfo.contour], 0,
                         CCOLORS[j % len(CCOLORS)], -1)

    # pixels covered by at least one filled contour
    mask = (regions.max(axis=2) != 0)

    display = small.copy()
    display[mask] = (old_div(display[mask], 2)) + (old_div(regions[mask], 2))

    # the markers are always white, so no per-contour colour is needed
    for cinfo in cinfo_list:
        cv2.circle(display, fltp(cinfo.center), 3,
                   (255, 255, 255), 1, cv2.LINE_AA)

        cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
                 (255, 255, 255), 1, cv2.LINE_AA)

    debug_show(name, 1, 'contours', display)
def visualize_spans(name, small, pagemask, spans):
    """Show the assembled spans blended over *small* (debug step 2).

    All contours of a span share one palette colour; pixels outside
    *pagemask* are darkened to a quarter of their value.
    """
    overlay = np.zeros_like(small)

    for i, span in enumerate(spans):
        span_contours = [cinfo.contour for cinfo in span]
        # step through the palette by 3 so neighbouring spans differ more
        span_color = CCOLORS[i*3 % len(CCOLORS)]
        cv2.drawContours(overlay, span_contours, -1, span_color, -1)

    painted = (overlay.max(axis=2) != 0)

    display = small.copy()
    display[painted] = (old_div(display[painted],2)) + (old_div(overlay[painted],2))

    outside_page = pagemask == 0
    display[outside_page] //= 4

    debug_show(name, 2, 'spans', display)
def visualize_span_points(name, small, span_points, corners):
    """Show sampled span points and the page outline (debug step 3).

    Every span's samples are drawn as filled dots in a palette colour,
    with a white line along the span's first PCA component from its
    smallest to its largest projection. The page corners are drawn as a
    closed white polygon.
    """
    display = small.copy()

    for i, points in enumerate(span_points):
        pixel_pts = norm2pix(small.shape, points, False)
        flat = pixel_pts.reshape((-1, 2))

        mean, small_evec = cv2.PCACompute(flat,
                                          None,
                                          maxComponents=1)

        # projections of the samples, and of their mean, on the axis
        dps = np.dot(flat, small_evec.reshape((2, 1)))
        dpm = np.dot(mean.flatten(), small_evec.flatten())

        point0 = mean + small_evec * (dps.min()-dpm)
        point1 = mean + small_evec * (dps.max()-dpm)

        span_color = CCOLORS[i % len(CCOLORS)]
        for point in pixel_pts:
            cv2.circle(display, fltp(point), 3,
                       span_color, -1, cv2.LINE_AA)

        cv2.line(display, fltp(point0), fltp(point1),
                 (255, 255, 255), 1, cv2.LINE_AA)

    outline_px = norm2pix(small.shape, corners, True)
    cv2.polylines(display, [outline_px], True, (255, 255, 255))

    debug_show(name, 3, 'span points', display)
def imgsize(img):
    """Return the size of *img* formatted as 'WIDTHxHEIGHT'."""
    rows, cols = img.shape[:2]
    return '{}x{}'.format(cols, rows)
def make_keypoint_index(span_counts):
    """Build the lookup table mapping keypoints to parameter-vector slots.

    Returns an int array of shape (sum(span_counts) + 1, 2). For keypoint
    row r >= 1, column 0 is the index of that point's x parameter and
    column 1 the index of its span's y parameter. Row 0 is left as
    (0, 0); project_keypoints overwrites that row's coordinates with zero.

    The offsets assume the layout produced by get_default_params: 8
    leading parameters, then one y per span, then every point's x.
    """
    nspans = len(span_counts)
    npts = sum(span_counts)

    keypoint_index = np.zeros((npts+1, 2), dtype=int)

    start = 1

    for i, count in enumerate(span_counts):
        end = start + count
        # rows [start, end) belong to span i and share its y parameter
        keypoint_index[start:end, 1] = 8+i
        start = end

    # x parameters follow the 8 leading values and the nspans y values
    keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans

    return keypoint_index
def optimize_params(name, small, dstpoints, span_counts, params):
    """Refine *params* so projected keypoints match *dstpoints*.

    The objective is the sum of squared differences between *dstpoints*
    and the keypoints projected with the current parameters; it is
    minimized with scipy's Powell method. Progress is printed, and the
    correspondences before and after are shown when DEBUG_LEVEL >= 1.
    Returns the optimized parameter vector.
    """
    keypoint_index = make_keypoint_index(span_counts)

    def objective(pvec):
        residual = dstpoints - project_keypoints(pvec, keypoint_index)
        return np.sum(residual**2)

    def show_keypoints(pvec, step, caption):
        projpts = project_keypoints(pvec, keypoint_index)
        debug_show(name, step, caption,
                   draw_correspondences(small, dstpoints, projpts))

    debugging = DEBUG_LEVEL >= 1

    print(' initial objective is', objective(params))

    if debugging:
        show_keypoints(params, 4, 'keypoints before')

    print(' optimizing', len(params), 'parameters...')
    started = datetime.datetime.now()
    res = scipy.optimize.minimize(objective, params,
                                  method='Powell')
    elapsed = (datetime.datetime.now() - started).total_seconds()
    print(' optimization took', round(elapsed, 2), 'sec.')
    print(' final objective is', res.fun)

    params = res.x

    if debugging:
        show_keypoints(params, 5, 'keypoints after')

    return params
def get_page_dims(corners, rough_dims, params):
    """Refine the page (width, height) so its far corner projects correctly.

    Starting from *rough_dims*, Powell's method finds the page-plane point
    whose projection under *params* is closest (squared distance) to
    corners[2]. Returns the optimized two-element array.
    """
    target_corner = corners[2].flatten()

    def corner_error(candidate_dims):
        projected = project_xy(candidate_dims, params)
        return np.sum((target_corner - projected.flatten())**2)

    res = scipy.optimize.minimize(corner_error, np.array(rough_dims),
                                  method='Powell')
    dims = res.x

    print(' got page dims', dims[0], 'x', dims[1])

    return dims
def remap_image(name, dirname, img, small, page_dims, params):
    """Dewarp *img*, threshold it and save it as ``<name>_thresh.png``.

    Parameters:
      name      -- base name used for the output (and debug) files
      dirname   -- directory the output PNG is written to; may be ''
      img       -- full-resolution input image
      small     -- reduced image, only used to size the debug display
      page_dims -- (width, height) of the page in page coordinates
      params    -- optimized parameter vector passed to project_xy

    Returns the output file name (without the directory part).
    """
    # output size: both dimensions are multiples of REMAP_DECIMATE
    height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
    height = round_nearest_multiple(height, REMAP_DECIMATE)

    width = round_nearest_multiple(
        old_div(height * page_dims[0], page_dims[1]),
        REMAP_DECIMATE)

    print(' output will be {}x{}'.format(width, height))

    # project a coarse grid only, then upsample the coordinate maps
    height_small = old_div(height, REMAP_DECIMATE)
    width_small = old_div(width, REMAP_DECIMATE)

    page_x_range = np.linspace(0, page_dims[0], width_small)
    page_y_range = np.linspace(0, page_dims[1], height_small)

    page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)

    page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
                                page_y_coords.flatten().reshape((-1, 1))))

    page_xy_coords = page_xy_coords.astype(np.float32)

    image_points = project_xy(page_xy_coords, params)
    image_points = norm2pix(img.shape, image_points, False)

    image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
    image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)

    image_x_coords = cv2.resize(image_x_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    image_y_coords = cv2.resize(image_y_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
                         cv2.INTER_CUBIC,
                         None, cv2.BORDER_REPLICATE)

    thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)

    # store as a 1-bit image
    pil_image = Image.fromarray(thresh)
    pil_image = pil_image.convert('1')

    threshfile = name + '_thresh.png'

    # os.path.join keeps a bare file name when dirname is '' (image given
    # without a directory); plain '/' concatenation would instead produce
    # '/<name>_thresh.png', i.e. a path in the filesystem root.
    pil_image.save(os.path.join(dirname, threshfile),
                   dpi=(OUTPUT_DPI, OUTPUT_DPI))

    if DEBUG_LEVEL >= 1:
        # scale the result to the height of the reduced image
        height = small.shape[0]
        width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
        display = cv2.resize(thresh, (width, height),
                             interpolation=cv2.INTER_AREA)
        debug_show(name, 6, 'output', display)

    return threshfile
def main():
    """Dewarp every image named on the command line.

    For each image: detect text (or, failing that, line) spans, sample
    keypoints, fit the page model and write ``<name>_thresh.png`` next to
    the input. Images that cannot be read or that yield no spans are
    skipped. Finally prints an ImageMagick command that combines the
    outputs into a PDF.
    """
    if len(sys.argv) < 2:
        print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
        sys.exit(0)

    if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
        cv2.namedWindow(WINDOW_NAME)

    outfiles = []

    for imgfile in sys.argv[1:]:

        img = cv2.imread(imgfile)

        # imread returns None (no exception) for missing/unreadable files
        if img is None:
            print('skipping', imgfile, 'because it could not be read')
            continue

        small = resize_to_screen(img)
        basename = os.path.basename(imgfile)
        dirname = os.path.dirname(imgfile)
        name, _ = os.path.splitext(basename)

        print('loaded', basename, 'with size', imgsize(img), end=' ')
        print('and resized to', imgsize(small))

        if DEBUG_LEVEL >= 3:
            debug_show(name, 0.0, 'original', small)

        pagemask, page_outline = get_page_extents(small)

        cinfo_list = get_contours(name, small, pagemask, 'text')
        spans = assemble_spans(name, small, pagemask, cinfo_list)

        # too few text spans: try line detection and keep the richer set
        if len(spans) < 3:
            print(' detecting lines because only', len(spans), 'text spans')
            cinfo_list = get_contours(name, small, pagemask, 'line')
            spans2 = assemble_spans(name, small, pagemask, cinfo_list)
            if len(spans2) > len(spans):
                spans = spans2

        if len(spans) < 1:
            print('skipping', name, 'because only', len(spans), 'spans')
            continue

        span_points = sample_spans(small.shape, spans)

        print(' got', len(spans), 'spans', end=' ')
        print('with', sum([len(pts) for pts in span_points]), 'points.')

        corners, ycoords, xcoords = keypoints_from_samples(name, small,
                                                           pagemask,
                                                           page_outline,
                                                           span_points)

        rough_dims, span_counts, params = get_default_params(corners,
                                                             ycoords, xcoords)

        # destination points: the page origin corner, then all samples
        dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
                              tuple(span_points))

        params = optimize_params(name, small,
                                 dstpoints,
                                 span_counts, params)

        page_dims = get_page_dims(corners, rough_dims, params)

        outfile = remap_image(name, dirname, img, small, page_dims, params)

        # remap_image writes into dirname but returns only the file name;
        # report the full path so the command printed below is usable
        outpath = os.path.join(dirname, outfile)
        outfiles.append(outpath)

        print(' wrote', outpath)
        print()

    print('to convert to PDF (requires ImageMagick):')
    print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')


if __name__ == '__main__':
    main()