Browse Source

modified: .gitmodules

deleted:    docker/core/duckling
	deleted:    docker/ingest/Dockerfile
	deleted:    docker/ingest/dewarp/LICENSE.txt
	deleted:    docker/ingest/dewarp/README.md
	deleted:    docker/ingest/dewarp/derive_cubic.py
	deleted:    docker/ingest/dewarp/page_dewarp.py
	deleted:    docker/ingest/dewarp/requirements.txt
	modified:   searchanddisplace-core
	modified:   searchanddisplace-ingest
master
root 2 years ago
parent
commit
651f6ac325
  1. 3
      .gitmodules
  2. 1
      docker/core/duckling
  3. 53
      docker/ingest/Dockerfile
  4. 21
      docker/ingest/dewarp/LICENSE.txt
  5. 14
      docker/ingest/dewarp/README.md
  6. 46
      docker/ingest/dewarp/derive_cubic.py
  7. 923
      docker/ingest/dewarp/page_dewarp.py
  8. 5
      docker/ingest/dewarp/requirements.txt
  9. 2
      searchanddisplace-core
  10. 2
      searchanddisplace-ingest

3
.gitmodules

@ -4,6 +4,3 @@
[submodule "searchanddisplace-ingest"]
path = searchanddisplace-ingest
url = https://git.law/newroco/searchanddisplace-ingest
[submodule "docker/core/duckling"]
path = docker/core/duckling
url = https://github.com/facebook/duckling.git

1
docker/core/duckling

@ -1 +0,0 @@
Subproject commit 7520daaeba28691cda8e1b5c3d946028a28fb64b

53
docker/ingest/Dockerfile

@ -1,53 +0,0 @@
# Image for the ingest service: starts from an nginx + PHP 7.4 base and adds
# LibreOffice, Python 3.8, Poppler, Tesseract OCR, Pandoc and the bundled
# page-dewarp script with its Python requirements.
FROM rcarjan/nginx-php:7.4
LABEL maintainer="Radu Liviu Carjan"
## Copy the bundled page-dewarp sources into the image
RUN mkdir /var/www/dewarp
ADD dewarp /var/www/dewarp
## Install LibreOffice from its PPA
# NOTE(review): apt-add-repository is invoked before software-properties-common
# is installed by this same step; presumably the base image already ships it - confirm.
RUN apt-add-repository -y ppa:libreoffice/ppa && \
apt-get install -y \
libreoffice \
software-properties-common
## Install Python 3.8 (deadsnakes PPA), supervisor and the Poppler PDF converter
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get install -y \
supervisor \
python3.8 \
python3.8-dev \
python3.8-distutils \
libpoppler-cpp-dev \
poppler-utils
## Install Tesseract OCR, Pandoc & other document-conversion dependencies
RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel && \
apt-get install -y \
tesseract-ocr \
unpaper \
unoconv \
pandoc
## Point python3 (and python) at Python 3.8, then bootstrap and upgrade pip
RUN rm /usr/bin/python3 && \
ln -s /usr/bin/python3.8 /usr/bin/python3 && \
apt-get install -y python-is-python3 && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && \
rm -rf get-pip.py && \
pip install --upgrade pip
## Install pip packages
# NOTE(review): supervisor is also installed through apt in an earlier step,
# and opencv-python is listed again in dewarp/requirements.txt.
RUN pip install \
pdftotext \
supervisor \
opencv-python
# Install the dewarp script's own Python requirements
WORKDIR /var/www/dewarp
RUN pip install -r requirements.txt
# Create /var/log/queue (presumably for queue worker logs - confirm)
RUN mkdir /var/log/queue
# Final working directory of the image
WORKDIR /var/www/ingest

21
docker/ingest/dewarp/LICENSE.txt

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2016, Matt Zucker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

14
docker/ingest/dewarp/README.md

@ -1,14 +0,0 @@
page_dewarp
===========
Page dewarping and thresholding using a "cubic sheet" model - see full writeup at <https://mzucker.github.io/2016/08/15/page-dewarping.html>
Requirements:
- scipy
- OpenCV 3.0 or greater
- Image module from PIL or Pillow
Usage:
page_dewarp.py IMAGE1 [IMAGE2 ...]

46
docker/ingest/dewarp/derive_cubic.py

@ -1,46 +0,0 @@
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import sympy
# Symbolic derivation of the cubic f(x) = a*x^3 + b*x^2 + c*x + d whose
# endpoints are pinned to zero and whose end slopes are alpha and beta.

# symbols: the four coefficients, the variable, and the two end slopes
a, b, c, d, x, alpha, beta = sympy.symbols('a b c d x alpha beta')

# the polynomial and its first derivative
f = a*x**3 + b*x**2 + c*x + d
fp = f.diff(x)

# values of f and f' at both ends of the unit interval
f0, f1 = f.subs(x, 0), f.subs(x, 1)
fp0, fp1 = fp.subs(x, 0), fp.subs(x, 1)

# solve for a, b, c, d subject to
#
#   f(0) = 0,  f'(0) = alpha
#   f(1) = 0,  f'(1) = beta
S = sympy.solve([f0, f1, fp0-alpha, fp1-beta], [a, b, c, d])

# print each analytic coefficient and collect a numeric instance of it
num_alpha = 0.3
num_beta = 0.03
coeffs = []
for key in [a, b, c, d]:
    print(key, '=', S[key])
    coeffs.append(S[key].subs(dict(alpha=num_alpha,
                                   beta=num_beta)))

# plot the numeric example over [0, 1]
xvals = np.linspace(0, 1, 101)
yvals = np.polyval(coeffs, xvals)
plt.plot(xvals, yvals)
plt.show()

923
docker/ingest/dewarp/page_dewarp.py

@ -1,923 +0,0 @@
#!/usr/bin/env python
######################################################################
# page_dewarp.py - Proof-of-concept of page-dewarping based on a
# "cubic sheet" model. Requires OpenCV (version 3 or greater),
# PIL/Pillow, and scipy.optimize.
######################################################################
# Author: Matt Zucker
# Date: July 2016
# License: MIT License (see LICENSE.txt)
######################################################################
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import str
from builtins import range
from builtins import object
from past.utils import old_div
import os
import sys
import datetime
import cv2
from PIL import Image
import numpy as np
import scipy.optimize
# for some reason pylint complains about cv2 members being undefined :(
# pylint: disable=E1101
# ---- tuning constants -------------------------------------------------
# "reduced px" in the comments below presumably refers to pixels of the
# downscaled working image rather than the original - TODO confirm.
PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge
PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge
OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image
OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance
REMAP_DECIMATE = 16 # downscaling factor for remapping image
ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px
TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour
TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour
TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio
TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour
EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span
EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours
EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length)
EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours
# Layout of the first eight entries of the optimisation parameter vector:
# [0:3] rotation vector, [3:6] translation vector, [6:8] cubic end slopes.
RVEC_IDX = slice(0, 3) # index of rvec in params vector
TVEC_IDX = slice(3, 6) # index of tvec in params vector
CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector
SPAN_MIN_WIDTH = 30 # minimum reduced px width for span
SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans
FOCAL_LENGTH = 1.2 # normalized focal length of camera
DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all
DEBUG_OUTPUT = 'file' # file, screen, both
WINDOW_NAME = 'Dewarp' # Window name for visualization
# nice color palette for visualizing contours, etc.
# (24 colour triples; the visualisation helpers index it modulo its length)
CCOLORS = [
(255, 0, 0),
(255, 63, 0),
(255, 127, 0),
(255, 191, 0),
(255, 255, 0),
(191, 255, 0),
(127, 255, 0),
(63, 255, 0),
(0, 255, 0),
(0, 255, 63),
(0, 255, 127),
(0, 255, 191),
(0, 255, 255),
(0, 191, 255),
(0, 127, 255),
(0, 63, 255),
(0, 0, 255),
(63, 0, 255),
(127, 0, 255),
(191, 0, 255),
(255, 0, 255),
(255, 0, 191),
(255, 0, 127),
(255, 0, 63),
]
# default intrinsic parameter matrix
# (FOCAL_LENGTH on the diagonal, zero principal-point offset)
K = np.array([
[FOCAL_LENGTH, 0, 0],
[0, FOCAL_LENGTH, 0],
[0, 0, 1]], dtype=np.float32)
def debug_show(name, step, text, display):
    """Emit a debug image to a PNG file and/or an on-screen window.

    DEBUG_OUTPUT selects the destination: anything but 'screen' writes
    ``<name>_debug_<step>_<text>.png`` (spaces in ``text`` become
    underscores); anything but 'file' shows a captioned copy in the
    WINDOW_NAME window and waits until a key is pressed.
    """
    write_file = DEBUG_OUTPUT != 'screen'
    show_window = DEBUG_OUTPUT != 'file'

    if write_file:
        label = text.replace(' ', '_')
        outfile = '_'.join((name, 'debug', str(step), label)) + '.png'
        cv2.imwrite(outfile, display)

    if show_window:
        image = display.copy()
        anchor = (16, image.shape[0]-16)

        # thick black pass first, then thin white on top, so the caption
        # stays readable on any background
        for color, thickness in (((0, 0, 0), 3), ((255, 255, 255), 1)):
            cv2.putText(image, text, anchor,
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                        color, thickness, cv2.LINE_AA)

        cv2.imshow(WINDOW_NAME, image)

        # poll every 5 ms until waitKey reports a key code
        while cv2.waitKey(5) < 0:
            pass
def round_nearest_multiple(i, factor):
    """Round ``int(i)`` up to the next multiple of ``factor``.

    ``i`` is truncated to an integer first; a value that is already a
    multiple of ``factor`` is returned unchanged.
    """
    value = int(i)
    remainder = value % factor
    return value if not remainder else value + factor - remainder
def pix2norm(shape, pts):
    """Map pixel coordinates to normalized, image-centred coordinates.

    ``shape`` is an image shape whose first two entries are (height,
    width). Points are shifted so the image centre becomes the origin
    and scaled by ``2 / max(height, width)``.
    """
    height, width = shape[:2]
    center = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5
    scale = 2.0/(max(height, width))
    return (pts - center) * scale
def norm2pix(shape, pts, as_integer):
    """Map normalized, image-centred coordinates back to pixel coordinates.

    Inverse of the scaling used by ``pix2norm``: points are multiplied by
    ``max(height, width) / 2`` and shifted by the image centre. When
    ``as_integer`` is true the result is rounded (add 0.5, truncate) and
    returned as an integer array; otherwise it is returned unrounded.
    """
    height, width = shape[:2]
    scl = max(height, width)*0.5
    offset = np.array([0.5*width, 0.5*height],
                      dtype=pts.dtype).reshape((-1, 1, 2))
    pixels = pts * scl + offset
    if not as_integer:
        return pixels
    return (pixels + 0.5).astype(int)
def fltp(point):
    """Return ``point`` as a flat tuple of integers (values truncated)."""
    as_ints = point.astype(int)
    return tuple(as_ints.flatten())
def draw_correspondences(img, dstpoints, projpts):
    """Draw projected and destination points, joined pairwise, on a copy of ``img``.

    Both point sets are converted from normalized to integer pixel
    coordinates. Projected points are drawn with colour (255, 0, 0),
    destination points with (0, 0, 255), and each corresponding pair is
    connected with a white line. The annotated copy is returned.
    """
    display = img.copy()
    dst_px = norm2pix(img.shape, dstpoints, True)
    proj_px = norm2pix(img.shape, projpts, True)

    # projected points first, then destination points drawn over them
    for point in proj_px:
        cv2.circle(display, fltp(point), 3, (255, 0, 0), -1, cv2.LINE_AA)
    for point in dst_px:
        cv2.circle(display, fltp(point), 3, (0, 0, 255), -1, cv2.LINE_AA)

    for proj_pt, dst_pt in zip(proj_px, dst_px):
        cv2.line(display, fltp(proj_pt), fltp(dst_pt),
                 (255, 255, 255), 1, cv2.LINE_AA)

    return display
def get_default_params(corners, ycoords, xcoords):
    """Build the initial parameter vector for the optimisation.

    Returns ``(rough_dims, span_counts, params)`` where ``rough_dims`` is
    the (width, height) measured between the given corners,
    ``span_counts`` holds the number of x-coordinates in each span, and
    ``params`` concatenates, in order: rotation vector, translation
    vector, two cubic slopes (initially zero), the span y-coordinates and
    every span's x-coordinates.
    """
    # page extents measured along the first and last edges from corner 0
    origin = corners[0]
    page_width = np.linalg.norm(corners[1] - origin)
    page_height = np.linalg.norm(corners[-1] - origin)
    rough_dims = (page_width, page_height)

    # flat-page corner positions in 3D object coordinates
    corners_object3d = np.array([
        [0, 0, 0],
        [page_width, 0, 0],
        [page_width, page_height, 0],
        [0, page_height, 0]])

    # pose estimate from the four 2D-to-3D corner correspondences,
    # with no lens distortion
    _, rvec, tvec = cv2.solvePnP(corners_object3d,
                                 corners, K, np.zeros(5))

    # the initial cubic is flat: both end slopes start at zero
    cubic_slopes = [0.0, 0.0]

    span_counts = [len(xc) for xc in xcoords]

    leading = (np.array(rvec).flatten(),
               np.array(tvec).flatten(),
               np.array(cubic_slopes).flatten(),
               ycoords.flatten())
    params = np.hstack(leading + tuple(xcoords))

    return rough_dims, span_counts, params
def project_xy(xy_coords, pvec):
    """Project page (x, y) coordinates into the image through the cubic-sheet model.

    The z height of each point is a cubic in x whose coefficients follow
    from the two slopes stored at ``pvec[CUBIC_IDX]``:

        f(0) = 0, f'(0) = alpha
        f(1) = 0, f'(1) = beta

    The resulting 3D points are projected with the rotation and
    translation stored in ``pvec``, the intrinsic matrix ``K`` and zero
    distortion. Returns the image points from ``cv2.projectPoints``.
    """
    alpha, beta = tuple(pvec[CUBIC_IDX])

    # coefficients, highest power first, as np.polyval expects
    poly = np.array([alpha + beta, -2*alpha - beta, alpha, 0])

    flat_xy = xy_coords.reshape((-1, 2))
    heights = np.polyval(poly, flat_xy[:, 0]).reshape((-1, 1))
    objpoints = np.hstack((flat_xy, heights))

    image_points, _ = cv2.projectPoints(objpoints,
                                        pvec[RVEC_IDX],
                                        pvec[TVEC_IDX],
                                        K, np.zeros(5))
    return image_points
def project_keypoints(pvec, keypoint_index):
    """Look up keypoint (x, y) values in ``pvec`` and project them.

    ``keypoint_index`` holds, per keypoint, the positions in ``pvec`` of
    its x and y values. The first keypoint is forced to (0, 0) before
    projection.
    """
    keypoints = pvec[keypoint_index]
    # row 0 is the page origin, not a looked-up parameter
    keypoints[0, :] = 0
    return project_xy(keypoints, pvec)
def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
    """Shrink ``src`` by an integer factor so it fits within ``maxw`` x ``maxh``.

    The factor is the ceiling of the larger of width/maxw and
    height/maxh. If it exceeds 1 the image is resized by its reciprocal
    with area interpolation. Otherwise ``src`` itself is returned, or a
    copy of it when ``copy`` is true.
    """
    height, width = src.shape[:2]
    ratio = max(float(width)/maxw, float(height)/maxh)
    scl = int(np.ceil(ratio))

    if scl > 1.0:
        inv_scl = 1.0/scl
        return cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA)

    return src.copy() if copy else src
def box(width, height):
    """Return a ``height`` x ``width`` array of ones with dtype uint8."""
    shape = (height, width)
    return np.ones(shape, dtype=np.uint8)
def get_page_extents(small):
    """Return a page mask and its rectangular outline for image ``small``.

    The page region is the image inset by PAGE_MARGIN_X on the left and
    right and PAGE_MARGIN_Y on the top and bottom. Returns ``(page,
    outline)``: ``page`` is a uint8 mask with the region filled with 255,
    and ``outline`` lists the four rectangle corners.
    """
    height, width = small.shape[:2]

    left, top = PAGE_MARGIN_X, PAGE_MARGIN_Y
    right = width-PAGE_MARGIN_X
    bottom = height-PAGE_MARGIN_Y

    page = np.zeros((height, width), dtype=np.uint8)
    cv2.rectangle(page, (left, top), (right, bottom), (255, 255, 255), -1)

    outline = np.array([
        [left, top],
        [left, bottom],
        [right, bottom],
        [right, top]])

    return page, outline
def get_mask(name, small, pagemask, masktype):
    """Build a binary mask of candidate text or line pixels within the page.

    The grayscale image is adaptively thresholded (inverted), then
    cleaned with morphology: for ``masktype == 'text'`` a 9x1 dilation
    followed by a 1x3 erosion; for any other value a 3x1 erosion applied
    three times followed by an 8x2 dilation. The result is limited to
    ``pagemask`` with an element-wise minimum. At DEBUG_LEVEL >= 3 each
    intermediate mask is handed to ``debug_show``.
    """
    def show(step, label, image):
        # intermediate masks are only emitted at the highest debug level
        if DEBUG_LEVEL >= 3:
            debug_show(name, step, label, image)

    sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
    text_mode = masktype == 'text'

    # the two modes differ only in the constant subtracted from the mean
    mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY_INV,
                                 ADAPTIVE_WINSZ,
                                 25 if text_mode else 7)

    if text_mode:
        show(0.1, 'thresholded', mask)
        mask = cv2.dilate(mask, box(9, 1))
        show(0.2, 'dilated', mask)
        mask = cv2.erode(mask, box(1, 3))
        show(0.3, 'eroded', mask)
    else:
        show(0.4, 'thresholded', mask)
        mask = cv2.erode(mask, box(3, 1), iterations=3)
        show(0.5, 'eroded', mask)
        mask = cv2.dilate(mask, box(8, 2))
        show(0.6, 'dilated', mask)

    return np.minimum(mask, pagemask)
def interval_measure_overlap(int_a, int_b):
    """Return the signed overlap length of two (low, high) intervals.

    The result is positive when the intervals overlap and negative (the
    size of the gap) when they are disjoint.
    """
    upper = min(int_a[1], int_b[1])
    lower = max(int_a[0], int_b[0])
    return upper - lower
def angle_dist(angle_b, angle_a):
    """Return the absolute difference between two angles in radians.

    The raw difference is wrapped by whole turns of 2*pi until it lies
    within [-pi, pi] before its absolute value is taken.
    """
    full_turn = 2*np.pi
    delta = angle_b - angle_a
    while True:
        if delta > np.pi:
            delta -= full_turn
        elif delta < -np.pi:
            delta += full_turn
        else:
            break
    return np.abs(delta)
def blob_mean_and_tangent(contour):
    """Return the centroid and principal direction of a contour.

    The centroid comes from the first-order moments divided by the area
    (``m00``). The tangent is the first column of U from the SVD of the
    area-normalized second central moment matrix.
    """
    moments = cv2.moments(contour)
    area = moments['m00']

    center = np.array([old_div(moments['m10'], area),
                       old_div(moments['m01'], area)])

    second_moments = np.array([
        [moments['mu20'], moments['mu11']],
        [moments['mu11'], moments['mu02']]
    ])
    _, svd_u, _ = cv2.SVDecomp(old_div(second_moments, area))

    tangent = svd_u[:, 0].flatten().copy()

    return center, tangent
class ContourInfo(object):
    """A detected contour with its geometry and span-chain links.

    Stores the contour, its bounding rect and tight mask, its centroid,
    tangent direction and angle, the range of its points projected onto
    the tangent (``local_xrng``), the two endpoints along the tangent
    (``point0`` and ``point1``), and ``pred``/``succ`` links that start
    as None.
    """

    def __init__(self, contour, rect, mask):
        self.contour = contour
        self.rect = rect
        self.mask = mask

        self.center, self.tangent = blob_mean_and_tangent(contour)
        self.angle = np.arctan2(self.tangent[1], self.tangent[0])

        # extent of the contour along its own tangent, relative to center
        projections = [self.proj_x(point) for point in contour]
        self.local_xrng = (min(projections), max(projections))

        low, high = self.local_xrng
        self.point0 = self.center + self.tangent * low
        self.point1 = self.center + self.tangent * high

        self.pred = None
        self.succ = None

    def proj_x(self, point):
        """Project ``point`` onto this contour's tangent, relative to its center."""
        offset = point.flatten()-self.center
        return np.dot(self.tangent, offset)

    def local_overlap(self, other):
        """Overlap of ``other``'s endpoints with this contour's tangent range."""
        other_range = (self.proj_x(other.point0), self.proj_x(other.point1))
        return interval_measure_overlap(self.local_xrng, other_range)
def generate_candidate_edge(cinfo_a, cinfo_b):
    """Score a potential left-to-right link between two contours.

    Returns ``(score, left_cinfo, right_cinfo)`` with a lower score being
    better, or None when the pair is too far apart, overlaps too much
    along either contour's tangent, or deviates too much in angle from
    the line joining their centers.
    """
    # we want a left of b (so a's successor will be b and b's
    # predecessor will be a): swap when a's left endpoint lies to the
    # right of b's right endpoint.
    if cinfo_a.point0[0] > cinfo_b.point1[0]:
        cinfo_a, cinfo_b = cinfo_b, cinfo_a

    # we want the largest overlap in x to be small
    x_overlap = max(cinfo_a.local_overlap(cinfo_b),
                    cinfo_b.local_overlap(cinfo_a))

    overall_tangent = cinfo_b.center - cinfo_a.center
    overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0])

    # worst angular deviation of either contour from the joining line,
    # converted to degrees
    worst_dev = max(angle_dist(cinfo_a.angle, overall_angle),
                    angle_dist(cinfo_b.angle, overall_angle))
    delta_angle = old_div(worst_dev * 180, np.pi)

    dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)

    if (dist > EDGE_MAX_LENGTH or
            x_overlap > EDGE_MAX_OVERLAP or
            delta_angle > EDGE_MAX_ANGLE):
        return None

    score = dist + delta_angle*EDGE_ANGLE_COST
    return (score, cinfo_a, cinfo_b)
def make_tight_mask(contour, xmin, ymin, width, height):
    """Rasterize ``contour`` into a ``height`` x ``width`` uint8 mask.

    The contour is shifted by (xmin, ymin) so it is drawn relative to the
    mask origin, then filled with ones.
    """
    origin = np.array((xmin, ymin)).reshape((-1, 1, 2))
    shifted = contour - origin

    tight_mask = np.zeros((height, width), dtype=np.uint8)
    cv2.drawContours(tight_mask, [shifted], 0,
                     (1, 1, 1), -1)

    return tight_mask
def get_contours(name, small, pagemask, masktype):
    """Detect contours in the page mask and keep the text-like ones.

    A contour is discarded when its bounding box is narrower than
    TEXT_MIN_WIDTH, shorter than TEXT_MIN_HEIGHT, has a width/height
    ratio below TEXT_MIN_ASPECT, or when any column of its tight mask is
    thicker than TEXT_MAX_THICKNESS. Returns a list of ``ContourInfo``.
    At DEBUG_LEVEL >= 2 the kept contours are visualized.
    """
    mask = get_mask(name, small, pagemask, masktype)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the contour list is always the
    # second-to-last element, so index from the end instead of unpacking.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    contours_out = []

    for contour in contours:

        rect = cv2.boundingRect(contour)
        xmin, ymin, width, height = rect

        if (width < TEXT_MIN_WIDTH or
                height < TEXT_MIN_HEIGHT or
                width < TEXT_MIN_ASPECT*height):
            continue

        tight_mask = make_tight_mask(contour, xmin, ymin, width, height)

        # column sums of the filled mask give the blob thickness per x
        if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
            continue

        contours_out.append(ContourInfo(contour, rect, tight_mask))

    if DEBUG_LEVEL >= 2:
        visualize_contours(name, small, contours_out)

    return contours_out
def assemble_spans(name, small, pagemask, cinfo_list):
    """Chain contours into left-to-right spans.

    Candidate edges between every pair of contours are scored, then
    accepted greedily from best to worst whenever the left contour has no
    successor yet and the right contour has no predecessor yet. The
    resulting chains are walked from their heads; a chain is kept as a
    span when the summed tangent-range widths of its contours exceed
    SPAN_MIN_WIDTH. Returns a list of spans, each a list of
    ``ContourInfo``. At DEBUG_LEVEL >= 2 the spans are visualized.
    """
    # sort by the top edge of each bounding rect
    cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])

    # generate all candidate edges
    candidate_edges = []

    for i, cinfo_i in enumerate(cinfo_list):
        for j in range(i):
            # note edge is of the form (score, left_cinfo, right_cinfo)
            edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
            if edge is not None:
                candidate_edges.append(edge)

    # sort candidate edges by score (lower is better); compare on the
    # score alone, because comparing whole tuples falls through to the
    # ContourInfo objects on a tie and those define no ordering, which
    # raises TypeError on Python 3
    candidate_edges.sort(key=lambda edge: edge[0])

    # for each candidate edge
    for _, cinfo_a, cinfo_b in candidate_edges:
        # if left and right are unassigned, join them
        if cinfo_a.succ is None and cinfo_b.pred is None:
            cinfo_a.succ = cinfo_b
            cinfo_b.pred = cinfo_a

    # generate list of spans as output
    spans = []

    # until we have removed everything from the list
    while cinfo_list:

        # get the first on the list
        cinfo = cinfo_list[0]

        # keep following predecessors until none exists
        while cinfo.pred:
            cinfo = cinfo.pred

        # start a new span
        cur_span = []
        width = 0.0

        # follow successors til end of span
        while cinfo:
            # remove from list (sadly making this loop *also* O(n^2))
            cinfo_list.remove(cinfo)
            # add to span
            cur_span.append(cinfo)
            width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
            # set successor
            cinfo = cinfo.succ

        # add if long enough
        if width > SPAN_MIN_WIDTH:
            spans.append(cur_span)

    if DEBUG_LEVEL >= 2:
        visualize_spans(name, small, pagemask, spans)

    return spans
def sample_spans(shape, spans):
    """Sample points along each span, in normalized coordinates.

    For each contour in a span, the mask-weighted mean row of every
    column is computed and sampled every SPAN_PX_PER_STEP columns, with
    the samples centred within the contour's width. Samples are offset by
    the contour's bounding-rect origin and converted with ``pix2norm``.
    Returns one float32 array of shape (-1, 1, 2) per span.
    """
    step = SPAN_PX_PER_STEP
    span_points = []

    for span in spans:

        samples = []

        for cinfo in span:

            # mean row index of the mask pixels in each column
            rows = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
            weighted = (rows * cinfo.mask).sum(axis=0)
            means = old_div(weighted, cinfo.mask.sum(axis=0))

            xmin, ymin = cinfo.rect[:2]

            # split the columns left over after stepping evenly between
            # both ends
            start = old_div(((len(means)-1) % step), 2)

            samples.extend((x+xmin, means[x]+ymin)
                           for x in range(start, len(means), step))

        points = np.array(samples, dtype=np.float32).reshape((-1, 1, 2))
        span_points.append(pix2norm(shape, points))

    return span_points
def keypoints_from_samples(name, small, pagemask, page_outline,
                           span_points):
    """Derive page corners and per-span coordinates from sampled span points.

    The page x direction is the average of each span's first principal
    component, weighted by the distance between the span's first and last
    points; the y direction is perpendicular to it. The page outline is
    projected onto both axes to obtain the four corners. Returns
    ``(corners, ycoords, xcoords)``: corners with shape (-1, 1, 2), one
    mean y offset per span, and a list of per-span x offset arrays, all
    measured from the minimum-x / minimum-y corner. At DEBUG_LEVEL >= 2
    the span points and corners are visualized.
    """
    weighted_evecs = np.array([[0.0, 0.0]])
    total_weight = 0

    for points in span_points:
        _, evec = cv2.PCACompute(points.reshape((-1, 2)),
                                 None, maxComponents=1)
        weight = np.linalg.norm(points[-1] - points[0])
        weighted_evecs += evec * weight
        total_weight += weight

    x_dir = old_div(weighted_evecs, total_weight).flatten()

    # keep the x axis pointing towards positive image x
    if x_dir[0] < 0:
        x_dir = -x_dir

    y_dir = np.array([-x_dir[1], x_dir[0]])

    hull = cv2.convexHull(page_outline)
    pagecoords = pix2norm(pagemask.shape, hull.reshape((-1, 1, 2)))
    pagecoords = pagecoords.reshape((-1, 2))

    # extent of the page outline along each page axis
    along_x = np.dot(pagecoords, x_dir)
    along_y = np.dot(pagecoords, y_dir)

    px0, px1 = along_x.min(), along_x.max()
    py0, py1 = along_y.min(), along_y.max()

    p00 = px0 * x_dir + py0 * y_dir
    p10 = px1 * x_dir + py0 * y_dir
    p11 = px1 * x_dir + py1 * y_dir
    p01 = px0 * x_dir + py1 * y_dir

    corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))

    ycoords = []
    xcoords = []

    for points in span_points:
        pts = points.reshape((-1, 2))
        span_x = np.dot(pts, x_dir)
        span_y = np.dot(pts, y_dir)
        ycoords.append(span_y.mean() - py0)
        xcoords.append(span_x - px0)

    if DEBUG_LEVEL >= 2:
        visualize_span_points(name, small, span_points, corners)

    return corners, np.array(ycoords), xcoords
def visualize_contours(name, small, cinfo_list):
    """Emit a debug image showing each kept contour.

    Every contour is filled with a palette colour and blended half and
    half into a copy of ``small``; its center is then circled and its
    endpoints joined with a white line. The image is passed to
    ``debug_show`` as step 1, 'contours'.
    """
    regions = np.zeros_like(small)

    for j, cinfo in enumerate(cinfo_list):
        cv2.drawContours(regions, [cinfo.contour], 0,
                         CCOLORS[j % len(CCOLORS)], -1)

    # pixels covered by at least one filled contour
    mask = (regions.max(axis=2) != 0)

    display = small.copy()
    display[mask] = (old_div(display[mask], 2)) + (old_div(regions[mask], 2))

    # a darkened per-contour colour used to be computed here but was
    # never used; all markers are drawn in white
    for cinfo in cinfo_list:
        cv2.circle(display, fltp(cinfo.center), 3,
                   (255, 255, 255), 1, cv2.LINE_AA)

        cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
                 (255, 255, 255), 1, cv2.LINE_AA)

    debug_show(name, 1, 'contours', display)
def visualize_spans(name, small, pagemask, spans):
    """Emit a debug image showing the assembled spans.

    All contours of a span are filled with one palette colour (stepping
    three palette entries per span) and blended half and half into a copy
    of ``small``; pixels outside ``pagemask`` are darkened to a quarter.
    The image is passed to ``debug_show`` as step 2, 'spans'.
    """
    regions = np.zeros_like(small)
    palette_size = len(CCOLORS)

    for i, span in enumerate(spans):
        span_contours = [cinfo.contour for cinfo in span]
        cv2.drawContours(regions, span_contours, -1,
                         CCOLORS[i*3 % palette_size], -1)

    # pixels covered by at least one filled contour
    mask = (regions.max(axis=2) != 0)

    display = small.copy()
    display[mask] = (old_div(display[mask], 2)) + (old_div(regions[mask], 2))
    display[pagemask == 0] //= 4

    debug_show(name, 2, 'spans', display)
def visualize_span_points(name, small, span_points, corners):
    """Emit a debug image showing the sampled span points and page outline.

    For each span the sample points are drawn in a palette colour, and a
    white line is drawn along the span's first principal component
    between its extreme projections. The page corners are drawn as a
    closed white polygon. The image is passed to ``debug_show`` as step
    3, 'span points'.
    """
    display = small.copy()

    for i, points in enumerate(span_points):

        points = norm2pix(small.shape, points, False)
        flat_points = points.reshape((-1, 2))

        mean, small_evec = cv2.PCACompute(flat_points,
                                          None,
                                          maxComponents=1)

        # projections of the points and of the mean onto the principal axis
        dps = np.dot(flat_points, small_evec.reshape((2, 1)))
        dpm = np.dot(mean.flatten(), small_evec.flatten())

        point0 = mean + small_evec * (dps.min()-dpm)
        point1 = mean + small_evec * (dps.max()-dpm)

        span_color = CCOLORS[i % len(CCOLORS)]
        for point in points:
            cv2.circle(display, fltp(point), 3,
                       span_color, -1, cv2.LINE_AA)

        cv2.line(display, fltp(point0), fltp(point1),
                 (255, 255, 255), 1, cv2.LINE_AA)

    cv2.polylines(display, [norm2pix(small.shape, corners, True)],
                  True, (255, 255, 255))

    debug_show(name, 3, 'span points', display)
def imgsize(img):
    """Return the image size formatted as 'WIDTHxHEIGHT'."""
    rows, cols = img.shape[:2]
    return '{}x{}'.format(cols, rows)
def make_keypoint_index(span_counts):
    """Build the lookup table from keypoints to parameter-vector positions.

    Returns an integer array of shape ``(sum(span_counts) + 1, 2)``. Row 0
    is left as zeros. For every following row, column 0 is the position
    of that point's x value and column 1 is the position of its span's y
    value in the parameter vector. The first 8 positions of that vector
    are skipped; the ``len(span_counts)`` span y values come next,
    followed by the x values.
    """
    nspans = len(span_counts)
    npts = sum(span_counts)
    keypoint_index = np.zeros((npts+1, 2), dtype=int)

    start = 1
    for i, count in enumerate(span_counts):
        end = start + count
        # rows [start, end) belong to span i. The previous upper bound,
        # start+end, overran into later spans' rows and only produced the
        # right table because those rows were overwritten afterwards.
        keypoint_index[start:end, 1] = 8+i
        start = end

    keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans

    return keypoint_index
def optimize_params(name, small, dstpoints, span_counts, params):
    """Refine ``params`` so the projected keypoints match ``dstpoints``.

    Minimizes the sum of squared differences between ``dstpoints`` and
    the keypoints projected from the parameter vector, using Powell's
    method. Progress and timing are printed; at DEBUG_LEVEL >= 1 the
    correspondences before and after optimisation are visualized.
    Returns the optimized parameter vector.
    """
    keypoint_index = make_keypoint_index(span_counts)

    def objective(pvec):
        # squared reprojection error summed over all keypoints
        ppts = project_keypoints(pvec, keypoint_index)
        return np.sum((dstpoints - ppts)**2)

    def show_correspondences(step, label, pvec):
        projpts = project_keypoints(pvec, keypoint_index)
        display = draw_correspondences(small, dstpoints, projpts)
        debug_show(name, step, label, display)

    print(' initial objective is', objective(params))

    if DEBUG_LEVEL >= 1:
        show_correspondences(4, 'keypoints before', params)

    print(' optimizing', len(params), 'parameters...')
    started = datetime.datetime.now()
    res = scipy.optimize.minimize(objective, params,
                                  method='Powell')
    finished = datetime.datetime.now()
    elapsed = (finished-started).total_seconds()
    print(' optimization took', round(elapsed, 2), 'sec.')
    print(' final objective is', res.fun)

    params = res.x

    if DEBUG_LEVEL >= 1:
        show_correspondences(5, 'keypoints after', params)

    return params
def get_page_dims(corners, rough_dims, params):
    """Refine the page dimensions so they project onto the third corner.

    Starting from ``rough_dims``, Powell's method searches for the
    (width, height) whose projection under ``params`` is closest to
    ``corners[2]``. The result is printed and returned.
    """
    target = corners[2].flatten()

    def objective(dims):
        # squared distance between the projected dims and the target corner
        projected = project_xy(dims, params)
        return np.sum((target - projected.flatten())**2)

    res = scipy.optimize.minimize(objective, np.array(rough_dims),
                                  method='Powell')
    dims = res.x

    print(' got page dims', dims[0], 'x', dims[1])

    return dims
def remap_image(name, dirname, img, small, page_dims, params):
    """Dewarp ``img`` with the fitted model and save a thresholded PNG.

    A coarse grid of page coordinates (decimated by REMAP_DECIMATE) is
    projected into the image, upsampled to the output size and used to
    remap the grayscale image. The result is adaptively thresholded,
    converted to a 1-bit image and saved as ``<name>_thresh.png`` inside
    ``dirname`` with the stated DPI set to OUTPUT_DPI. At DEBUG_LEVEL >= 1
    a downscaled copy of the output is visualized.

    Returns the output file name, without the directory part.
    """
    # output size in pixels, rounded up to multiples of REMAP_DECIMATE so
    # the coarse grid divides it exactly
    height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
    height = round_nearest_multiple(height, REMAP_DECIMATE)

    width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]),
                                   REMAP_DECIMATE)

    print(' output will be {}x{}'.format(width, height))

    height_small = old_div(height, REMAP_DECIMATE)
    width_small = old_div(width, REMAP_DECIMATE)

    page_x_range = np.linspace(0, page_dims[0], width_small)
    page_y_range = np.linspace(0, page_dims[1], height_small)

    page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)

    page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
                                page_y_coords.flatten().reshape((-1, 1))))

    page_xy_coords = page_xy_coords.astype(np.float32)

    image_points = project_xy(page_xy_coords, params)
    image_points = norm2pix(img.shape, image_points, False)

    image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
    image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)

    # upsample the coarse lookup maps to the full output resolution
    image_x_coords = cv2.resize(image_x_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    image_y_coords = cv2.resize(image_y_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
                         cv2.INTER_CUBIC,
                         None, cv2.BORDER_REPLICATE)

    thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)

    pil_image = Image.fromarray(thresh)
    pil_image = pil_image.convert('1')

    threshfile = name + '_thresh.png'
    # os.path.join keeps the path relative when dirname is empty (input
    # given without a directory part); plain concatenation with '/'
    # pointed at the filesystem root in that case.
    pil_image.save(os.path.join(dirname, threshfile),
                   dpi=(OUTPUT_DPI, OUTPUT_DPI))

    if DEBUG_LEVEL >= 1:
        height = small.shape[0]
        width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
        display = cv2.resize(thresh, (width, height),
                             interpolation=cv2.INTER_AREA)
        debug_show(name, 6, 'output', display)

    return threshfile
def main():
    """Dewarp every image named on the command line.

    For each image: load it, downscale it for analysis, detect text
    contours (falling back to line contours when fewer than three text
    spans are found), assemble and sample spans, fit the page model and
    write the dewarped, thresholded output. Images that cannot be read or
    that yield no spans are skipped with a message. Finally an
    ImageMagick command for bundling the outputs into a PDF is printed.
    """
    if len(sys.argv) < 2:
        print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
        sys.exit(0)

    if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
        cv2.namedWindow(WINDOW_NAME)

    outfiles = []

    for imgfile in sys.argv[1:]:

        img = cv2.imread(imgfile)

        # cv2.imread signals a missing or undecodable file by returning
        # None rather than raising; skip it like other unusable inputs
        # instead of crashing on the first attribute access.
        if img is None:
            print('skipping', imgfile, 'because it could not be read as an image')
            continue

        small = resize_to_screen(img)
        basename = os.path.basename(imgfile)
        dirname = os.path.dirname(imgfile)
        name, _ = os.path.splitext(basename)

        print('loaded', basename, 'with size', imgsize(img), end=' ')
        print('and resized to', imgsize(small))

        if DEBUG_LEVEL >= 3:
            debug_show(name, 0.0, 'original', small)

        pagemask, page_outline = get_page_extents(small)

        cinfo_list = get_contours(name, small, pagemask, 'text')
        spans = assemble_spans(name, small, pagemask, cinfo_list)

        # too few text spans: try line detection and keep whichever
        # attempt produced more spans
        if len(spans) < 3:
            print(' detecting lines because only', len(spans), 'text spans')
            cinfo_list = get_contours(name, small, pagemask, 'line')
            spans2 = assemble_spans(name, small, pagemask, cinfo_list)
            if len(spans2) > len(spans):
                spans = spans2

        if len(spans) < 1:
            print('skipping', name, 'because only', len(spans), 'spans')
            continue

        span_points = sample_spans(small.shape, spans)

        print(' got', len(spans), 'spans', end=' ')
        print('with', sum([len(pts) for pts in span_points]), 'points.')

        corners, ycoords, xcoords = keypoints_from_samples(name, small,
                                                           pagemask,
                                                           page_outline,
                                                           span_points)

        rough_dims, span_counts, params = get_default_params(corners,
                                                             ycoords, xcoords)

        # the first destination point is the page origin corner, followed
        # by every sampled span point
        dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
                              tuple(span_points))

        params = optimize_params(name, small,
                                 dstpoints,
                                 span_counts, params)

        page_dims = get_page_dims(corners, rough_dims, params)

        outfile = remap_image(name, dirname, img, small, page_dims, params)

        outfiles.append(outfile)

        print(' wrote', outfile)
        print()

    print('to convert to PDF (requires ImageMagick):')
    print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')


if __name__ == '__main__':
    main()

5
docker/ingest/dewarp/requirements.txt

@ -1,5 +0,0 @@
numpy
scipy
Pillow
opencv-python
future

2
searchanddisplace-core

@ -1 +1 @@
Subproject commit 0d558adfcccce90d69730865267d636042a37418
Subproject commit 955aecce9432f24765aa8626af0cfe768852349d

2
searchanddisplace-ingest

@ -1 +1 @@
Subproject commit 50ab7a6333566fc8ce8fc2ba0e66abe769d21617
Subproject commit 96a09813e5a4859e3b6804e5bda3b3d243df03f7
Loading…
Cancel
Save