2 Commits
3b555efacd
...
574d05d4cf
Author | SHA1 | Message | Date |
---|---|---|---|
root | 574d05d4cf |
Merge branch 'master' of ssh://git.law:2222/newroco/searchanddisplace-docker
|
2 years ago |
root | 651f6ac325 |
modified: .gitmodules
deleted: docker/core/duckling deleted: docker/ingest/Dockerfile deleted: docker/ingest/dewarp/LICENSE.txt deleted: docker/ingest/dewarp/README.md deleted: docker/ingest/dewarp/derive_cubic.py deleted: docker/ingest/dewarp/page_dewarp.py deleted: docker/ingest/dewarp/requirements.txt modified: searchanddisplace-core modified: searchanddisplace-ingest |
2 years ago |
8 changed files with 0 additions and 1066 deletions
-
3.gitmodules
-
1docker/core/duckling
-
53docker/ingest/Dockerfile
-
21docker/ingest/dewarp/LICENSE.txt
-
14docker/ingest/dewarp/README.md
-
46docker/ingest/dewarp/derive_cubic.py
-
923docker/ingest/dewarp/page_dewarp.py
-
5docker/ingest/dewarp/requirements.txt
@ -1,53 +0,0 @@ |
|||||
FROM rcarjan/nginx-php:7.4

LABEL maintainer="Radu Liviu Carjan"

## Add required files
RUN mkdir /var/www/dewarp
# COPY is preferred over ADD for plain local directories (no URL/tar magic).
COPY dewarp /var/www/dewarp

## Install libreoffice
RUN apt-add-repository -y ppa:libreoffice/ppa && \
    apt-get update && \
    apt-get install -y \
        libreoffice \
        software-properties-common

## Install Python & the Poppler PDF converter
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        supervisor \
        python3.8 \
        python3.8-dev \
        python3.8-distutils \
        libpoppler-cpp-dev \
        poppler-utils

## Install Tesseract OCR, Pandoc & other dependencies
RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel && \
    apt-get update && \
    apt-get install -y \
        tesseract-ocr \
        unpaper \
        unoconv \
        pandoc

## Configure correct python version, install PIP
# The unversioned get-pip.py only supports currently maintained Python
# releases; the interpreter pinned here is 3.8, so fetch the 3.8-specific
# bootstrap script. -f makes curl fail on HTTP errors instead of saving an
# error page that would then be fed to python.
RUN rm /usr/bin/python3 && \
    ln -s /usr/bin/python3.8 /usr/bin/python3 && \
    apt-get install -y python-is-python3 && \
    curl -fsSL https://bootstrap.pypa.io/pip/3.8/get-pip.py -o get-pip.py && \
    python get-pip.py && \
    rm -rf get-pip.py && \
    pip install --upgrade pip

## Install PIP packages
RUN pip install \
        pdftotext \
        supervisor \
        opencv-python

WORKDIR /var/www/dewarp
RUN pip install -r requirements.txt

RUN mkdir /var/log/queue

WORKDIR /var/www/ingest
|
@ -1,21 +0,0 @@ |
|||||
MIT License |
|
||||
|
|
||||
Copyright (c) 2016, Matt Zucker |
|
||||
|
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|
||||
of this software and associated documentation files (the "Software"), to deal |
|
||||
in the Software without restriction, including without limitation the rights |
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
||||
copies of the Software, and to permit persons to whom the Software is |
|
||||
furnished to do so, subject to the following conditions: |
|
||||
|
|
||||
The above copyright notice and this permission notice shall be included in all |
|
||||
copies or substantial portions of the Software. |
|
||||
|
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
||||
SOFTWARE. |
|
@ -1,14 +0,0 @@ |
|||||
page_dewarp |
|
||||
=========== |
|
||||
|
|
||||
Page dewarping and thresholding using a "cubic sheet" model - see full writeup at <https://mzucker.github.io/2016/08/15/page-dewarping.html> |
|
||||
|
|
||||
Requirements: |
|
||||
|
|
||||
- scipy |
|
||||
- OpenCV 3.0 or greater |
|
||||
- Image module from PIL or Pillow |
|
||||
|
|
||||
Usage: |
|
||||
|
|
||||
page_dewarp.py IMAGE1 [IMAGE2 ...] |
|
@ -1,46 +0,0 @@ |
|||||
from __future__ import print_function |
|
||||
import matplotlib.pyplot as plt |
|
||||
import numpy as np |
|
||||
import sympy |
|
||||
|
|
||||
# Symbols: the four cubic coefficients, the free variable x, and the two
# prescribed end slopes alpha and beta.
a, b, c, d, x, alpha, beta = sympy.symbols('a b c d x alpha beta')

# The cubic f(x) and its first derivative f'(x).
f = a*x**3 + b*x**2 + c*x + d
fp = f.diff(x)

# Values of f and f' at both ends of the unit interval.
f0, f1 = f.subs(x, 0), f.subs(x, 1)
fp0, fp1 = fp.subs(x, 0), fp.subs(x, 1)

# Solve for a, b, c, d subject to:
#
#   f(0) = 0
#   f(1) = 0
#   f'(0) = alpha
#   f'(1) = beta
S = sympy.solve([f0, f1, fp0-alpha, fp1-beta], [a, b, c, d])

# Numeric slopes used for the example plot.
num_alpha = 0.3
num_beta = 0.03
numeric_slopes = dict(alpha=num_alpha, beta=num_beta)

# Print each analytic coefficient and collect its numeric value, highest
# power first (the order np.polyval expects).
coeffs = []
for key in (a, b, c, d):
    print(key, '=', S[key])
    coeffs.append(S[key].subs(numeric_slopes))

# Plot the example cubic over [0, 1].
xvals = np.linspace(0, 1, 101)
yvals = np.polyval(coeffs, xvals)

plt.plot(xvals, yvals)
plt.show()
|
||||
|
|
@ -1,923 +0,0 @@ |
|||||
#!/usr/bin/env python |
|
||||
###################################################################### |
|
||||
# page_dewarp.py - Proof-of-concept of page-dewarping based on a |
|
||||
# "cubic sheet" model. Requires OpenCV (version 3 or greater), |
|
||||
# PIL/Pillow, and scipy.optimize. |
|
||||
###################################################################### |
|
||||
# Author: Matt Zucker |
|
||||
# Date: July 2016 |
|
||||
# License: MIT License (see LICENSE.txt) |
|
||||
###################################################################### |
|
||||
|
|
||||
from __future__ import division |
|
||||
from __future__ import print_function |
|
||||
from builtins import zip |
|
||||
from builtins import str |
|
||||
from builtins import range |
|
||||
from builtins import object |
|
||||
from past.utils import old_div |
|
||||
import os |
|
||||
import sys |
|
||||
import datetime |
|
||||
import cv2 |
|
||||
from PIL import Image |
|
||||
import numpy as np |
|
||||
import scipy.optimize |
|
||||
|
|
||||
# for some reason pylint complains about cv2 members being undefined :( |
|
||||
# pylint: disable=E1101 |
|
||||
|
|
||||
# --- page margins ------------------------------------------------------
PAGE_MARGIN_X = 50       # reduced px to ignore near L/R edge
PAGE_MARGIN_Y = 20       # reduced px to ignore near T/B edge

# --- output ------------------------------------------------------------
OUTPUT_ZOOM = 1.0        # how much to zoom output relative to *original* image
OUTPUT_DPI = 300         # just affects stated DPI of PNG, not appearance
REMAP_DECIMATE = 16      # downscaling factor for remapping image

# --- thresholding ------------------------------------------------------
ADAPTIVE_WINSZ = 55      # window size for adaptive threshold in reduced px

# --- text contour filtering --------------------------------------------
TEXT_MIN_WIDTH = 15      # min reduced px width of detected text contour
TEXT_MIN_HEIGHT = 2      # min reduced px height of detected text contour
TEXT_MIN_ASPECT = 1.5    # filter out text contours below this w/h ratio
TEXT_MAX_THICKNESS = 10  # max reduced px thickness of detected text contour

# --- edges between contours --------------------------------------------
EDGE_MAX_OVERLAP = 1.0   # max reduced px horiz. overlap of contours in span
EDGE_MAX_LENGTH = 100.0  # max reduced px length of edge connecting contours
EDGE_ANGLE_COST = 10.0   # cost of angles in edges (tradeoff vs. length)
EDGE_MAX_ANGLE = 7.5     # maximum change in angle allowed between contours

# --- layout of the parameter vector ------------------------------------
RVEC_IDX = slice(0, 3)   # index of rvec in params vector
TVEC_IDX = slice(3, 6)   # index of tvec in params vector
CUBIC_IDX = slice(6, 8)  # index of cubic slopes in params vector

# --- spans and camera --------------------------------------------------
SPAN_MIN_WIDTH = 30      # minimum reduced px width for span
SPAN_PX_PER_STEP = 20    # reduced px spacing for sampling along spans
FOCAL_LENGTH = 1.2       # normalized focal length of camera

# --- debugging ---------------------------------------------------------
DEBUG_LEVEL = 0          # 0=none, 1=some, 2=lots, 3=all
DEBUG_OUTPUT = 'file'    # file, screen, both

WINDOW_NAME = 'Dewarp'   # Window name for visualization

# nice color palette for visualizing contours, etc.
CCOLORS = [
    (255, 0, 0), (255, 63, 0), (255, 127, 0), (255, 191, 0),
    (255, 255, 0), (191, 255, 0), (127, 255, 0), (63, 255, 0),
    (0, 255, 0), (0, 255, 63), (0, 255, 127), (0, 255, 191),
    (0, 255, 255), (0, 191, 255), (0, 127, 255), (0, 63, 255),
    (0, 0, 255), (63, 0, 255), (127, 0, 255), (191, 0, 255),
    (255, 0, 255), (255, 0, 191), (255, 0, 127), (255, 0, 63),
]

# default intrinsic parameter matrix: focal length on the first two
# diagonal entries, 1 in the last, zeros elsewhere
K = np.diag([FOCAL_LENGTH, FOCAL_LENGTH, 1]).astype(np.float32)
|
||||
|
|
||||
|
|
||||
def debug_show(name, step, text, display):
    """Emit a debug image to a PNG file and/or an on-screen window.

    Where the image goes is controlled by the module-level DEBUG_OUTPUT:
    anything other than 'screen' writes a file, anything other than
    'file' shows a window (so 'both' does both).

    name -- prefix of the output file name
    step -- step number, embedded in the file name
    text -- caption; spaces become underscores in the file name
    display -- image to write / show
    """
    to_file = DEBUG_OUTPUT != 'screen'
    to_screen = DEBUG_OUTPUT != 'file'

    if to_file:
        outfile = name + '_debug_' + str(step) + '_' + text.replace(' ', '_') + '.png'
        cv2.imwrite(outfile, display)

    if to_screen:
        image = display.copy()
        anchor = (16, image.shape[0] - 16)

        # Thick black pass first, thin white pass on top: an outlined
        # caption that stays readable on any background.
        for color, thickness in (((0, 0, 0), 3), ((255, 255, 255), 1)):
            cv2.putText(image, text, anchor,
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                        color, thickness, cv2.LINE_AA)

        cv2.imshow(WINDOW_NAME, image)

        # Block until a key press is reported.
        while cv2.waitKey(5) < 0:
            pass
|
||||
|
|
||||
|
|
||||
def round_nearest_multiple(i, factor):
    """Truncate *i* to an int, then round it up to a multiple of *factor*.

    A value that is already a multiple is returned unchanged.
    """
    value = int(i)
    remainder = value % factor
    return value if not remainder else value + factor - remainder
|
||||
|
|
||||
|
|
||||
def pix2norm(shape, pts):
    """Map pixel coordinates to normalized, image-centred coordinates.

    The image centre (width/2, height/2) maps to the origin and the
    result is scaled by 2 / max(height, width).

    shape -- image shape; only the first two entries (height, width) are used
    pts -- array of (x, y) points; the centre offset is built with shape
           (1, 1, 2) and broadcast against it
    """
    rows, cols = shape[:2]
    centre = np.array([cols, rows], dtype=pts.dtype).reshape((-1, 1, 2)) * 0.5
    scale = 2.0 / max(rows, cols)
    return (pts - centre) * scale
|
||||
|
|
||||
|
|
||||
def norm2pix(shape, pts, as_integer):
    """Map normalized coordinates back to pixel coordinates.

    Points are scaled by max(height, width) / 2 and shifted by the image
    centre (width/2, height/2).

    shape -- image shape; only the first two entries (height, width) are used
    pts -- array of (x, y) points; the centre offset is built with shape
           (1, 1, 2) and broadcast against it
    as_integer -- when true, add 0.5 and cast to int; otherwise return
                  the floating-point result
    """
    rows, cols = shape[:2]
    scale = max(rows, cols) * 0.5
    centre = np.array([0.5 * cols, 0.5 * rows],
                      dtype=pts.dtype).reshape((-1, 1, 2))
    pixels = pts * scale + centre

    if not as_integer:
        return pixels
    return (pixels + 0.5).astype(int)
|
||||
|
|
||||
|
|
||||
def fltp(point):
    """Return *point* as a flat tuple of native Python ints.

    The array is truncated to integers and flattened. The elements are
    converted to built-in ``int`` (not NumPy scalars) because the result
    is passed as point coordinates to OpenCV drawing calls, and some
    OpenCV Python bindings reject NumPy scalar types there.
    """
    return tuple(point.astype(int).flatten().tolist())
|
||||
|
|
||||
|
|
||||
def draw_correspondences(img, dstpoints, projpts):
    """Return a copy of *img* with two point sets and their pairing drawn.

    Both point sets are converted to integer pixel coordinates with
    norm2pix. Projected points are drawn as filled circles in
    (255, 0, 0), destination points in (0, 0, 255), and each
    projected/destination pair is joined by a thin white line.
    """
    canvas = img.copy()
    dst_px = norm2pix(img.shape, dstpoints, True)
    proj_px = norm2pix(img.shape, projpts, True)

    # Projected points first, destination points second.
    for point in proj_px:
        cv2.circle(canvas, fltp(point), 3, (255, 0, 0), -1, cv2.LINE_AA)
    for point in dst_px:
        cv2.circle(canvas, fltp(point), 3, (0, 0, 255), -1, cv2.LINE_AA)

    for proj_pt, dst_pt in zip(proj_px, dst_px):
        cv2.line(canvas, fltp(proj_pt), fltp(dst_pt),
                 (255, 255, 255), 1, cv2.LINE_AA)

    return canvas
|
||||
|
|
||||
|
|
||||
def get_default_params(corners, ycoords, xcoords):
    """Build the initial parameter vector for the optimizer.

    corners -- page corners; entries 0, 1 and -1 give the page width and
               height, and all of them are passed to cv2.solvePnP
    ycoords -- array with one value per span
    xcoords -- sequence with one array of x coordinates per span

    Returns (rough_dims, span_counts, params): the (width, height)
    estimate, the number of points in each span, and the flat vector
    [rvec, tvec, cubic slopes, ycoords, xcoords...].
    """
    # page width and height from the corner distances
    page_width = np.linalg.norm(corners[1] - corners[0])
    page_height = np.linalg.norm(corners[-1] - corners[0])

    # the flat page in 3D object coordinates (z = 0)
    corners_object3d = np.array([
        [0, 0, 0],
        [page_width, 0, 0],
        [page_width, page_height, 0],
        [0, page_height, 0]])

    # rotation and translation estimated from the four point
    # correspondences, with no lens distortion
    _, rvec, tvec = cv2.solvePnP(corners_object3d,
                                 corners, K, np.zeros(5))

    # the initial guess for the cubic has no slope
    cubic_slopes = [0.0, 0.0]

    pieces = (np.array(rvec).flatten(),
              np.array(tvec).flatten(),
              np.array(cubic_slopes).flatten(),
              ycoords.flatten())
    params = np.hstack(pieces + tuple(xcoords))

    span_counts = [len(xc) for xc in xcoords]
    rough_dims = (page_width, page_height)

    return rough_dims, span_counts, params
|
||||
|
|
||||
|
|
||||
def project_xy(xy_coords, pvec):
    """Project page-plane (x, y) points into the image.

    Each point gets a z value from a cubic in x, and the resulting 3D
    points are projected with cv2.projectPoints using the rotation and
    translation stored in *pvec* and the module-level intrinsics K.

    xy_coords -- array reshaped here to (-1, 2)
    pvec -- parameter vector; RVEC_IDX, TVEC_IDX and CUBIC_IDX select
            the rotation, translation and the two cubic slopes
    """
    # Coefficients (highest power first) of the cubic satisfying
    #
    #   f(0) = 0, f'(0) = alpha
    #   f(1) = 0, f'(1) = beta
    alpha, beta = tuple(pvec[CUBIC_IDX])
    cubic = np.array([
        alpha + beta,
        -2*alpha - beta,
        alpha,
        0])

    flat_xy = xy_coords.reshape((-1, 2))
    heights = np.polyval(cubic, flat_xy[:, 0]).reshape((-1, 1))
    objpoints = np.hstack((flat_xy, heights))

    projected, _ = cv2.projectPoints(objpoints,
                                     pvec[RVEC_IDX],
                                     pvec[TVEC_IDX],
                                     K, np.zeros(5))

    return projected
|
||||
|
|
||||
|
|
||||
def project_keypoints(pvec, keypoint_index):
    """Gather keypoint (x, y) pairs out of *pvec* and project them.

    keypoint_index -- integer array of indices into *pvec*; indexing
                      with it yields a fresh array, so *pvec* itself is
                      not modified by the assignment below
    """
    keypoints_xy = pvec[keypoint_index]
    # The first keypoint is pinned to the origin.
    keypoints_xy[0, :] = 0
    return project_xy(keypoints_xy, pvec)
|
||||
|
|
||||
|
|
||||
def resize_to_screen(src, maxw=1280, maxh=700, copy=False):
    """Shrink *src* by an integer factor so it fits in maxw x maxh.

    The factor is the ceiling of the larger of width/maxw and
    height/maxh. When that factor is 1 or less the image is returned
    unchanged -- as a copy if *copy* is true, otherwise the same object.
    """
    rows, cols = src.shape[:2]

    ratio_x = float(cols) / maxw
    ratio_y = float(rows) / maxh
    factor = int(np.ceil(max(ratio_x, ratio_y)))

    if factor > 1.0:
        shrink = 1.0 / factor
        return cv2.resize(src, (0, 0), None, shrink, shrink, cv2.INTER_AREA)

    if copy:
        return src.copy()

    return src
|
||||
|
|
||||
|
|
||||
def box(width, height):
    """Return a (height, width) uint8 array filled with ones."""
    return np.full((height, width), 1, dtype=np.uint8)
|
||||
|
|
||||
|
|
||||
def get_page_extents(small):
    """Return the page mask and outline of *small* inside the margins.

    The mask is a uint8 image that is zero everywhere except a filled
    rectangle inset by PAGE_MARGIN_X / PAGE_MARGIN_Y from the image
    border. The outline holds that rectangle's four corners in the order
    (xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin).
    """
    rows, cols = small.shape[:2]

    left, top = PAGE_MARGIN_X, PAGE_MARGIN_Y
    right, bottom = cols - PAGE_MARGIN_X, rows - PAGE_MARGIN_Y

    page = np.zeros((rows, cols), dtype=np.uint8)
    cv2.rectangle(page, (left, top), (right, bottom), (255, 255, 255), -1)

    outline = np.array([
        [left, top],
        [left, bottom],
        [right, bottom],
        [right, top]])

    return page, outline
|
||||
|
|
||||
|
|
||||
def get_mask(name, small, pagemask, masktype):
    """Threshold *small* and clean it up into a text or line mask.

    masktype 'text' uses an adaptive-threshold constant of 25, then a
    9x1 dilation followed by a 1x3 erosion. Any other masktype uses a
    constant of 7, then three iterations of a 3x1 erosion followed by an
    8x2 dilation. The result is clipped to *pagemask* with an
    element-wise minimum.

    With DEBUG_LEVEL >= 3 every intermediate mask is handed to
    debug_show (steps 0.1-0.3 for text, 0.4-0.6 otherwise).
    """
    gray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY)
    verbose = DEBUG_LEVEL >= 3
    is_text = masktype == 'text'

    mask = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY_INV,
                                 ADAPTIVE_WINSZ,
                                 25 if is_text else 7)
    if verbose:
        debug_show(name, 0.1 if is_text else 0.4, 'thresholded', mask)

    if is_text:
        # widen horizontally first, then thin vertically
        mask = cv2.dilate(mask, box(9, 1))
        if verbose:
            debug_show(name, 0.2, 'dilated', mask)

        mask = cv2.erode(mask, box(1, 3))
        if verbose:
            debug_show(name, 0.3, 'eroded', mask)
    else:
        # erode horizontally first, then grow what is left
        mask = cv2.erode(mask, box(3, 1), iterations=3)
        if verbose:
            debug_show(name, 0.5, 'eroded', mask)

        mask = cv2.dilate(mask, box(8, 2))
        if verbose:
            debug_show(name, 0.6, 'dilated', mask)

    return np.minimum(mask, pagemask)
|
||||
|
|
||||
|
|
||||
def interval_measure_overlap(int_a, int_b):
    """Return the signed overlap length of two (lo, hi) intervals.

    The result is negative when the intervals are disjoint.
    """
    upper = min(int_a[1], int_b[1])
    lower = max(int_a[0], int_b[0])
    return upper - lower
|
||||
|
|
||||
|
|
||||
def angle_dist(angle_b, angle_a):
    """Return the absolute difference of two angles in radians.

    The raw difference is shifted by whole turns until it lies in
    [-pi, pi] before the absolute value is taken.
    """
    full_turn = 2*np.pi
    delta = angle_b - angle_a

    while delta > np.pi:
        delta -= full_turn

    while delta < -np.pi:
        delta += full_turn

    return np.abs(delta)
|
||||
|
|
||||
|
|
||||
def blob_mean_and_tangent(contour):
    """Return the centroid and principal direction of *contour*.

    The centroid comes from the first-order image moments divided by the
    area (m00). The tangent is the first column of U from the SVD of the
    area-normalized second-order central-moment matrix.
    """
    m = cv2.moments(contour)
    area = m['m00']

    center = np.array([old_div(m['m10'], area),
                       old_div(m['m01'], area)])

    second_moments = old_div(np.array([
        [m['mu20'], m['mu11']],
        [m['mu11'], m['mu02']]
    ]), area)

    _, svd_u, _ = cv2.SVDecomp(second_moments)

    # copy so the returned vector does not alias the SVD output
    tangent = svd_u[:, 0].flatten().copy()

    return center, tangent
|
||||
|
|
||||
|
|
||||
class ContourInfo(object):
    """A contour together with geometry derived from it.

    Besides the constructor arguments (contour, rect, mask) an instance
    stores the contour's center and tangent, the tangent's angle, the
    extent of the contour along its tangent (local_xrng), the two end
    points of that extent (point0, point1), and pred / succ links that
    start out as None.
    """

    def __init__(self, contour, rect, mask):
        self.contour = contour
        self.rect = rect
        self.mask = mask

        # links to neighbouring contours, filled in later
        self.pred = None
        self.succ = None

        self.center, self.tangent = blob_mean_and_tangent(contour)
        self.angle = np.arctan2(self.tangent[1], self.tangent[0])

        # extent of the contour measured along its own tangent
        projections = [self.proj_x(point) for point in contour]
        lo, hi = min(projections), max(projections)
        self.local_xrng = (lo, hi)

        self.point0 = self.center + self.tangent * lo
        self.point1 = self.center + self.tangent * hi

    def proj_x(self, point):
        """Signed distance of *point* from the center along the tangent."""
        return np.dot(self.tangent, point.flatten()-self.center)

    def local_overlap(self, other):
        """Overlap of *other*'s end points with this contour's extent.

        Both end points of *other* are projected onto this contour's
        tangent and compared with local_xrng.
        """
        other_range = (self.proj_x(other.point0), self.proj_x(other.point1))
        return interval_measure_overlap(self.local_xrng, other_range)
|
||||
|
|
||||
|
|
||||
def generate_candidate_edge(cinfo_a, cinfo_b):
    """Score a possible left-to-right link between two contours.

    Returns None when the pair is too far apart, overlaps too much along
    either tangent, or deviates too much in angle (EDGE_MAX_LENGTH,
    EDGE_MAX_OVERLAP, EDGE_MAX_ANGLE). Otherwise returns
    (score, left_cinfo, right_cinfo), where lower scores are better.
    """
    # a should be left of b (a's successor will be b, b's predecessor
    # will be a): swap if a's left end point lies right of b's right one.
    if cinfo_a.point0[0] > cinfo_b.point1[0]:
        cinfo_a, cinfo_b = cinfo_b, cinfo_a

    # the larger of the two mutual overlaps should be small
    x_overlap = max(cinfo_a.local_overlap(cinfo_b),
                    cinfo_b.local_overlap(cinfo_a))

    # direction of the line joining the two centers
    joining = cinfo_b.center - cinfo_a.center
    joining_angle = np.arctan2(joining[1], joining[0])

    # worst deviation of either contour from that direction, in degrees
    worst_deviation = max(angle_dist(cinfo_a.angle, joining_angle),
                          angle_dist(cinfo_b.angle, joining_angle))
    delta_angle = old_div(worst_deviation * 180, np.pi)

    # gap between a's right end and b's left end
    dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1)

    if (dist > EDGE_MAX_LENGTH or
            x_overlap > EDGE_MAX_OVERLAP or
            delta_angle > EDGE_MAX_ANGLE):
        return None

    score = dist + delta_angle*EDGE_ANGLE_COST
    return (score, cinfo_a, cinfo_b)
|
||||
|
|
||||
|
|
||||
def make_tight_mask(contour, xmin, ymin, width, height):
    """Rasterize *contour* into a (height, width) uint8 mask.

    The contour is shifted by (-xmin, -ymin) before being drawn filled
    with value 1 on a zero background.
    """
    origin = np.array((xmin, ymin)).reshape((-1, 1, 2))
    shifted = contour - origin

    canvas = np.zeros((height, width), dtype=np.uint8)
    cv2.drawContours(canvas, [shifted], 0, (1, 1, 1), -1)

    return canvas
|
||||
|
|
||||
|
|
||||
def get_contours(name, small, pagemask, masktype):
    """Find contours in the mask of *small* that look like text or lines.

    The mask comes from get_mask. A contour is discarded when its
    bounding box is narrower than TEXT_MIN_WIDTH, shorter than
    TEXT_MIN_HEIGHT, has a width/height ratio below TEXT_MIN_ASPECT, or
    when any column of its filled mask is thicker than
    TEXT_MAX_THICKNESS. Survivors are returned as ContourInfo objects.

    With DEBUG_LEVEL >= 2 the kept contours are visualized.
    """
    mask = get_mask(name, small, pagemask, masktype)

    # cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x
    # but (contours, hierarchy) on 2.x and 4.x; the contour list is the
    # second-to-last element in every version.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    contours_out = []

    for contour in contours:
        rect = cv2.boundingRect(contour)
        xmin, ymin, width, height = rect

        if (width < TEXT_MIN_WIDTH or
                height < TEXT_MIN_HEIGHT or
                width < TEXT_MIN_ASPECT*height):
            continue

        tight_mask = make_tight_mask(contour, xmin, ymin, width, height)

        # per-column pixel count = local thickness of the blob
        if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS:
            continue

        contours_out.append(ContourInfo(contour, rect, tight_mask))

    if DEBUG_LEVEL >= 2:
        visualize_contours(name, small, contours_out)

    return contours_out
|
||||
|
|
||||
|
|
||||
def assemble_spans(name, small, pagemask, cinfo_list):
    """Chain contours into left-to-right spans.

    Every pair of contours is scored with generate_candidate_edge; edges
    are then accepted greedily, best score first, as long as the left
    contour has no successor and the right one no predecessor yet. The
    resulting chains are returned as lists of ContourInfo, keeping only
    chains whose summed contour widths exceed SPAN_MIN_WIDTH.

    The pred / succ attributes of the contours are set as a side effect.
    With DEBUG_LEVEL >= 2 the spans are visualized.
    """
    # sort by the top of the bounding box
    cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1])

    # generate all candidate edges
    candidate_edges = []

    for i, cinfo_i in enumerate(cinfo_list):
        for j in range(i):
            # note edge is of the form (score, left_cinfo, right_cinfo)
            edge = generate_candidate_edge(cinfo_i, cinfo_list[j])
            if edge is not None:
                candidate_edges.append(edge)

    # Sort candidate edges by score (lower is better). Sort on the score
    # only: comparing whole tuples falls through to comparing ContourInfo
    # objects whenever two scores tie, which raises TypeError on Python 3.
    candidate_edges.sort(key=lambda edge: edge[0])

    # for each candidate edge
    for _, cinfo_a, cinfo_b in candidate_edges:
        # if left and right are unassigned, join them
        if cinfo_a.succ is None and cinfo_b.pred is None:
            cinfo_a.succ = cinfo_b
            cinfo_b.pred = cinfo_a

    # generate list of spans as output
    spans = []

    # until we have removed everything from the list
    while cinfo_list:

        # get the first on the list
        cinfo = cinfo_list[0]

        # keep following predecessors until none exists
        while cinfo.pred:
            cinfo = cinfo.pred

        # start a new span
        cur_span = []
        width = 0.0

        # follow successors til end of span
        while cinfo:
            # remove from list (sadly making this loop *also* O(n^2))
            cinfo_list.remove(cinfo)
            # add to span
            cur_span.append(cinfo)
            width += cinfo.local_xrng[1] - cinfo.local_xrng[0]
            # set successor
            cinfo = cinfo.succ

        # add if long enough
        if width > SPAN_MIN_WIDTH:
            spans.append(cur_span)

    if DEBUG_LEVEL >= 2:
        visualize_spans(name, small, pagemask, spans)

    return spans
|
||||
|
|
||||
|
|
||||
def sample_spans(shape, spans):
    """Sample points along each span, in normalized coordinates.

    For every contour in a span, the mean row of its mask is computed
    per column, and one (x, y) point is taken every SPAN_PX_PER_STEP
    columns, with the samples centred within the contour width. The
    points of a span are collected into one float32 array of shape
    (-1, 1, 2) and converted with pix2norm.

    Returns a list with one such array per span.
    """
    step = SPAN_PX_PER_STEP
    span_points = []

    for span in spans:
        samples = []

        for cinfo in span:
            # mask-weighted mean row index for every column
            row_index = np.arange(cinfo.mask.shape[0]).reshape((-1, 1))
            weighted = (row_index * cinfo.mask).sum(axis=0)
            means = old_div(weighted, cinfo.mask.sum(axis=0))

            xmin, ymin = cinfo.rect[:2]

            # split the leftover columns evenly between both ends
            start = ((len(means)-1) % step) // 2

            for x in range(start, len(means), step):
                samples.append((x+xmin, means[x]+ymin))

        pixel_points = np.array(samples,
                                dtype=np.float32).reshape((-1, 1, 2))

        span_points.append(pix2norm(shape, pixel_points))

    return span_points
|
||||
|
|
||||
|
|
||||
def keypoints_from_samples(name, small, pagemask, page_outline,
                           span_points):
    """Derive the page axes, corners and per-span coordinates.

    The x axis is the average of each span's first principal component,
    weighted by the distance between the span's first and last point;
    the y axis is perpendicular to it. The page outline is projected on
    both axes to get the page corners, and every span is expressed
    relative to the page's minimum x / y along those axes.

    Returns (corners, ycoords, xcoords): corners with shape (-1, 1, 2),
    an array with one mean y per span, and a list with one array of x
    coordinates per span. With DEBUG_LEVEL >= 2 the span points are
    visualized.
    """
    weighted_dirs = np.array([[0.0, 0.0]])
    total_weight = 0

    for points in span_points:
        _, evec = cv2.PCACompute(points.reshape((-1, 2)),
                                 None, maxComponents=1)

        weight = np.linalg.norm(points[-1] - points[0])

        weighted_dirs += evec * weight
        total_weight += weight

    x_dir = old_div(weighted_dirs, total_weight).flatten()

    # make the x axis point towards increasing x
    if x_dir[0] < 0:
        x_dir = -x_dir

    y_dir = np.array([-x_dir[1], x_dir[0]])

    # page outline in normalized coordinates, as a flat list of points
    hull = cv2.convexHull(page_outline)
    hull = pix2norm(pagemask.shape, hull.reshape((-1, 1, 2)))
    hull = hull.reshape((-1, 2))

    hull_x = np.dot(hull, x_dir)
    hull_y = np.dot(hull, y_dir)

    px0, px1 = hull_x.min(), hull_x.max()
    py0, py1 = hull_y.min(), hull_y.max()

    # corners in the order (x0,y0), (x1,y0), (x1,y1), (x0,y1)
    p00 = px0 * x_dir + py0 * y_dir
    p10 = px1 * x_dir + py0 * y_dir
    p11 = px1 * x_dir + py1 * y_dir
    p01 = px0 * x_dir + py1 * y_dir

    corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2))

    ycoords = []
    xcoords = []

    for points in span_points:
        flat = points.reshape((-1, 2))
        along_x = np.dot(flat, x_dir)
        along_y = np.dot(flat, y_dir)
        ycoords.append(along_y.mean() - py0)
        xcoords.append(along_x - px0)

    if DEBUG_LEVEL >= 2:
        visualize_span_points(name, small, span_points, corners)

    return corners, np.array(ycoords), xcoords
|
||||
|
|
||||
|
|
||||
def visualize_contours(name, small, cinfo_list):
    """Show the detected contours blended over *small* (debug step 1).

    Each contour is filled with a colour from CCOLORS and blended
    half-and-half with the image; its center is marked with a circle and
    its point0-point1 extent with a line, both white.
    """
    regions = np.zeros_like(small)

    for j, cinfo in enumerate(cinfo_list):
        cv2.drawContours(regions, [cinfo.contour], 0,
                         CCOLORS[j % len(CCOLORS)], -1)

    # pixels covered by any contour
    mask = (regions.max(axis=2) != 0)

    display = small.copy()
    display[mask] = (old_div(display[mask], 2)) + (old_div(regions[mask], 2))

    # The original also computed a dimmed per-contour colour in this loop
    # but never used it; that dead computation has been removed.
    for cinfo in cinfo_list:
        cv2.circle(display, fltp(cinfo.center), 3,
                   (255, 255, 255), 1, cv2.LINE_AA)

        cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1),
                 (255, 255, 255), 1, cv2.LINE_AA)

    debug_show(name, 1, 'contours', display)
|
||||
|
|
||||
|
|
||||
def visualize_spans(name, small, pagemask, spans):
    """Show the assembled spans blended over *small* (debug step 2).

    All contours of a span share one colour (every third entry of
    CCOLORS), the coloured regions are blended half-and-half with the
    image, and everything outside *pagemask* is darkened to a quarter.
    """
    overlay = np.zeros_like(small)

    for i, span in enumerate(spans):
        span_color = CCOLORS[i*3 % len(CCOLORS)]
        cv2.drawContours(overlay, [cinfo.contour for cinfo in span], -1,
                         span_color, -1)

    # pixels covered by any span
    covered = (overlay.max(axis=2) != 0)

    display = small.copy()
    display[covered] = (old_div(display[covered],2)) + (old_div(overlay[covered],2))
    display[pagemask == 0] //= 4

    debug_show(name, 2, 'spans', display)
|
||||
|
|
||||
|
|
||||
def visualize_span_points(name, small, span_points, corners):
    """Show sampled span points and the page outline (debug step 3).

    For every span its points are drawn as filled circles in a CCOLORS
    colour, plus a white line along the span's first principal component
    covering the extent of the points. The page corners are drawn as a
    closed white polygon.
    """
    display = small.copy()

    for i, points in enumerate(span_points):
        pixel_pts = norm2pix(small.shape, points, False)
        flat_pts = pixel_pts.reshape((-1, 2))

        mean, small_evec = cv2.PCACompute(flat_pts,
                                          None,
                                          maxComponents=1)

        # position of every point, and of the mean, along the main axis
        dps = np.dot(flat_pts, small_evec.reshape((2, 1)))
        dpm = np.dot(mean.flatten(), small_evec.flatten())

        axis_start = mean + small_evec * (dps.min()-dpm)
        axis_end = mean + small_evec * (dps.max()-dpm)

        span_color = CCOLORS[i % len(CCOLORS)]
        for point in pixel_pts:
            cv2.circle(display, fltp(point), 3,
                       span_color, -1, cv2.LINE_AA)

        cv2.line(display, fltp(axis_start), fltp(axis_end),
                 (255, 255, 255), 1, cv2.LINE_AA)

    cv2.polylines(display, [norm2pix(small.shape, corners, True)],
                  True, (255, 255, 255))

    debug_show(name, 3, 'span points', display)
|
||||
|
|
||||
|
|
||||
def imgsize(img):
    """Return the size of *img* as the string 'WIDTHxHEIGHT'."""
    rows, cols = img.shape[:2]
    return '{}x{}'.format(cols, rows)
|
||||
|
|
||||
|
|
||||
def make_keypoint_index(span_counts):
    """Build the index array that maps keypoints into the params vector.

    span_counts -- number of sampled points in each span

    Returns an int array of shape (npts + 1, 2). Row 0 is left at zero.
    For every following row, column 0 is the index of that point's x
    coordinate (stored after the 8 leading parameters and the per-span
    y values) and column 1 is the index of its span's y value (8 + span
    number).
    """
    nspans = len(span_counts)
    npts = sum(span_counts)
    keypoint_index = np.zeros((npts+1, 2), dtype=int)

    start = 1
    for i, count in enumerate(span_counts):
        end = start + count
        # Rows start..end-1 belong to span i. The original sliced
        # start:start+end, overrunning into later spans; the result was
        # only correct because later iterations overwrote the overrun.
        keypoint_index[start:end, 1] = 8+i
        start = end

    keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans

    return keypoint_index
|
||||
|
|
||||
|
|
||||
def optimize_params(name, small, dstpoints, span_counts, params):
    """Refine *params* so projected keypoints match *dstpoints*.

    The objective is the sum of squared differences between *dstpoints*
    and the keypoints projected with the current parameter vector; it is
    minimized with scipy's Powell method. Progress and timing are
    printed, and with DEBUG_LEVEL >= 1 the correspondences are shown
    before and after optimization.

    Returns the optimized parameter vector.
    """
    keypoint_index = make_keypoint_index(span_counts)

    def objective(pvec):
        projected = project_keypoints(pvec, keypoint_index)
        return np.sum((dstpoints - projected)**2)

    def show_correspondences(step, caption, pvec):
        projpts = project_keypoints(pvec, keypoint_index)
        display = draw_correspondences(small, dstpoints, projpts)
        debug_show(name, step, caption, display)

    print('  initial objective is', objective(params))

    if DEBUG_LEVEL >= 1:
        show_correspondences(4, 'keypoints before', params)

    print('  optimizing', len(params), 'parameters...')
    started = datetime.datetime.now()
    res = scipy.optimize.minimize(objective, params,
                                  method='Powell')
    finished = datetime.datetime.now()
    print('  optimization took', round((finished-started).total_seconds(), 2), 'sec.')
    print('  final objective is', res.fun)

    params = res.x

    if DEBUG_LEVEL >= 1:
        show_correspondences(5, 'keypoints after', params)

    return params
|
||||
|
|
||||
|
|
||||
def get_page_dims(corners, rough_dims, params):
    """Refine the page dimensions against the third page corner.

    Starting from *rough_dims*, Powell's method searches for the
    (width, height) whose projection under *params* lands closest, in
    squared distance, to corners[2]. The result is printed and returned.
    """
    target = corners[2].flatten()

    def objective(dims):
        projected = project_xy(dims, params)
        return np.sum((target - projected.flatten())**2)

    res = scipy.optimize.minimize(objective, np.array(rough_dims),
                                  method='Powell')
    dims = res.x

    print('  got page dims', dims[0], 'x', dims[1])

    return dims
|
||||
|
|
||||
|
|
||||
def remap_image(name, dirname, img, small, page_dims, params):
    """Dewarp *img*, threshold it and save it as '<name>_thresh.png'.

    The output size is derived from *page_dims*, OUTPUT_ZOOM and the
    input height, rounded up to multiples of REMAP_DECIMATE. The
    page-to-image mapping is evaluated on a grid decimated by
    REMAP_DECIMATE, upsampled to full size, and used to remap the
    grayscale image, which is then adaptively thresholded and written as
    a 1-bit PNG in *dirname*.

    With DEBUG_LEVEL >= 1 a resized copy of the result is shown.

    Returns the file name of the written image (without directory).
    """
    height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0]
    height = round_nearest_multiple(height, REMAP_DECIMATE)

    width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]),
                                   REMAP_DECIMATE)

    print('  output will be {}x{}'.format(width, height))

    # size of the coarse grid on which the mapping is evaluated
    height_small = old_div(height, REMAP_DECIMATE)
    width_small = old_div(width, REMAP_DECIMATE)

    page_x_range = np.linspace(0, page_dims[0], width_small)
    page_y_range = np.linspace(0, page_dims[1], height_small)

    page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range)

    page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)),
                                page_y_coords.flatten().reshape((-1, 1))))

    page_xy_coords = page_xy_coords.astype(np.float32)

    image_points = project_xy(page_xy_coords, params)
    image_points = norm2pix(img.shape, image_points, False)

    image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape)
    image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape)

    # upsample the coarse mapping to the full output resolution
    image_x_coords = cv2.resize(image_x_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    image_y_coords = cv2.resize(image_y_coords, (width, height),
                                interpolation=cv2.INTER_CUBIC)

    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    remapped = cv2.remap(img_gray, image_x_coords, image_y_coords,
                         cv2.INTER_CUBIC,
                         None, cv2.BORDER_REPLICATE)

    thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25)

    pil_image = Image.fromarray(thresh)
    pil_image = pil_image.convert('1')

    threshfile = name + '_thresh.png'
    # os.path.join instead of dirname + '/' + ...: for an image given as a
    # bare file name dirname is '', and the concatenation would produce
    # '/<name>_thresh.png', i.e. a write to the filesystem root.
    pil_image.save(os.path.join(dirname, threshfile),
                   dpi=(OUTPUT_DPI, OUTPUT_DPI))

    if DEBUG_LEVEL >= 1:
        height = small.shape[0]
        width = int(round(height * float(thresh.shape[1])/thresh.shape[0]))
        display = cv2.resize(thresh, (width, height),
                             interpolation=cv2.INTER_AREA)
        debug_show(name, 6, 'output', display)

    return threshfile
|
||||
|
|
||||
|
|
||||
def main():
    """Dewarp every image named on the command line.

    For each image: detect text contours (falling back to line contours
    when fewer than three text spans are found), assemble and sample
    spans, optimize the page model and write the thresholded, dewarped
    result next to the input. Images that cannot be read, or that yield
    no spans, are skipped with a message. Finally an ImageMagick command
    for combining the outputs into a PDF is printed.
    """
    if len(sys.argv) < 2:
        print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]')
        sys.exit(0)

    if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file':
        cv2.namedWindow(WINDOW_NAME)

    outfiles = []

    for imgfile in sys.argv[1:]:

        img = cv2.imread(imgfile)
        # cv2.imread signals a missing or undecodable file by returning
        # None rather than raising; skip instead of crashing later.
        if img is None:
            print('skipping', imgfile, 'because it could not be read')
            continue

        small = resize_to_screen(img)
        basename = os.path.basename(imgfile)
        dirname = os.path.dirname(imgfile)
        name, _ = os.path.splitext(basename)

        print('loaded', basename, 'with size', imgsize(img), end=' ')
        print('and resized to', imgsize(small))

        if DEBUG_LEVEL >= 3:
            debug_show(name, 0.0, 'original', small)

        pagemask, page_outline = get_page_extents(small)

        cinfo_list = get_contours(name, small, pagemask, 'text')
        spans = assemble_spans(name, small, pagemask, cinfo_list)

        if len(spans) < 3:
            print('  detecting lines because only', len(spans), 'text spans')
            cinfo_list = get_contours(name, small, pagemask, 'line')
            spans2 = assemble_spans(name, small, pagemask, cinfo_list)
            if len(spans2) > len(spans):
                spans = spans2

        if len(spans) < 1:
            print('skipping', name, 'because only', len(spans), 'spans')
            continue

        span_points = sample_spans(small.shape, spans)

        print('  got', len(spans), 'spans', end=' ')
        print('with', sum([len(pts) for pts in span_points]), 'points.')

        corners, ycoords, xcoords = keypoints_from_samples(name, small,
                                                           pagemask,
                                                           page_outline,
                                                           span_points)

        rough_dims, span_counts, params = get_default_params(corners,
                                                             ycoords, xcoords)

        # optimization targets: the first corner followed by all span points
        dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) +
                              tuple(span_points))

        params = optimize_params(name, small,
                                 dstpoints,
                                 span_counts, params)

        page_dims = get_page_dims(corners, rough_dims, params)

        outfile = remap_image(name, dirname, img, small, page_dims, params)

        outfiles.append(outfile)

        print('  wrote', outfile)
        print()

    print('to convert to PDF (requires ImageMagick):')
    print('  convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf')


if __name__ == '__main__':
    main()
|
@ -1,5 +0,0 @@ |
|||||
numpy |
|
||||
scipy |
|
||||
Pillow |
|
||||
opencv-python |
|
||||
future |
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue