From b48a9c439e2972b98789433319d6ccaa512db24f Mon Sep 17 00:00:00 2001 From: Radu Liviu Carjan Date: Thu, 20 Oct 2022 14:31:43 +0300 Subject: [PATCH] Added dewarp to the ingest image --- docker/ingest/Dockerfile | 11 +- docker/ingest/dewarp/LICENSE.txt | 21 + docker/ingest/dewarp/README.md | 14 + docker/ingest/dewarp/derive_cubic.py | 46 ++ docker/ingest/dewarp/page_dewarp.py | 923 ++++++++++++++++++++++++++ docker/ingest/dewarp/requirements.txt | 5 + 6 files changed, 1019 insertions(+), 1 deletion(-) create mode 100644 docker/ingest/dewarp/LICENSE.txt create mode 100644 docker/ingest/dewarp/README.md create mode 100644 docker/ingest/dewarp/derive_cubic.py create mode 100755 docker/ingest/dewarp/page_dewarp.py create mode 100644 docker/ingest/dewarp/requirements.txt diff --git a/docker/ingest/Dockerfile b/docker/ingest/Dockerfile index e287f35..51962a8 100644 --- a/docker/ingest/Dockerfile +++ b/docker/ingest/Dockerfile @@ -2,6 +2,10 @@ FROM rcarjan/nginx-php:7.4 LABEL maintainer="Radu Liviu Carjan" +## Add required files +RUN mkdir /var/www/dewarp +ADD dewarp /var/www/dewarp + ## Install libreoffice RUN apt-add-repository -y ppa:libreoffice/ppa && \ apt-get install -y \ @@ -40,5 +44,10 @@ RUN pip install \ pdftotext \ supervisor \ opencv-python + +WORKDIR /var/www/dewarp +RUN pip install -r requirements.txt + +RUN mkdir /var/log/queue -RUN mkdir /var/log/queue \ No newline at end of file +WORKDIR /var/www/ingest diff --git a/docker/ingest/dewarp/LICENSE.txt b/docker/ingest/dewarp/LICENSE.txt new file mode 100644 index 0000000..e35410a --- /dev/null +++ b/docker/ingest/dewarp/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016, Matt Zucker + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/docker/ingest/dewarp/README.md b/docker/ingest/dewarp/README.md new file mode 100644 index 0000000..7ea5f8c --- /dev/null +++ b/docker/ingest/dewarp/README.md @@ -0,0 +1,14 @@ +page_dewarp +=========== + +Page dewarping and thresholding using a "cubic sheet" model - see full writeup at + +Requirements: + + - scipy + - OpenCV 3.0 or greater + - Image module from PIL or Pillow + +Usage: + + page_dewarp.py IMAGE1 [IMAGE2 ...] diff --git a/docker/ingest/dewarp/derive_cubic.py b/docker/ingest/dewarp/derive_cubic.py new file mode 100644 index 0000000..7b4c993 --- /dev/null +++ b/docker/ingest/dewarp/derive_cubic.py @@ -0,0 +1,46 @@ +from __future__ import print_function +import matplotlib.pyplot as plt +import numpy as np +import sympy + +# create a bunch of symbols +a, b, c, d, x, alpha, beta = sympy.symbols('a b c d x alpha beta') + +# create a polynomial function f(x) +f = a*x**3 + b*x**2 + c*x + d + +# get its derivative f'(x) +fp = f.diff(x) + +# evaluate both at x=0 and x=1 +f0 = f.subs(x, 0) +f1 = f.subs(x, 1) +fp0 = fp.subs(x, 0) +fp1 = fp.subs(x, 1) + +# we want a, b, c, d such that the following conditions hold: +# +# f(0) = 0 +# f(1) = 0 +# f'(0) = alpha +# f'(1) = beta + +S = sympy.solve([f0, f1, fp0-alpha, fp1-beta], [a, b, c, d]) + +# print the analytic solution and plot a graphical example +coeffs = [] + +num_alpha = 0.3 +num_beta = 0.03 + +for key in [a, b, c, d]: + print(key, '=', S[key]) + coeffs.append(S[key].subs(dict(alpha=num_alpha, + beta=num_beta))) + +xvals = np.linspace(0, 1, 101) +yvals = np.polyval(coeffs, xvals) + +plt.plot(xvals, yvals) +plt.show() + diff --git a/docker/ingest/dewarp/page_dewarp.py b/docker/ingest/dewarp/page_dewarp.py new file mode 100755 index 0000000..c0b1e7e --- /dev/null +++ b/docker/ingest/dewarp/page_dewarp.py @@ -0,0 +1,923 @@ +#!/usr/bin/env python +###################################################################### +# page_dewarp.py - Proof-of-concept of page-dewarping based on a +# "cubic sheet" model. Requires OpenCV (version 3 or greater), +# PIL/Pillow, and scipy.optimize. +###################################################################### +# Author: Matt Zucker +# Date: July 2016 +# License: MIT License (see LICENSE.txt) +###################################################################### + +from __future__ import division +from __future__ import print_function +from builtins import zip +from builtins import str +from builtins import range +from builtins import object +from past.utils import old_div +import os +import sys +import datetime +import cv2 +from PIL import Image +import numpy as np +import scipy.optimize + +# for some reason pylint complains about cv2 members being undefined :( +# pylint: disable=E1101 + +PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge +PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge + +OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image +OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance +REMAP_DECIMATE = 16 # downscaling factor for remapping image + +ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px + +TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour +TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour +TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio +TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour + +EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span +EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours +EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length) +EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours + +RVEC_IDX = slice(0, 3) # index of rvec in params vector +TVEC_IDX = slice(3, 6) # index of tvec in params vector +CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector + +SPAN_MIN_WIDTH = 30 # minimum reduced px width for span +SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans +FOCAL_LENGTH = 1.2 # normalized focal length of camera + +DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all +DEBUG_OUTPUT = 'file' # file, screen, both + +WINDOW_NAME = 'Dewarp' # Window name for visualization + +# nice color palette for visualizing contours, etc. +CCOLORS = [ + (255, 0, 0), + (255, 63, 0), + (255, 127, 0), + (255, 191, 0), + (255, 255, 0), + (191, 255, 0), + (127, 255, 0), + (63, 255, 0), + (0, 255, 0), + (0, 255, 63), + (0, 255, 127), + (0, 255, 191), + (0, 255, 255), + (0, 191, 255), + (0, 127, 255), + (0, 63, 255), + (0, 0, 255), + (63, 0, 255), + (127, 0, 255), + (191, 0, 255), + (255, 0, 255), + (255, 0, 191), + (255, 0, 127), + (255, 0, 63), +] + +# default intrinsic parameter matrix +K = np.array([ + [FOCAL_LENGTH, 0, 0], + [0, FOCAL_LENGTH, 0], + [0, 0, 1]], dtype=np.float32) + + +def debug_show(name, step, text, display): + + if DEBUG_OUTPUT != 'screen': + filetext = text.replace(' ', '_') + outfile = name + '_debug_' + str(step) + '_' + filetext + '.png' + cv2.imwrite(outfile, display) + + if DEBUG_OUTPUT != 'file': + + image = display.copy() + height = image.shape[0] + + cv2.putText(image, text, (16, height-16), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, + (0, 0, 0), 3, cv2.LINE_AA) + + cv2.putText(image, text, (16, height-16), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.imshow(WINDOW_NAME, image) + + while cv2.waitKey(5) < 0: + pass + + +def round_nearest_multiple(i, factor): + i = int(i) + rem = i % factor + if not rem: + return i + else: + return i + factor - rem + + +def pix2norm(shape, pts): + height, width = shape[:2] + scl = 2.0/(max(height, width)) + offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5 + return (pts - offset) * scl + + +def norm2pix(shape, pts, as_integer): + height, width = shape[:2] + scl = max(height, width)*0.5 + offset = np.array([0.5*width, 0.5*height], + dtype=pts.dtype).reshape((-1, 1, 2)) + rval = pts * scl + offset + if as_integer: + return (rval + 0.5).astype(int) + else: + return rval + + +def fltp(point): + return tuple(point.astype(int).flatten()) + + +def draw_correspondences(img, dstpoints, projpts): + + display = img.copy() + dstpoints = norm2pix(img.shape, dstpoints, True) + projpts = norm2pix(img.shape, projpts, True) + + for pts, color in [(projpts, (255, 0, 0)), + (dstpoints, (0, 0, 255))]: + + for point in pts: + cv2.circle(display, fltp(point), 3, color, -1, cv2.LINE_AA) + + for point_a, point_b in zip(projpts, dstpoints): + cv2.line(display, fltp(point_a), fltp(point_b), + (255, 255, 255), 1, cv2.LINE_AA) + + return display + + +def get_default_params(corners, ycoords, xcoords): + + # page width and height + page_width = np.linalg.norm(corners[1] - corners[0]) + page_height = np.linalg.norm(corners[-1] - corners[0]) + rough_dims = (page_width, page_height) + + # our initial guess for the cubic has no slope + cubic_slopes = [0.0, 0.0] + + # object points of flat page in 3D coordinates + corners_object3d = np.array([ + [0, 0, 0], + [page_width, 0, 0], + [page_width, page_height, 0], + [0, page_height, 0]]) + + # estimate rotation and translation from four 2D-to-3D point + # correspondences + _, rvec, tvec = cv2.solvePnP(corners_object3d, + corners, K, np.zeros(5)) + + span_counts = [len(xc) for xc in xcoords] + + params = np.hstack((np.array(rvec).flatten(), + np.array(tvec).flatten(), + np.array(cubic_slopes).flatten(), + ycoords.flatten()) + + tuple(xcoords)) + + return rough_dims, span_counts, params + + +def project_xy(xy_coords, pvec): + + # get cubic polynomial coefficients given + # + # f(0) = 0, f'(0) = alpha + # f(1) = 0, f'(1) = beta + + alpha, beta = tuple(pvec[CUBIC_IDX]) + + poly = np.array([ + alpha + beta, + -2*alpha - beta, + alpha, + 0]) + + xy_coords = xy_coords.reshape((-1, 2)) + z_coords = np.polyval(poly, xy_coords[:, 0]) + + objpoints = np.hstack((xy_coords, z_coords.reshape((-1, 1)))) + + image_points, _ = cv2.projectPoints(objpoints, + pvec[RVEC_IDX], + pvec[TVEC_IDX], + K, np.zeros(5)) + + return image_points + + +def project_keypoints(pvec, keypoint_index): + + xy_coords = pvec[keypoint_index] + xy_coords[0, :] = 0 + + return project_xy(xy_coords, pvec) + + +def resize_to_screen(src, maxw=1280, maxh=700, copy=False): + + height, width = src.shape[:2] + + scl_x = float(width)/maxw + scl_y = float(height)/maxh + + scl = int(np.ceil(max(scl_x, scl_y))) + + if scl > 1.0: + inv_scl = 1.0/scl + img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA) + elif copy: + img = src.copy() + else: + img = src + + return img + + +def box(width, height): + return np.ones((height, width), dtype=np.uint8) + + +def get_page_extents(small): + + height, width = small.shape[:2] + + xmin = PAGE_MARGIN_X + ymin = PAGE_MARGIN_Y + xmax = width-PAGE_MARGIN_X + ymax = height-PAGE_MARGIN_Y + + page = np.zeros((height, width), dtype=np.uint8) + cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1) + + outline = np.array([ + [xmin, ymin], + [xmin, ymax], + [xmax, ymax], + [xmax, ymin]]) + + return page, outline + + +def get_mask(name, small, pagemask, masktype): + + sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY) + + if masktype == 'text': + + mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, + ADAPTIVE_WINSZ, + 25) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.1, 'thresholded', mask) + + mask = cv2.dilate(mask, box(9, 1)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.2, 'dilated', mask) + + mask = cv2.erode(mask, box(1, 3)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.3, 'eroded', mask) + + else: + + mask = cv2.adaptiveThreshold(sgray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, + ADAPTIVE_WINSZ, + 7) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.4, 'thresholded', mask) + + mask = cv2.erode(mask, box(3, 1), iterations=3) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.5, 'eroded', mask) + + mask = cv2.dilate(mask, box(8, 2)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.6, 'dilated', mask) + + return np.minimum(mask, pagemask) + + +def interval_measure_overlap(int_a, int_b): + return min(int_a[1], int_b[1]) - max(int_a[0], int_b[0]) + + +def angle_dist(angle_b, angle_a): + + diff = angle_b - angle_a + + while diff > np.pi: + diff -= 2*np.pi + + while diff < -np.pi: + diff += 2*np.pi + + return np.abs(diff) + + +def blob_mean_and_tangent(contour): + + moments = cv2.moments(contour) + + area = moments['m00'] + + mean_x = old_div(moments['m10'], area) + mean_y = old_div(moments['m01'], area) + + moments_matrix = old_div(np.array([ + [moments['mu20'], moments['mu11']], + [moments['mu11'], moments['mu02']] + ]), area) + + _, svd_u, _ = cv2.SVDecomp(moments_matrix) + + center = np.array([mean_x, mean_y]) + tangent = svd_u[:, 0].flatten().copy() + + return center, tangent + + +class ContourInfo(object): + + def __init__(self, contour, rect, mask): + + self.contour = contour + self.rect = rect + self.mask = mask + + self.center, self.tangent = blob_mean_and_tangent(contour) + + self.angle = np.arctan2(self.tangent[1], self.tangent[0]) + + clx = [self.proj_x(point) for point in contour] + + lxmin = min(clx) + lxmax = max(clx) + + self.local_xrng = (lxmin, lxmax) + + self.point0 = self.center + self.tangent * lxmin + self.point1 = self.center + self.tangent * lxmax + + self.pred = None + self.succ = None + + def proj_x(self, point): + return np.dot(self.tangent, point.flatten()-self.center) + + def local_overlap(self, other): + xmin = self.proj_x(other.point0) + xmax = self.proj_x(other.point1) + return interval_measure_overlap(self.local_xrng, (xmin, xmax)) + + +def generate_candidate_edge(cinfo_a, cinfo_b): + + # we want a left of b (so a's successor will be b and b's + # predecessor will be a) make sure right endpoint of b is to the + # right of left endpoint of a. + if cinfo_a.point0[0] > cinfo_b.point1[0]: + tmp = cinfo_a + cinfo_a = cinfo_b + cinfo_b = tmp + + x_overlap_a = cinfo_a.local_overlap(cinfo_b) + x_overlap_b = cinfo_b.local_overlap(cinfo_a) + + overall_tangent = cinfo_b.center - cinfo_a.center + overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0]) + + delta_angle = old_div(max(angle_dist(cinfo_a.angle, overall_angle), + angle_dist(cinfo_b.angle, overall_angle)) * 180,np.pi) + + # we want the largest overlap in x to be small + x_overlap = max(x_overlap_a, x_overlap_b) + + dist = np.linalg.norm(cinfo_b.point0 - cinfo_a.point1) + + if (dist > EDGE_MAX_LENGTH or + x_overlap > EDGE_MAX_OVERLAP or + delta_angle > EDGE_MAX_ANGLE): + return None + else: + score = dist + delta_angle*EDGE_ANGLE_COST + return (score, cinfo_a, cinfo_b) + + +def make_tight_mask(contour, xmin, ymin, width, height): + + tight_mask = np.zeros((height, width), dtype=np.uint8) + tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2)) + + cv2.drawContours(tight_mask, [tight_contour], 0, + (1, 1, 1), -1) + + return tight_mask + + +def get_contours(name, small, pagemask, masktype): + + mask = get_mask(name, small, pagemask, masktype) + + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_NONE) + + contours_out = [] + + for contour in contours: + + rect = cv2.boundingRect(contour) + xmin, ymin, width, height = rect + + if (width < TEXT_MIN_WIDTH or + height < TEXT_MIN_HEIGHT or + width < TEXT_MIN_ASPECT*height): + continue + + tight_mask = make_tight_mask(contour, xmin, ymin, width, height) + + if tight_mask.sum(axis=0).max() > TEXT_MAX_THICKNESS: + continue + + contours_out.append(ContourInfo(contour, rect, tight_mask)) + + if DEBUG_LEVEL >= 2: + visualize_contours(name, small, contours_out) + + return contours_out + + +def assemble_spans(name, small, pagemask, cinfo_list): + + # sort list + cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1]) + + # generate all candidate edges + candidate_edges = [] + + for i, cinfo_i in enumerate(cinfo_list): + for j in range(i): + # note e is of the form (score, left_cinfo, right_cinfo) + edge = generate_candidate_edge(cinfo_i, cinfo_list[j]) + if edge is not None: + candidate_edges.append(edge) + + # sort candidate edges by score (lower is better) + candidate_edges.sort() + + # for each candidate edge + for _, cinfo_a, cinfo_b in candidate_edges: + # if left and right are unassigned, join them + if cinfo_a.succ is None and cinfo_b.pred is None: + cinfo_a.succ = cinfo_b + cinfo_b.pred = cinfo_a + + # generate list of spans as output + spans = [] + + # until we have removed everything from the list + while cinfo_list: + + # get the first on the list + cinfo = cinfo_list[0] + + # keep following predecessors until none exists + while cinfo.pred: + cinfo = cinfo.pred + + # start a new span + cur_span = [] + + width = 0.0 + + # follow successors til end of span + while cinfo: + # remove from list (sadly making this loop *also* O(n^2) + cinfo_list.remove(cinfo) + # add to span + cur_span.append(cinfo) + width += cinfo.local_xrng[1] - cinfo.local_xrng[0] + # set successor + cinfo = cinfo.succ + + # add if long enough + if width > SPAN_MIN_WIDTH: + spans.append(cur_span) + + if DEBUG_LEVEL >= 2: + visualize_spans(name, small, pagemask, spans) + + return spans + + +def sample_spans(shape, spans): + + span_points = [] + + for span in spans: + + contour_points = [] + + for cinfo in span: + + yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1)) + totals = (yvals * cinfo.mask).sum(axis=0) + means = old_div(totals, cinfo.mask.sum(axis=0)) + + xmin, ymin = cinfo.rect[:2] + + step = SPAN_PX_PER_STEP + start = old_div(((len(means)-1) % step), 2) + + contour_points += [(x+xmin, means[x]+ymin) + for x in range(start, len(means), step)] + + contour_points = np.array(contour_points, + dtype=np.float32).reshape((-1, 1, 2)) + + contour_points = pix2norm(shape, contour_points) + + span_points.append(contour_points) + + return span_points + + +def keypoints_from_samples(name, small, pagemask, page_outline, + span_points): + + all_evecs = np.array([[0.0, 0.0]]) + all_weights = 0 + + for points in span_points: + + _, evec = cv2.PCACompute(points.reshape((-1, 2)), + None, maxComponents=1) + + weight = np.linalg.norm(points[-1] - points[0]) + + all_evecs += evec * weight + all_weights += weight + + evec = old_div(all_evecs, all_weights) + + x_dir = evec.flatten() + + if x_dir[0] < 0: + x_dir = -x_dir + + y_dir = np.array([-x_dir[1], x_dir[0]]) + + pagecoords = cv2.convexHull(page_outline) + pagecoords = pix2norm(pagemask.shape, pagecoords.reshape((-1, 1, 2))) + pagecoords = pagecoords.reshape((-1, 2)) + + px_coords = np.dot(pagecoords, x_dir) + py_coords = np.dot(pagecoords, y_dir) + + px0 = px_coords.min() + px1 = px_coords.max() + + py0 = py_coords.min() + py1 = py_coords.max() + + p00 = px0 * x_dir + py0 * y_dir + p10 = px1 * x_dir + py0 * y_dir + p11 = px1 * x_dir + py1 * y_dir + p01 = px0 * x_dir + py1 * y_dir + + corners = np.vstack((p00, p10, p11, p01)).reshape((-1, 1, 2)) + + ycoords = [] + xcoords = [] + + for points in span_points: + pts = points.reshape((-1, 2)) + px_coords = np.dot(pts, x_dir) + py_coords = np.dot(pts, y_dir) + ycoords.append(py_coords.mean() - py0) + xcoords.append(px_coords - px0) + + if DEBUG_LEVEL >= 2: + visualize_span_points(name, small, span_points, corners) + + return corners, np.array(ycoords), xcoords + + +def visualize_contours(name, small, cinfo_list): + + regions = np.zeros_like(small) + + for j, cinfo in enumerate(cinfo_list): + + cv2.drawContours(regions, [cinfo.contour], 0, + CCOLORS[j % len(CCOLORS)], -1) + + mask = (regions.max(axis=2) != 0) + + display = small.copy() + display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2)) + + for j, cinfo in enumerate(cinfo_list): + color = CCOLORS[j % len(CCOLORS)] + color = tuple([old_div(c,4) for c in color]) + + cv2.circle(display, fltp(cinfo.center), 3, + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.line(display, fltp(cinfo.point0), fltp(cinfo.point1), + (255, 255, 255), 1, cv2.LINE_AA) + + debug_show(name, 1, 'contours', display) + + +def visualize_spans(name, small, pagemask, spans): + + regions = np.zeros_like(small) + + for i, span in enumerate(spans): + contours = [cinfo.contour for cinfo in span] + cv2.drawContours(regions, contours, -1, + CCOLORS[i*3 % len(CCOLORS)], -1) + + mask = (regions.max(axis=2) != 0) + + display = small.copy() + display[mask] = (old_div(display[mask],2)) + (old_div(regions[mask],2)) + display[pagemask == 0] //= 4 + + debug_show(name, 2, 'spans', display) + + +def visualize_span_points(name, small, span_points, corners): + + display = small.copy() + + for i, points in enumerate(span_points): + + points = norm2pix(small.shape, points, False) + + mean, small_evec = cv2.PCACompute(points.reshape((-1, 2)), + None, + maxComponents=1) + + dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1))) + dpm = np.dot(mean.flatten(), small_evec.flatten()) + + point0 = mean + small_evec * (dps.min()-dpm) + point1 = mean + small_evec * (dps.max()-dpm) + + for point in points: + cv2.circle(display, fltp(point), 3, + CCOLORS[i % len(CCOLORS)], -1, cv2.LINE_AA) + + cv2.line(display, fltp(point0), fltp(point1), + (255, 255, 255), 1, cv2.LINE_AA) + + cv2.polylines(display, [norm2pix(small.shape, corners, True)], + True, (255, 255, 255)) + + debug_show(name, 3, 'span points', display) + + +def imgsize(img): + height, width = img.shape[:2] + return '{}x{}'.format(width, height) + + +def make_keypoint_index(span_counts): + + nspans = len(span_counts) + npts = sum(span_counts) + keypoint_index = np.zeros((npts+1, 2), dtype=int) + start = 1 + + for i, count in enumerate(span_counts): + end = start + count + keypoint_index[start:start+end, 1] = 8+i + start = end + + keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans + + return keypoint_index + + +def optimize_params(name, small, dstpoints, span_counts, params): + + keypoint_index = make_keypoint_index(span_counts) + + def objective(pvec): + ppts = project_keypoints(pvec, keypoint_index) + return np.sum((dstpoints - ppts)**2) + + print(' initial objective is', objective(params)) + + if DEBUG_LEVEL >= 1: + projpts = project_keypoints(params, keypoint_index) + display = draw_correspondences(small, dstpoints, projpts) + debug_show(name, 4, 'keypoints before', display) + + print(' optimizing', len(params), 'parameters...') + start = datetime.datetime.now() + res = scipy.optimize.minimize(objective, params, + method='Powell') + end = datetime.datetime.now() + print(' optimization took', round((end-start).total_seconds(), 2), 'sec.') + print(' final objective is', res.fun) + params = res.x + + if DEBUG_LEVEL >= 1: + projpts = project_keypoints(params, keypoint_index) + display = draw_correspondences(small, dstpoints, projpts) + debug_show(name, 5, 'keypoints after', display) + + return params + + +def get_page_dims(corners, rough_dims, params): + + dst_br = corners[2].flatten() + + dims = np.array(rough_dims) + + def objective(dims): + proj_br = project_xy(dims, params) + return np.sum((dst_br - proj_br.flatten())**2) + + res = scipy.optimize.minimize(objective, dims, method='Powell') + dims = res.x + + print(' got page dims', dims[0], 'x', dims[1]) + + return dims + + +def remap_image(name, dirname, img, small, page_dims, params): + + height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0] + height = round_nearest_multiple(height, REMAP_DECIMATE) + + width = round_nearest_multiple(old_div(height * page_dims[0], page_dims[1]), + REMAP_DECIMATE) + + print(' output will be {}x{}'.format(width, height)) + + height_small = old_div(height, REMAP_DECIMATE) + width_small = old_div(width, REMAP_DECIMATE) + + page_x_range = np.linspace(0, page_dims[0], width_small) + page_y_range = np.linspace(0, page_dims[1], height_small) + + page_x_coords, page_y_coords = np.meshgrid(page_x_range, page_y_range) + + page_xy_coords = np.hstack((page_x_coords.flatten().reshape((-1, 1)), + page_y_coords.flatten().reshape((-1, 1)))) + + page_xy_coords = page_xy_coords.astype(np.float32) + + image_points = project_xy(page_xy_coords, params) + image_points = norm2pix(img.shape, image_points, False) + + image_x_coords = image_points[:, 0, 0].reshape(page_x_coords.shape) + image_y_coords = image_points[:, 0, 1].reshape(page_y_coords.shape) + + image_x_coords = cv2.resize(image_x_coords, (width, height), + interpolation=cv2.INTER_CUBIC) + + image_y_coords = cv2.resize(image_y_coords, (width, height), + interpolation=cv2.INTER_CUBIC) + + img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + + remapped = cv2.remap(img_gray, image_x_coords, image_y_coords, + cv2.INTER_CUBIC, + None, cv2.BORDER_REPLICATE) + + thresh = cv2.adaptiveThreshold(remapped, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY, ADAPTIVE_WINSZ, 25) + + pil_image = Image.fromarray(thresh) + pil_image = pil_image.convert('1') + + threshfile = name + '_thresh.png' + pil_image.save(dirname + '/' + threshfile, dpi=(OUTPUT_DPI, OUTPUT_DPI)) + + if DEBUG_LEVEL >= 1: + height = small.shape[0] + width = int(round(height * float(thresh.shape[1])/thresh.shape[0])) + display = cv2.resize(thresh, (width, height), + interpolation=cv2.INTER_AREA) + debug_show(name, 6, 'output', display) + + return threshfile + + +def main(): + + if len(sys.argv) < 2: + print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]') + sys.exit(0) + + if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file': + cv2.namedWindow(WINDOW_NAME) + + outfiles = [] + + for imgfile in sys.argv[1:]: + + img = cv2.imread(imgfile) + small = resize_to_screen(img) + basename = os.path.basename(imgfile) + dirname = os.path.dirname(imgfile) + name, _ = os.path.splitext(basename) + + print('loaded', basename, 'with size', imgsize(img), end=' ') + print('and resized to', imgsize(small)) + + if DEBUG_LEVEL >= 3: + debug_show(name, 0.0, 'original', small) + + pagemask, page_outline = get_page_extents(small) + + cinfo_list = get_contours(name, small, pagemask, 'text') + spans = assemble_spans(name, small, pagemask, cinfo_list) + + if len(spans) < 3: + print(' detecting lines because only', len(spans), 'text spans') + cinfo_list = get_contours(name, small, pagemask, 'line') + spans2 = assemble_spans(name, small, pagemask, cinfo_list) + if len(spans2) > len(spans): + spans = spans2 + + if len(spans) < 1: + print('skipping', name, 'because only', len(spans), 'spans') + continue + + span_points = sample_spans(small.shape, spans) + + print(' got', len(spans), 'spans', end=' ') + print('with', sum([len(pts) for pts in span_points]), 'points.') + + corners, ycoords, xcoords = keypoints_from_samples(name, small, + pagemask, + page_outline, + span_points) + + rough_dims, span_counts, params = get_default_params(corners, + ycoords, xcoords) + + dstpoints = np.vstack((corners[0].reshape((1, 1, 2)),) + + tuple(span_points)) + + params = optimize_params(name, small, + dstpoints, + span_counts, params) + + page_dims = get_page_dims(corners, rough_dims, params) + + outfile = remap_image(name, dirname, img, small, page_dims, params) + + outfiles.append(outfile) + + print(' wrote', outfile) + print() + + print('to convert to PDF (requires ImageMagick):') + print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf') + + +if __name__ == '__main__': + main() diff --git a/docker/ingest/dewarp/requirements.txt b/docker/ingest/dewarp/requirements.txt new file mode 100644 index 0000000..c716cb0 --- /dev/null +++ b/docker/ingest/dewarp/requirements.txt @@ -0,0 +1,5 @@ +numpy +scipy +Pillow +opencv-python +future