# searchanddisplace-ingest/resources/python/unproject/unproject_text.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, print_function

import sys
import numpy as np
import scipy.optimize
import matplotlib.pyplot as plt
import cv2
import ellipse

# Display-sized copies of every image passed to debug_show(), in call
# order; main() shows them one at a time once the pipeline has finished.
DEBUG_IMAGES = []

def debug_show(name, src):
    """Save a debug image to disk and queue a display-sized copy of it.

    The image is written unscaled to ``debug<NN>_<name>.png`` in the
    current directory, where ``NN`` is the number of images queued so
    far.  A copy is then appended to DEBUG_IMAGES; if the image is larger
    than 1280x700 the copy is shrunk by a whole-number factor.

    Args:
        name: Short label used in the output file name.
        src: Image array accepted by cv2.imwrite / cv2.resize.
    """
    index = len(DEBUG_IMAGES)
    cv2.imwrite('debug{:02d}_{}.png'.format(index, name), src)

    height, width = src.shape[:2]

    # Smallest whole number of times the image has to be reduced so that
    # it fits inside 1280x700.
    shrink = np.ceil(max(width/1280.0, height/700.0))
    scale = 1.0/shrink

    if scale < 1.0:
        preview = cv2.resize(src, (0, 0), None, scale, scale,
                             cv2.INTER_AREA)
    else:
        preview = src.copy()

    DEBUG_IMAGES.append(preview)

def translation(x, y):
    """Return the 3x3 homogeneous matrix that translates by (x, y)."""
    T = np.eye(3, dtype=float)
    T[0, 2] = x
    T[1, 2] = y
    return T

def rotation(theta):
    """Return the 3x3 homogeneous rotation matrix for an angle in radians.

    The upper-left 2x2 block is [[cos, -sin], [sin, cos]].
    """
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rows = [
        [cos_t, -sin_t, 0],
        [sin_t, cos_t, 0],
        [0, 0, 1],
    ]
    return np.array(rows, dtype=float)

def perspective_warp(a, b):
    """Return a 3x3 homography that is the identity except for its
    bottom row, which is (a, b, 1)."""
    H = np.identity(3, dtype=float)
    H[2, 0] = a
    H[2, 1] = b
    return H

def slant(sx):
    """Return the 3x3 horizontal shear matrix mapping x to x + sx*y."""
    shear = np.eye(3, dtype=float)
    shear[0, 1] = sx
    return shear

def softmax(x, k=1.0):
    """Return a smooth approximation of ``x.max()`` (log-sum-exp).

    Computes ``log(sum(exp(k*x))) / k``; larger ``k`` gives a value
    closer to the true maximum.

    Args:
        x: NumPy array of values.
        k: Sharpness of the approximation.
    """
    # Subtract the maximum before exponentiating so exp() cannot overflow;
    # it is added back after the logarithm.
    peak = x.max()
    shifted = k*(x - peak)
    total = np.exp(shifted).sum()
    return np.log(total) / k + peak

def skewed_widths(contours, H):
    """Return the soft maximum of the contours' x-extents after warping.

    Each contour is mapped through the homography ``H`` and its
    horizontal extent (max x minus min x) is measured; the extents are
    combined with ``softmax(..., 0.1)``.

    Args:
        contours: Iterable of point arrays accepted by
            cv2.perspectiveTransform.
        H: 3x3 homography.
    """
    def x_extent(contour):
        xs = cv2.perspectiveTransform(contour, H)[:, :, 0]
        return xs.max() - xs.min()

    extents = np.array([x_extent(contour) for contour in contours])
    return softmax(extents, 0.1)

def centered_warp(u0, v0, a, b):
    """Return the projective warp ``perspective_warp(a, b)`` applied
    about the point (u0, v0) instead of the origin.

    The result is T(u0, v0) . P(a, b) . T(-u0, -v0).
    """
    to_origin = translation(-u0, -v0)
    back = translation(u0, v0)
    warp_at_origin = np.dot(perspective_warp(a, b), to_origin)
    return np.dot(back, warp_at_origin)

def warp_containing_points(img, pts, H, border=4, shape_only=False):
    """Warp ``img`` by ``H``, cropped to the warped bounding box of ``pts``.

    ``H`` is composed with a translation so that the bounding rectangle
    of the transformed points starts ``border`` pixels in from the
    top-left corner of the output, and the output is sized to leave the
    same margin on the other two sides.

    Args:
        img: Source image.
        pts: Points (as accepted by cv2.perspectiveTransform) that must
            be contained in the output.
        H: 3x3 homography applied to the image and the points.
        border: Margin in pixels around the points' bounding rectangle.
        shape_only: If true, skip the actual image warp.

    Returns:
        ``(dst, TH)`` where ``TH`` is the translated homography and
        ``dst`` is the warped image, or, when ``shape_only`` is true,
        ``((height, width), TH)`` giving the size the warp would have.
    """
    mapped = cv2.perspectiveTransform(pts, H)
    left, top, width, height = cv2.boundingRect(mapped)
    print('got bounding rect', left, top, width, height)

    shifted_H = np.dot(translation(border - left, border - top), H)
    out_w = width + 2*border
    out_h = height + 2*border

    if shape_only:
        return (out_h, out_w), shifted_H

    # Pixels that fall outside the source image replicate its edge.
    warped = cv2.warpPerspective(img, shifted_H, (out_w, out_h),
                                 borderMode=cv2.BORDER_REPLICATE)
    return warped, shifted_H

def conic_area_discrepancy(conics, x, H, opt_results=None):
    """Measure how unequal the conics' areas are after warping by ``H``.

    Every conic is transformed by ``H`` and its size is taken from the
    second value returned by ``ellipse.conic_scale`` (presumably the
    product of the semi-axes -- confirm against the ellipse module).
    The sizes are divided by their mean and the result is half the sum
    of squared deviations from 1.

    Args:
        conics: Iterable of conics understood by the ``ellipse`` module.
        x: Optimizer parameters that produced ``H``; only recorded.
        H: 3x3 homography to evaluate.
        opt_results: Optional list.  When given, ``(x, H, value)`` is
            appended whenever the list is empty or ``value`` is lower
            than the last recorded value.

    Returns:
        The discrepancy value (0 when all warped areas are equal).
    """
    def warped_area(conic):
        _, ab = ellipse.conic_scale(ellipse.conic_transform(conic, H))
        # An infinite size is replaced by a huge finite stand-in so the
        # normalisation below stays finite.
        return 1e20 if np.isinf(ab) else ab

    relative = np.array([warped_area(conic) for conic in conics])
    relative /= relative.mean()  # rescale so the mean is 1.0
    relative -= 1                # deviation from the mean

    cost = 0.5*np.dot(relative, relative)

    if opt_results is not None:
        improved = (not opt_results) or cost < opt_results[-1][-1]
        if improved:
            opt_results.append((x, H, cost))

    return cost

def threshold(img):
    """Binarize an image with a local (adaptive mean) threshold.

    Colour input is reduced to grayscale first.  If the grayscale image
    is dark overall (mean below 100) it is inverted, so the thresholding
    always sees dark marks on a light background.

    Args:
        img: Grayscale image, or a colour image in OpenCV's B, G, R
            channel order (what cv2.imread returns).

    Returns:
        Single-channel image in which pixels darker than their 101x101
        neighbourhood mean minus 21 are 255 and all others are 0.
    """
    if len(img.shape) > 2:
        # Images come from cv2.imread, which stores channels as B, G, R;
        # the RGB conversion code would swap the red/blue luminance weights.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if img.mean() < 100:
        img = 255 - img

    return cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY_INV, 101, 21)

def get_contours(img):
    """Threshold ``img`` and extract its contours with a two-level
    hierarchy (cv2.RETR_CCOMP), keeping every boundary point.

    The thresholded image is also recorded via debug_show('threshold').

    Args:
        img: Input image, as accepted by threshold().

    Returns:
        ``(contours, hierarchy)`` exactly as produced by
        cv2.findContours.
    """
    work = threshold(img)

    debug_show('threshold', work)

    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x
    # return (contours, hierarchy); the last two entries are the same in
    # every version, so take those instead of unpacking a fixed count.
    found = cv2.findContours(work, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
    contours, hierarchy = found[-2:]

    return contours, hierarchy

def _plot_position_vs_area(centroids, areas):
    """Plot contour area against centroid x and y and save it as a PDF.

    A straight line is fitted to area-vs-x and to area-vs-y; each fit is
    drawn over the fixed range 0..3000 px and labelled with its r² value.
    The figure is written to 'position-vs-area.pdf'.
    """
    def linear_fit(x, y):
        coeffs = np.polyfit(x, y, deg=1)
        ypred = np.polyval(coeffs, x)
        sstot = np.sum((y - np.mean(y))**2)
        ssres = np.sum((y.flatten() - ypred.flatten())**2)
        return coeffs, 1 - ssres/sstot

    xfit, xr2 = linear_fit(centroids[:, 0], areas)
    yfit, yr2 = linear_fit(centroids[:, 1], areas)

    xlabel = 'X coordinate (r²={:.2f})'.format(xr2)
    ylabel = 'Y coordinate (r²={:.2f})'.format(yr2)

    plt.plot(centroids[:, 0], areas, 'b.', zorder=1)
    plt.plot(centroids[:, 1], areas, 'r.', zorder=1)
    # Freeze the axes on the scatter data so the long fit lines drawn
    # next do not rescale the plot.
    plt.gca().autoscale(False)
    plt.plot([0, 3000], np.polyval(xfit, [0, 3000]), 'b--',
             zorder=0, label=xlabel)
    plt.plot([0, 3000], np.polyval(yfit, [0, 3000]), 'r--',
             zorder=0, label=ylabel)
    plt.legend(loc='upper right')
    plt.xlabel('X/Y coordinate (px)')
    plt.ylabel('Contour area (px²)')
    plt.savefig('position-vs-area.pdf')


def get_conics(img, contours, hierarchy,
               abs_area_cutoff=0.0001, mean_area_cutoff=0.15,
               plot_areas=False):
    """Fit a conic to every sufficiently large top-level contour.

    Only contours without a parent are considered.  A contour is kept if
    its area exceeds ``abs_area_cutoff`` times the image area.  When a
    kept contour has child contours (holes) that also exceed that area,
    its moments are recomputed from a filled mask with those holes
    removed.  After this first pass, contours whose area is not larger
    than ``mean_area_cutoff`` times the mean kept area are dropped.

    Three debug images are recorded via debug_show: 'contours_only',
    'contours' (with area labels) and 'conics'.

    Args:
        img: Image the contours were extracted from (used for its size
            and as the background of the debug images).
        contours: Contours from cv2.findContours.
        hierarchy: Matching hierarchy array; each row is read as
            (next, previous, first_child, parent).
        abs_area_cutoff: Minimum contour area as a fraction of the image
            area.
        mean_area_cutoff: Minimum contour area as a fraction of the mean
            area of the contours that passed the absolute cutoff.
        plot_areas: If true, also save a diagnostic area-vs-position
            plot to 'position-vs-area.pdf' (off by default).

    Returns:
        ``(conics, contours, centroid)``: array of conics for the
        surviving contours, those contours as float32 arrays, and the
        area-weighted centroid of all contours that passed the absolute
        cutoff (NaN if none did, since the total area is then zero).
    """
    hierarchy = hierarchy.reshape((-1, 4))

    conics = []
    areas = []
    centroids = []
    okcontours = []
    allchildren = []
    centroid_accum = np.zeros(2)
    total_area = 0.0

    abs_area_cutoff *= img.shape[0] * img.shape[1]
    print('abs_area_cutoff = ',abs_area_cutoff)

    for i, (c, h) in enumerate(zip(contours, hierarchy)):

        child_idx, parent_idx = h[2], h[3]

        # Skip holes here; they are handled through their parent below.
        if parent_idx >= 0:
            continue

        # m[0] is used as the area and m[1:3]/m[0] as the centroid.
        m = ellipse.moments_from_dict(cv2.moments(c))

        if m[0] <= abs_area_cutoff:
            continue

        # Walk the sibling chain of first-level children, keeping the
        # holes that are big enough to matter.
        children = []
        while child_idx >= 0:
            child_contour = contours[child_idx]
            if cv2.moments(child_contour)['m00'] > abs_area_cutoff:
                children.append(child_contour)
            child_idx = hierarchy[child_idx][0]

        if children:
            allchildren.extend(children)
            # Rasterise the outline, punch out the holes, and take the
            # moments of the resulting binary mask instead.
            work = np.zeros(img.shape[:2], dtype=np.uint8)
            cv2.drawContours(work, contours, i, (1,1,1), -1)
            cv2.drawContours(work, children, -1, (0,0,0), -1)
            m = ellipse.moments_from_dict(cv2.moments(work, True))

        centroids.append(m[1:3]/m[0])
        centroid_accum += m[1:3]
        total_area += m[0]
        okcontours.append(c)
        conics.append(ellipse.conic_from_moments(m))
        areas.append(m[0])

    display = img.copy()
    cv2.drawContours(display, okcontours+allchildren,
                     -1, (0, 255, 0),
                     6, cv2.LINE_AA)

    debug_show('contours_only', display)

    for c, a in zip(okcontours, areas):

        bx, by, bw, bh = cv2.boundingRect(c)

        s = str('{:,d}'.format(int(a)))
        ctr = (bx, by+bh+20)

        # Thick black text under thinner green text gives an outlined
        # label that stays readable on any background.
        cv2.putText(display, s, ctr,
                    cv2.FONT_HERSHEY_SIMPLEX, 2.0,
                    (0, 0, 0), 12, cv2.LINE_AA)

        cv2.putText(display, s, ctr,
                    cv2.FONT_HERSHEY_SIMPLEX, 2.0,
                    (0, 255, 0), 6, cv2.LINE_AA)

    debug_show('contours', display)

    areas = np.array(areas)
    amean = areas.mean()

    print('got {} contours with {} small.'.format(
        len(areas), (areas < mean_area_cutoff*amean).sum()))

    idx = np.where(areas > mean_area_cutoff*amean)[0]

    conics = np.array(conics)
    conics = conics[idx]
    centroid_accum /= total_area

    display = img.copy()
    for conic in conics:
        x0, y0, a, b, theta = ellipse.gparams_from_conic(conic)
        cv2.ellipse(display, (int(x0), int(y0)), (int(a), int(b)),
                    theta*180/np.pi, 0, 360, (0,0,255), 6, cv2.LINE_AA)

    debug_show('conics', display)

    contours = [okcontours[i].astype('float32') for i in idx]

    if plot_areas:
        _plot_position_vs_area(np.array([centroids[i] for i in idx]),
                               areas[idx])

    return conics, contours, centroid_accum

def _write_optimization_frames(conics, opt_results):
    """Write one 'frameNNNN.png' per recorded optimizer improvement.

    Each frame draws every conic, transformed by that step's homography,
    as an ellipse centred in a canvas of common size (aspect ratio at
    least 2:1), captioned with the step's area discrepancy.
    """
    if not opt_results or len(conics) == 0:
        return

    phi = np.linspace(0, 2*np.pi, 16, endpoint=False)

    # First pass: bounding rectangle of the sampled ellipses at each step,
    # and the largest width/height over all steps.
    rects = []
    width, height = 0, 0
    for _, step_H, _ in opt_results:
        samples = []
        for conic in conics:
            gparams = ellipse.gparams_from_conic(
                ellipse.conic_transform(conic, step_H))
            ex, ey = ellipse.gparams_evaluate(gparams, phi)
            samples.append(np.dstack((ex.reshape((-1, 1, 1)),
                                      ey.reshape((-1, 1, 1)))))
        rect = cv2.boundingRect(np.vstack(tuple(samples)).astype(np.float32))
        rects.append(rect)
        width = max(width, rect[2])
        height = max(height, rect[3])

    border = int(0.05 * min(width, height))
    width += border
    height += border
    if float(width)/height < 2.0:
        width = 2*height
    else:
        # Integer division: the canvas shape must consist of ints.
        height = width // 2

    # Second pass: render each step centred on the common canvas.
    for i, (rect, (_, step_H, fval)) in enumerate(zip(rects, opt_results)):
        display = np.zeros((height, width), dtype=np.uint8)
        rx, ry, rw, rh = rect
        xoffs = width // 2 - (rx + rw // 2)
        yoffs = height // 2 - (ry + rh // 2)
        for conic in conics:
            x0, y0, a, b, theta = ellipse.gparams_from_conic(
                ellipse.conic_transform(conic, step_H))
            cv2.ellipse(display, (int(x0+xoffs), int(y0+yoffs)),
                        (int(a), int(b)),
                        theta*180/np.pi, 0, 360, (255,255,255), 6,
                        cv2.LINE_AA)
        cv2.putText(display, 'Area discrepancy: {:.3f}'.format(fval),
                    (16, height-24), cv2.FONT_HERSHEY_SIMPLEX, 2.0,
                    (255,255,255), 6, cv2.LINE_AA)
        cv2.imwrite('frame{:04d}.png'.format(i), display)


def optimize_conics(conics, p0, write_frames=False):
    """Find the perspective warp about ``p0`` that equalises conic areas.

    Minimises conic_area_discrepancy over the two parameters (a, b) of
    ``centered_warp(p0[0], p0[1], a, b)`` with SciPy's Powell method,
    starting from (0, 0), i.e. the identity warp.

    Args:
        conics: Conics to equalise.
        p0: (x, y) point about which the warp is centred.
        write_frames: If true, dump one 'frameNNNN.png' image per
            improving optimizer step (off by default).

    Returns:
        The 3x3 homography at the optimizer's solution.
    """
    x0 = np.array([0.0, 0.0])

    # Filled by conic_area_discrepancy with every improving evaluation.
    opt_results = []

    def hfunc(x):
        return centered_warp(p0[0], p0[1], x[0], x[1])

    def cost(x):
        return conic_area_discrepancy(conics, x, hfunc(x), opt_results)

    res = scipy.optimize.minimize(cost, x0, method='Powell')

    H = hfunc(res.x)

    if write_frames:
        _write_optimization_frames(conics, opt_results)

    return H

def _theta_r_histogram(edges, thetas, rho):
    """Build a (r, theta) histogram of the non-zero pixels of ``edges``.

    For every angle in ``thetas`` each non-zero pixel is projected onto
    the direction perpendicular to that angle (measured from the image
    centre) and counted in a bin ``rho`` pixels wide.

    Returns:
        Array of shape (bin_max, len(thetas)), where bin_max is the image
        diagonal divided by ``rho``, rounded up.
    """
    h, w = edges.shape

    # rho is pixels per r bin; irho is bins per pixel.
    irho = 1.0/rho
    bin_max = int(np.ceil(np.hypot(w, h)*irho))

    hist = np.zeros((bin_max, len(thetas)))

    # Row (v) and column (u) coordinates of the edge pixels, relative to
    # the image centre.
    v, u = np.nonzero(edges)
    du = u - w*0.5
    dv = v - h*0.5

    for i, theta in enumerate(thetas):
        # Signed distance along the normal of direction theta, in bins,
        # offset so the image centre falls in the middle bin.
        bin_idx = (-du*np.sin(theta) + dv*np.cos(theta))*irho + 0.5*bin_max

        # +0.5 rounds to the nearest bin.  Rounding can push a pixel at
        # the very edge of the range onto index bin_max, so clamp into
        # the valid range before counting.
        bins = (bin_idx + 0.5).astype(int)
        np.clip(bins, 0, bin_max - 1, out=bins)

        hist[:, i] = np.bincount(bins, minlength=bin_max)

    return hist


def orientation_detect(img, contours, H, rho=8.0, ntheta=512):
    """Estimate the text-line direction and fold it into the homography.

    The contours are warped by ``H`` and drawn as outlines.  For each of
    ``ntheta`` candidate angles in [-pi/2, pi/2) the outline pixels are
    histogrammed by their distance perpendicular to that angle; the angle
    whose histogram has the most empty bins is taken as the orientation.

    Debug images 'edges', 'histogram', 'prerotate_noline', 'prerotate'
    and 'preskew' are recorded via debug_show.

    Args:
        img: Source image (only used for the debug images).
        contours: Text contours as point arrays.
        H: 3x3 homography from the previous stage.
        rho: Width of a histogram bin in pixels.
        ntheta: Number of candidate angles.

    Returns:
        ``rotation(-theta) . H`` for the detected angle ``theta``.
    """
    pts = np.vstack(tuple(contours))

    shape, TH = warp_containing_points(img, pts, H, shape_only=True)

    text_edges = np.zeros(shape, dtype=np.uint8)

    for contour in contours:
        warped_contour = cv2.perspectiveTransform(
            contour.astype(np.float32), TH)
        cv2.drawContours(text_edges, [warped_contour.astype(int)],
                         0, (255,255,255))

    debug_show('edges', text_edges)

    thetas = np.linspace(-0.5*np.pi, 0.5*np.pi, ntheta, endpoint=False)

    hist = _theta_r_histogram(text_edges, thetas, rho)
    bin_max = hist.shape[0]

    # The chosen angle is the one leaving the largest number of r bins
    # completely empty.
    num_zero = (hist == 0).sum(axis=0)
    best_theta_idx = int(num_zero.argmax())
    theta = thetas[best_theta_idx]

    # compose with previous homography
    RH = np.dot(rotation(-theta), H)

    # Everything below is debug visualisation only.
    debug_hist = (255*hist/hist.max()).astype('uint8')
    debug_hist = cv2.cvtColor(debug_hist, cv2.COLOR_GRAY2RGB)

    cv2.line(debug_hist,
             (best_theta_idx, 0),
             (best_theta_idx, bin_max), (255,0,0),
             1, cv2.LINE_AA)

    debug_show('histogram', debug_hist)

    h, w = text_edges.shape
    p0 = np.array((w*0.5, h*0.5))
    t = np.array((np.cos(theta), np.sin(theta)))

    warped = cv2.warpPerspective(img, TH, (shape[1], shape[0]),
                                 borderMode=cv2.BORDER_REPLICATE)

    debug_show('prerotate_noline', warped)

    # Line through the image centre along the detected direction.
    cv2.line(warped,
             tuple(map(int, p0 - rho*bin_max*t)),
             tuple(map(int, p0 + rho*bin_max*t)),
             (255, 0, 0),
             6, cv2.LINE_AA)

    debug_show('prerotate', warped)

    warped, _ = warp_containing_points(img, pts, RH)
    debug_show('preskew', warped)

    return RH


def skew_detect(img, contours, RH):
    """Find the horizontal shear that makes the contours narrowest.

    Minimises ``skewed_widths`` of the contours under
    ``slant(sx) . RH`` over the scalar ``sx`` (bracket -2, 0, 2) and
    returns the homography with the best shear applied.

    Debug images 'convex_hulls_before', 'convex_hulls_after' and 'final'
    are recorded via debug_show.

    Args:
        img: Source image (only used for the debug images).
        contours: Text contours as point arrays.
        RH: 3x3 homography from the orientation stage.

    Returns:
        ``slant(sx) . RH`` for the optimal ``sx``.
    """
    hulls = [cv2.convexHull(c) for c in contours]
    pts = np.vstack(tuple(hulls))

    def outline_hulls(canvas, transform):
        # Draw every hull, mapped through `transform`, onto `canvas`.
        for hull in hulls:
            warped_hull = cv2.perspectiveTransform(hull, transform).astype(int)
            cv2.drawContours(canvas, [warped_hull], 0, (255, 0, 255),
                             6, cv2.LINE_AA)

    def width_for_shear(sx):
        return skewed_widths(contours, np.dot(slant(sx), RH))

    before, TRH = warp_containing_points(img, pts, RH)
    outline_hulls(before, TRH)
    debug_show('convex_hulls_before', before)

    res = scipy.optimize.minimize_scalar(width_for_shear, (-2.0, 0.0, 2.0))

    SRH = np.dot(slant(res.x), RH)
    warped, Hfinal = warp_containing_points(img, pts, SRH)

    # Annotate a copy so the 'final' image stays clean.
    after = warped.copy()
    outline_hulls(after, Hfinal)
    debug_show('convex_hulls_after', after)

    debug_show('final', warped)

    return SRH

def main():
    """Run the whole pipeline on the image named by the first
    command-line argument, then page through the debug images.

    Exits with a message if no argument is given or the image cannot be
    read.  Each debug image stays on screen until a key is pressed.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} IMAGE'.format(sys.argv[0]))

    img = cv2.imread(sys.argv[1])
    # cv2.imread signals an unreadable or missing file by returning None
    # rather than raising.
    if img is None:
        sys.exit('error: could not read image {}'.format(sys.argv[1]))

    debug_show('input', img)

    contours, hierarchy = get_contours(img)

    conics, contours, centroid = get_conics(img, contours, hierarchy)
    H = optimize_conics(conics, centroid)
    RH = orientation_detect(img, contours, H)
    skew_detect(img, contours, RH)

    for debug_img in DEBUG_IMAGES:
        cv2.imshow('Debug', debug_img)
        # waitKey returns a negative value while no key has been pressed.
        while cv2.waitKey(5) < 0:
            pass

# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()