Repository for the search-and-displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into Markdown (.md) files for use with search-and-displace operations.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

630 lines
16 KiB

3 years ago
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''Functions for representing ellipses using various
  4. parameterizations, and converting between them. There are three
  5. parameterizations implemented by this module:
  6. Geometric parameters:
  7. ---------------------
  8. The geometric parameters are
  9. (x₀, y₀, a, b, θ)
  10. The simplest parameterization of an ellipse is by its center point
  11. (x₀, y₀), its semimajor and semiminor axes a and b, and its rotation
  12. angle θ.
  13. Conic:
  14. ------
  15. This parameterization consists of six parameters A-F which establish
  16. the implicit equation for a general conic:
  17. AX² + BXY + CY² + DX + EY + F = 0
  18. Note that this equation may not represent only ellipses (it also
  19. includes hyperbolas and parabolas).
  20. Since multiplying the entire equation by any non-zero constant results
  21. in the same ellipse, the six parameters are only described up to
  22. scale, yielding five degrees of freedom. We can determine a canonical
  23. scale factor k to scale this equation by such that
  24. A = a²(sin θ)² + b²(cos θ)²
  25. B = 2(b² - a²) sin θ cos θ
  26. C = a²(cos θ)² + b²(sin θ)²
  27. D = -2Ax₀ - By₀
  28. E = -Bx₀ - 2Cy₀
  29. F = Ax₀² + Bx₀y₀ + Cy₀² - a²b²
  30. ...in terms of the geometric parameters (x₀, y₀, a, b, θ).
  31. Shape moments:
  32. --------------
  33. The shape moment parameters are
  34. (m₀₀, m₁₀, m₀₁, mu₂₀, mu₁₁, mu₀₂)
  35. An ellipse may be completely specified by its shape moments up to
  36. order 2. These include the area m₀₀, area-weighted center (m₁₀, m₀₁),
  37. and the three second order central moments (mu₂₀, mu₁₁, mu₀₂).
  38. '''
  39. # pylint: disable=C0103
  40. # pylint: disable=R0914
  41. # pylint: disable=E1101
  42. from __future__ import print_function
  43. import numpy
  44. def _params_str(names, params):
  45. '''Helper function for printing out the various parameters.'''
  46. return '({})'.format(', '.join('{}: {:g}'.format(n, p)
  47. for (n, p) in zip(names, params)))
  48. ######################################################################
  49. GPARAMS_NAMES = ('x0', 'y0', 'a', 'b', 'theta')
  50. GPARAMS_DISPLAY_NAMES = ('x₀', 'y₀', 'a', 'b', 'θ')
  51. def gparams_str(gparams):
  52. '''Convert geometric parameters to nice printable string.'''
  53. return _params_str(GPARAMS_DISPLAY_NAMES, gparams)
  54. def gparams_evaluate(gparams, phi):
  55. '''Evaluate the parametric formula for an ellipse at each angle
  56. specified in the phi array. Returns two arrays x and y of the same
  57. size as phi.
  58. '''
  59. x0, y0, a, b, theta = tuple(gparams)
  60. c = numpy.cos(theta)
  61. s = numpy.sin(theta)
  62. cp = numpy.cos(phi)
  63. sp = numpy.sin(phi)
  64. x = a*cp*c - b*sp*s + x0
  65. y = a*cp*s + b*sp*c + y0
  66. return x, y
  67. def gparams_from_conic(conic):
  68. '''Convert the given conic parameters to geometric ellipse parameters.'''
  69. k, ab = conic_scale(conic)
  70. if numpy.isinf(ab):
  71. return None
  72. A, B, C, D, E, F = tuple(conic)
  73. T = B**2 - 4*A*C
  74. x0 = (2*C*D - B*E)/T
  75. y0 = (2*A*E - B*D)/T
  76. S = A*E**2 + C*D**2 - B*D*E + (B**2 - 4*A*C)*F
  77. U = numpy.sqrt((A - C)**2 + B**2)
  78. a = -numpy.sqrt(2*S*(A+C+U))/T
  79. b = -numpy.sqrt(2*S*(A+C-U))/T
  80. theta = numpy.arctan2(C-A-U, B)
  81. return numpy.array((x0, y0, a, b, theta))
  82. def _gparams_sincos_from_moments(m):
  83. '''Convert from moments to canonical parameters, except postpone the
  84. final arctan until later. Formulas determined largely by trial and
  85. error.
  86. '''
  87. m00, m10, m01, mu20, mu11, mu02 = tuple(m)
  88. x0 = m10 / m00
  89. y0 = m01 / m00
  90. A = 4*mu02/m00
  91. B = -8*mu11/m00
  92. C = 4*mu20/m00
  93. U = numpy.sqrt((A - C)**2 + B**2)
  94. T = B**2 - 4*A*C
  95. S = 1.0
  96. a = -numpy.sqrt(2*S*(A+C+U))/T
  97. b = -numpy.sqrt(2*S*(A+C-U))/T
  98. # we want a * b * pi = m00
  99. #
  100. # so if we are off by some factor, we should scale a and b by this factor
  101. #
  102. # we need to fix things up somehow because moments have 6 DOF and
  103. # ellipse has only 5.
  104. area = numpy.pi * a * b
  105. scl = numpy.sqrt(m00 / area)
  106. a *= scl
  107. b *= scl
  108. sincos = numpy.array([C-A-U, B])
  109. sincos /= numpy.linalg.norm(sincos)
  110. s, c = sincos
  111. return numpy.array((x0, y0, a, b, s, c))
  112. def gparams_from_moments(m):
  113. '''Convert the given moment parameters to geometric ellipse parameters.
  114. Formula derived through trial and error.'''
  115. x0, y0, a, b, s, c = _gparams_sincos_from_moments(m)
  116. theta = numpy.arctan2(s, c)
  117. return numpy.array((x0, y0, a, b, theta))
  118. ######################################################################
  119. CONIC_NAMES = ('A', 'B', 'C', 'D', 'E', 'F')
  120. CONIC_DISPLAY_NAMES = ('A', 'B', 'C', 'D', 'E', 'F')
  121. def conic_str(conic):
  122. '''Convert conic parameters to nice printable string.'''
  123. return _params_str(CONIC_DISPLAY_NAMES, conic)
  124. def conic_scale(conic):
  125. '''Returns a pair (k, ab) for the given conic parameters, where k is
  126. the scale factor to divide all parameters by in order to normalize
  127. them, and ab is the product of the semimajor and semiminor axis
  128. (i.e. the ellipse's area, divided by pi). If the conic does not
  129. describe an ellipse, then this returns infinity, infinity.
  130. '''
  131. A, B, C, D, E, F = tuple(conic)
  132. T = 4*A*C - B*B
  133. if T < 0.0:
  134. return numpy.inf, numpy.inf
  135. S = A*E**2 + B**2*F + C*D**2 - B*D*E - 4*A*C*F
  136. if not S:
  137. return numpy.inf, numpy.inf
  138. k = 0.25*T**2/S
  139. ab = 2.0*S/(T*numpy.sqrt(T))
  140. return k, ab
  141. def conic_from_points(x, y):
  142. '''Fits conic pararameters using homogeneous least squares. The
  143. resulting fit is unlikely to be numerically robust when the x/y
  144. coordinates given are far from the [-1,1] interval.'''
  145. x = x.reshape((-1, 1))
  146. y = y.reshape((-1, 1))
  147. M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x)))
  148. _, _, v = numpy.linalg.svd(M)
  149. return v[5, :].copy()
  150. def conic_transform(conic, H):
  151. '''Returns the parameters of a conic after being transformed through a
  152. 3x3 homography H. This is straightforward since a conic can be
  153. represented as a symmetric matrix (see
  154. https://en.wikipedia.org/wiki/Matrix_representation_of_conic_sections).
  155. '''
  156. A, B, C, D, E, F = tuple(conic)
  157. M = numpy.array([[A, 0.5*B, 0.5*D],
  158. [0.5*B, C, 0.5*E],
  159. [0.5*D, 0.5*E, F]])
  160. Hinv = numpy.linalg.inv(H)
  161. M = numpy.dot(Hinv.T, numpy.dot(M, Hinv))
  162. A = M[0, 0]
  163. B = M[0, 1]*2
  164. C = M[1, 1]
  165. D = M[0, 2]*2
  166. E = M[1, 2]*2
  167. F = M[2, 2]
  168. return numpy.array((A, B, C, D, E, F))
  169. def _conic_from_gparams_sincos(gparams_sincos):
  170. x0, y0, a, b, s, c = gparams_sincos
  171. A = a**2 * s**2 + b**2 * c**2
  172. B = 2*(b**2 - a**2) * s * c
  173. C = a**2 * c**2 + b**2 * s**2
  174. D = -2*A*x0 - B*y0
  175. E = -B*x0 - 2*C*y0
  176. F = A*x0**2 + B*x0*y0 + C*y0**2 - a**2*b**2
  177. return numpy.array((A, B, C, D, E, F))
  178. def conic_from_gparams(gparams):
  179. '''Convert geometric parameters to conic parameters. Formulas from
  180. https://en.wikipedia.org/wiki/Ellipse#General_ellipse.
  181. '''
  182. x0, y0, a, b, theta = tuple(gparams)
  183. c = numpy.cos(theta)
  184. s = numpy.sin(theta)
  185. return _conic_from_gparams_sincos((x0, y0, a, b, s, c))
  186. def conic_from_moments(moments):
  187. g = _gparams_sincos_from_moments(moments)
  188. return _conic_from_gparams_sincos(g)
  189. ######################################################################
  190. MOMENTS_NAMES = ('m00', 'm10', 'm01', 'mu20', 'mu11', 'mu02')
  191. MOMENTS_DISPLAY_NAMES = ('m₀₀', 'm₁₀', 'm₀₁', 'mu₂₀', 'mu₁₁', 'mu₀₂')
  192. def moments_from_dict(m):
  193. '''Create shape moments tuple from a dictionary (i.e. returned by cv2.moments).'''
  194. return numpy.array([m[n] for n in MOMENTS_NAMES])
  195. def moments_str(m):
  196. '''Convert shape moments to nice printable string.'''
  197. return _params_str(MOMENTS_DISPLAY_NAMES, m)
  198. def moments_from_gparams(gparams):
  199. '''Create shape moments from geometric parameters.'''
  200. x0, y0, a, b, theta = tuple(gparams)
  201. c = numpy.cos(theta)
  202. s = numpy.sin(theta)
  203. m00 = a*b*numpy.pi
  204. m10 = x0 * m00
  205. m01 = y0 * m00
  206. mu20 = (a**2 * c**2 + b**2 * s**2) * m00 * 0.25
  207. mu11 = -(b**2-a**2) * s * c * m00 * 0.25
  208. mu02 = (a**2 * s**2 + b**2 * c**2) * m00 * 0.25
  209. return numpy.array((m00, m10, m01, mu20, mu11, mu02))
  210. def moments_from_conic(scaled_conic):
  211. '''Create shape moments from conic parameters.'''
  212. k, ab = conic_scale(scaled_conic)
  213. if numpy.isinf(ab):
  214. return None
  215. conic = numpy.array(scaled_conic)/k
  216. A, B, C, D, E, _ = tuple(conic)
  217. x0 = (B*E - 2*C*D)/(4*A*C - B**2)
  218. y0 = (-2*A*E + B*D)/(4*A*C - B**2)
  219. m00 = numpy.pi*ab
  220. m10 = x0*m00
  221. m01 = y0*m00
  222. mu20 = 0.25*C*m00
  223. mu11 = -0.125*B*m00
  224. mu02 = 0.25*A*m00
  225. return numpy.array((m00, m10, m01, mu20, mu11, mu02))
  226. ######################################################################
  227. def _perspective_transform(pts, H):
  228. '''Used for testing only.'''
  229. assert len(pts.shape) == 3
  230. assert pts.shape[1:] == (1, 2)
  231. pts = numpy.hstack((pts.reshape((-1, 2)),
  232. numpy.ones((len(pts), 1), dtype=pts.dtype)))
  233. pts = numpy.dot(pts, H.T)
  234. pts = pts[:, :2] / pts[:, 2].reshape((-1, 1))
  235. return pts.reshape((-1, 1, 2))
  236. def _test_moments():
  237. # so I just realized that moments have actually 6 DOF but all
  238. # ellipse parameterizations have 5, therefore information is lost
  239. # when going back and forth.
  240. m = numpy.array([59495.5, 5.9232e+07, 1.84847e+07, 3.34079e+08, -1.94055e+08, 3.74633e+08])
  241. gp = gparams_from_moments(m)
  242. m2 = moments_from_gparams(gp)
  243. gp2 = gparams_from_moments(m2)
  244. c = conic_from_moments(m)
  245. m3 = moments_from_conic(c)
  246. assert numpy.allclose(gp, gp2)
  247. assert numpy.allclose(m2, m3)
  248. print('here is the first thing:')
  249. print(' {}'.format(moments_str(m)))
  250. print()
  251. print('the rest should all be equal pairs:')
  252. print(' {}'.format(moments_str(m2)))
  253. print(' {}'.format(moments_str(m3)))
  254. print()
  255. print(' {}'.format(gparams_str(gp)))
  256. print(' {}'.format(gparams_str(gp2)))
  257. print()
  258. def _test_ellipse():
  259. print('testing moments badness')
  260. _test_moments()
  261. print('pass')
  262. # test that we can go from conic to geometric and back
  263. x0 = 450
  264. y0 = 320
  265. a = 300
  266. b = 200
  267. theta = -0.25
  268. gparams = numpy.array((x0, y0, a, b, theta))
  269. conic = conic_from_gparams(gparams)
  270. k, ab = conic_scale(conic)
  271. # ensure conic created from geometric params has trivial scale
  272. assert numpy.allclose((k, ab), (1.0, a*b))
  273. # evaluate parametric curve at different angles phi
  274. phi = numpy.linspace(0, 2*numpy.pi, 1001).reshape((-1, 1))
  275. x, y = gparams_evaluate(gparams, phi)
  276. # evaluate implicit conic formula at x,y pairs
  277. M = numpy.hstack((x**2, x*y, y**2, x, y, numpy.ones_like(x)))
  278. implicit_output = numpy.dot(M, conic)
  279. implicit_max = numpy.abs(implicit_output).max()
  280. # ensure implicit evaluates near 0 everywhere
  281. print('max item from implicit: {} (should be close to 0)'.format(implicit_max))
  282. print()
  283. assert implicit_max < 1e-5
  284. # ensure that scaled_conic has the scale we expect
  285. k = 1e-3
  286. scaled_conic = conic*k
  287. k2, ab2 = conic_scale(scaled_conic)
  288. print('these should all be equal:')
  289. print()
  290. print(' k =', k)
  291. print(' k2 =', k2)
  292. assert numpy.allclose((k2, ab2), (k, a*b))
  293. print()
  294. # convert the scaled conic back to geometric parameters
  295. gparams2 = gparams_from_conic(scaled_conic)
  296. print(' gparams =', gparams_str(gparams))
  297. # ensure that converting back from scaled conic to geometric params is correct
  298. print(' gparams2 =', gparams_str(gparams2))
  299. assert numpy.allclose(gparams, gparams2)
  300. # convert original geometric parameters to moments
  301. m = moments_from_gparams(gparams)
  302. # ...and back
  303. gparams3 = gparams_from_moments(m)
  304. # ensure that converting back from moments to geometric params is correct
  305. print(' gparams3 =', gparams_str(gparams3))
  306. print()
  307. assert numpy.allclose(gparams, gparams3)
  308. # convert moments parameterization to conic
  309. conic2 = conic_from_moments(m)
  310. # ensure that converting from moments to conics is correct
  311. print(' conic =', conic_str(conic))
  312. print(' conic2 =', conic_str(conic2))
  313. assert numpy.allclose(conic, conic2)
  314. # create conic from homogeneous least squares fit of points
  315. skip = len(x) / 10
  316. conic3 = conic_from_points(x[::skip], y[::skip])
  317. # ensure that it has non-infinite area
  318. k3, ab3 = conic_scale(conic3)
  319. assert not numpy.isinf(ab3)
  320. # normalize
  321. conic3 /= k3
  322. # ensure that conic from HLS fit is same as other 2
  323. print(' conic3 =', conic_str(conic3))
  324. print()
  325. assert numpy.allclose(conic, conic3)
  326. # convert from conic to moments
  327. m2 = moments_from_conic(scaled_conic)
  328. print(' m =', moments_str(m))
  329. # ensure that conics->moments yields the same result as geometric
  330. # params -> moments.
  331. print(' m2 =', moments_str(m2))
  332. assert numpy.allclose(m, m2)
  333. from moments_from_contour import moments_from_contour
  334. # create moments from contour
  335. pts = numpy.hstack((x, y)).reshape((-1, 1, 2))
  336. m3 = moments_from_contour(pts)
  337. # ensure that moments from contour is reasonably close to moments
  338. # from geometric params.
  339. print(' m3 =', moments_str(m3))
  340. print()
  341. assert numpy.allclose(m3, m, 1e-4, 1e-4)
  342. # create a homography H to map the ellipse through
  343. hx = 0.001
  344. hy = 0.0015
  345. H = numpy.array([
  346. [1, -0.2, 0],
  347. [0, 0.7, 0],
  348. [hx, hy, 1]])
  349. T = numpy.array([
  350. [1, 0, 400],
  351. [0, 1, 300],
  352. [0, 0, 1]])
  353. H = numpy.dot(T, numpy.dot(H, numpy.linalg.inv(T)))
  354. # transform the original points thru H
  355. Hpts = _perspective_transform(pts, H)
  356. # transform the conic parameters directly thru H
  357. Hconic = conic_transform(conic, H)
  358. # get the HLS fit of the conic corresponding to the transformed points
  359. Hconic2 = conic_from_points(Hpts[::skip, :, 0], Hpts[::skip, :, 1])
  360. # normalize the two conics
  361. Hk, Hab = conic_scale(Hconic)
  362. Hk2, Hab2 = conic_scale(Hconic2)
  363. assert not numpy.isinf(Hab) and not numpy.isinf(Hab2)
  364. Hconic /= Hk
  365. Hconic2 /= Hk2
  366. # ensure that the two conics are equal
  367. print(' Hconic =', conic_str(Hconic))
  368. print(' Hconic2 =', conic_str(Hconic2))
  369. print()
  370. assert numpy.allclose(Hconic, Hconic2)
  371. # get the moments from Hconic
  372. Hm = moments_from_conic(Hconic)
  373. # get the moments from the transformed points
  374. Hm2 = moments_from_contour(Hpts)
  375. # ensure that the two moments are close enough
  376. print(' Hm =', moments_str(Hm))
  377. print(' Hm2 =', moments_str(Hm2))
  378. print()
  379. assert numpy.allclose(Hm, Hm2, 1e-4, 1e-4)
  380. # tests complete, now visualize
  381. print('all tests passed!')
  382. try:
  383. import cv2
  384. print('visualizing results...')
  385. except ImportError:
  386. import sys
  387. print('not visualizing results since module cv2 not found')
  388. sys.exit(0)
  389. shift = 3
  390. pow2 = 2**shift
  391. p0 = numpy.array([x0, y0], dtype=numpy.float32)
  392. p1 = _perspective_transform(p0.reshape((-1, 1, 2)), H).flatten()
  393. Hgparams = gparams_from_conic(Hconic)
  394. Hp0 = Hgparams[:2]
  395. skip = len(pts)/100
  396. display = numpy.zeros((600, 800, 3), numpy.uint8)
  397. def _asint(x, as_tuple=True):
  398. x = x*pow2 + 0.5
  399. x = x.astype(int)
  400. if as_tuple:
  401. return tuple(x)
  402. else:
  403. return x
  404. for (a, b) in zip(pts.reshape((-1, 2))[::skip],
  405. Hpts.reshape((-1, 2))[::skip]):
  406. cv2.line(display, _asint(a), _asint(b),
  407. (255, 0, 255), 1, cv2.LINE_AA, shift)
  408. cv2.polylines(display, [_asint(pts, False)], True,
  409. (0, 255, 0), 1, cv2.LINE_AA, shift)
  410. cv2.polylines(display, [_asint(Hpts, False)], True,
  411. (0, 0, 255), 1, cv2.LINE_AA, shift)
  412. r = 3.0
  413. cv2.circle(display, _asint(p0), int(r*pow2+0.5),
  414. (0, 255, 0), 1, cv2.LINE_AA, shift)
  415. cv2.circle(display, _asint(p1), int(r*pow2+0.5),
  416. (255, 0, 255), 1, cv2.LINE_AA, shift)
  417. cv2.circle(display, _asint(Hp0), int(r*pow2+0.5),
  418. (0, 0, 255), 1, cv2.LINE_AA, shift)
  419. cv2.imshow('win', display)
  420. print('click in the display window & hit any key to quit.')
  421. while cv2.waitKey(5) < 0:
  422. pass
  423. if __name__ == '__main__':
  424. _test_ellipse()