Repo for the search-and-displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into .md files to be used with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

147 lines
3.6 KiB

  1. <?php
  2. namespace App\Parser\DocxParser;
  3. use App\Parser\DocxParser\Traits\Helper;
  /**
   * Turns a single DOCX text element into an array describing it: the text wrapped in
   * inline-styled HTML, plus an optional leading numbering token split off the text.
   */
  class Text
  {
      use Helper;

      /**
       * Builds the data array for a text element and tags it with type "text".
       *
       * @param object $textElement Element exposing getText() and getFontStyle().
       *
       * @return array Keys 'content' and 'type', plus 'numbering' when a leading numbering was found.
       */
      public function handle($textElement)
      {
          $data = $this->getElementData($textElement);
          $data['type'] = 'text';

          return $data;
      }

      /**
       * Splits the element's text into numbering/content and wraps non-empty content in styled HTML.
       *
       * @param object $textElement
       *
       * @return array
       */
      private function getElementData($textElement)
      {
          $textData = $this->getNumberingFromText((string) $textElement->getText());

          // Empty content is returned untouched so that no empty <span> is generated.
          if ($textData['content'] !== '') {
              $textData['content'] = $this->styleTheText($textData['content'], $textElement);
          }

          return $textData;
      }

      /**
       * Separates a leading numbering token (text starting with a digit or a dot) from the rest.
       *
       * @param string $text
       *
       * @return array 'content' always; 'numbering' only when a leading token was recognised.
       */
      private function getNumberingFromText($text)
      {
          $text = (string) $text;
          $trimmed = trim($text);

          // The token is a digit or dot followed by anything that is not a letter, a space or a
          // parenthesis. NOTE(review): the parentheses inside the character class look like they
          // were meant as grouping; as written "1) foo" yields numbering "1" - confirm intent.
          $found = preg_match('/^([0-9.])([^(A-Z)(a-z) ]*)/', $trimmed, $match);

          // A lone "." is treated as ordinary text, not as numbering.
          if ($found === 1 && $match[0] !== '.') {
              return [
                  // Strip only the leading token: replacing every occurrence of it would also
                  // delete the same characters wherever they reappear later in the text.
                  'content'   => trim(substr($trimmed, strlen($match[0]))),
                  'numbering' => $match[0],
              ];
          }

          return ['content' => trim(preg_replace('/\t+/', '', $text))];
      }

      /**
       * Wraps the text in a <span> carrying the inline CSS and HTML tags derived from the font style.
       *
       * NOTE(review): $textString is inserted into the markup unescaped - confirm whether the
       * consumer of this HTML expects raw text before adding escaping here.
       *
       * @param string $textString
       * @param object $textObject Element exposing getFontStyle().
       *
       * @return string
       */
      private function styleTheText($textString, $textObject)
      {
          $font = $textObject->getFontStyle();

          // Guard: if getFontStyle() does not hand back an object exposing getStyleValues()
          // there is nothing to apply, so emit a bare span instead of failing fatally.
          if (! is_object($font) || ! is_callable([$font, 'getStyleValues'])) {
              return '<span>'.$textString.'</span>';
          }

          $fontStyle = $font->getStyleValues();
          $style = (isset($fontStyle['style']) && is_array($fontStyle['style'])) ? $fontStyle['style'] : [];
          $basic = (isset($fontStyle['basic']) && is_array($fontStyle['basic'])) ? $fontStyle['basic'] : [];

          $inlineStyle = $this->getInlineStyles(array_merge($style, $basic));

          return '<span'.($inlineStyle ? ' style="'.$inlineStyle.'"' : '').'>'
              .$this->getStyledText($textString, $style)
              .'</span>';
      }

      /**
       * Builds the CSS declarations for every supported style that is switched on.
       *
       * @param array $styles Style name => value (boolean flag or concrete value such as a colour).
       *
       * @return string Concatenated CSS declarations, empty when nothing applies.
       */
      private function getInlineStyles($styles)
      {
          // sprintf templates; %s receives the style's own value, so a key that is absent
          // from $styles is never read.
          $acceptedInline = [
              'dStrike'   => 'text-decoration: line-through;text-decoration-style: double;',
              'smallCaps' => 'font-variant: small-caps;',
              'allCaps'   => 'text-transform: uppercase;',
              'fgColor'   => 'background-color:%s;',
              'hidden'    => 'display:none;',
              'size'      => 'font-size:%spt;',
              'color'     => 'color:#%s;',
          ];

          $styleString = '';
          foreach ($styles as $style => $value) {
              // Strict comparison is required: loosely, boolean true equals 'none', which
              // would discard every style that is expressed as a boolean flag.
              if (! isset($acceptedInline[$style]) || ! $value || in_array($value, ['none', 'auto'], true)) {
                  continue;
              }

              // The value ends up inside a double-quoted HTML attribute, so escape it.
              $styleString .= sprintf(
                  $acceptedInline[$style],
                  htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8')
              );
          }

          return $styleString;
      }

      /**
       * Wraps the text in one HTML tag per active style (bold, italic, underline, ...).
       *
       * @param string $text
       * @param array  $styles Style name => value; falsy values and 'none' count as inactive.
       *
       * @return string
       */
      private function getStyledText($text, $styles)
      {
          $mappedStyle = [
              'bold'      => 'strong',
              'italic'    => 'i',
              'underline' => 'u',
              'strike'    => 'strike',
              'super'     => 'sup',
              'sub'       => 'sub',
          ];

          foreach ($styles as $style => $active) {
              if (isset($mappedStyle[$style]) && $active && $active !== 'none') {
                  $text = $this->appendHtmlStyle($text, $mappedStyle[$style]);
              }
          }

          return $text;
      }

      /**
       * Encloses the text in an opening and closing tag of the given name.
       *
       * @param string $text
       * @param string $styleType Tag name without angle brackets.
       *
       * @return string
       */
      private function appendHtmlStyle($text, $styleType)
      {
          return "<$styleType>$text</$styleType>";
      }
  }