Repo for the search and displace core module including the interface to select files and search and displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

219 lines
7.2 KiB

  1. <?php
  2. namespace App\SearchDisplace;
  3. use App\SearchDisplace\Documents\DocumentFile;
  4. use Illuminate\Support\Facades\Storage;
  5. use App\SearchDisplace\Convertor\Convertor;
  6. use DOMDocument;
  7. use DOMNode;
  8. use DOMText;
  /**
   * Applies search-and-displace to the text of a stored XML document.
   *
   * The document is read from "contracts/<directory>/document.xml" on the local
   * storage disk. Every text node below a "body" element is run through
   * SearchAndDisplace; matched ranges are wrapped in "text:span" elements that use
   * the "mark" text style, the result is written to "document_sdapplied.xml" and
   * then handed to the Convertor to produce HTML.
   *
   * NOTE(review): the element and attribute names used here (text:span, style:style,
   * automatic-styles, fo:background-color) look like OpenDocument flat XML - confirm.
   */
  class SearchAndDisplaceXML
  {
      /**
       * Never assigned by this class (the constructor stores its $file argument in
       * $fileDirectory). Kept so that subclasses referencing it keep working.
       */
      protected $file;

      /**
       * Directory of the document below the "contracts" folder of the local disk.
       * Previously created as an undeclared (dynamic) property by the constructor.
       */
      protected $fileDirectory;

      /** Searchers forwarded to SearchAndDisplace for every text node. */
      protected $searchers;

      /** Local storage disk used to resolve absolute paths. */
      protected $storage;

      /** Whether matches are only located (true) or the text is also rewritten (false). */
      protected $searchOnly;

      /** Whether the "mark" style has already been added to the document by this instance. */
      protected $markedStyleCreated;

      /**
       * @param string $file       Directory of the document below "contracts"
       * @param mixed  $searchers  Searchers passed through to SearchAndDisplace
       * @param bool   $searchOnly Passed through to SearchAndDisplace; also selects how its result is read
       */
      public function __construct($file, $searchers, $searchOnly)
      {
          $this->fileDirectory = $file;
          $this->searchers = $searchers;
          $this->storage = Storage::disk('local');
          $this->searchOnly = $searchOnly;
          $this->markedStyleCreated = false;
      }

      /**
       * Apply SD to the document, convert the result to HTML and pass the HTML path
       * to DocumentFile::updateImagesPath().
       *
       * @return string File name (without directory and extension) of the processed XML
       */
      public function execute()
      {
          $sdXML = $this->applySD();
          $pathinfo = pathinfo($sdXML);

          $this->convertSearchDisplacedXMLToHTML($sdXML);
          DocumentFile::updateImagesPath($pathinfo['dirname'] . '/document_sdapplied.html', $this->fileDirectory);

          return $pathinfo['filename'];
      }

      /**
       * Convert (Search displaced) XML to HTML for browser preview
       *
       * @param string $file Absolute path of the XML file to convert
       *
       * @return void
       */
      protected function convertSearchDisplacedXMLToHTML($file)
      {
          Convertor::convert('html', $file);
      }

      /**
       * Read the XML document, send the text below every "body" element to SD and
       * save the result next to the source as "document_sdapplied.xml".
       *
       * @return string Absolute path of the saved XML file
       *
       * @throws \RuntimeException When the source cannot be loaded or the result cannot be saved
       */
      protected function applySD()
      {
          $filePath = $this->storage->path("contracts/$this->fileDirectory");
          $sourcePath = $filePath . "/document.xml";
          $targetPath = $filePath . "/document_sdapplied.xml";

          $dom = new DOMDocument();
          if ($dom->load($sourcePath) === false) {
              throw new \RuntimeException("Unable to load XML document: $sourcePath");
          }

          # Iterate a snapshot: the tree is modified while it is being processed.
          foreach (iterator_to_array($dom->getElementsByTagName('body'), false) as $body) {
              $this->processElement($body, $dom);
          }

          if ($dom->save($targetPath) === false) {
              throw new \RuntimeException("Unable to save XML document: $targetPath");
          }

          return $targetPath;
      }

      /**
       * Recursively walk an element and apply SD to every text node below it.
       *
       * @param DOMNode     $element Node whose children are processed; text nodes and childless nodes are ignored
       * @param DOMDocument $dom     The document the node belongs to
       *
       * @return void
       */
      public function processElement(DOMNode &$element, DOMDocument &$dom)
      {
          if ($element instanceof DOMText || !$element->hasChildNodes()) {
              return;
          }

          # replace() splits text nodes and inserts new siblings. childNodes is a live
          # list, so iterate over a snapshot of the children that existed beforehand:
          # freshly created nodes are then never visited and need no skip list.
          foreach (iterator_to_array($element->childNodes, false) as $child) {
              if ($child instanceof DOMText) {
                  $this->replace($child, $dom);
              } else {
                  $this->processElement($child, $dom);
              }
          }
      }

      /**
       * Apply SD on a single text node and mark every changed range.
       *
       * @param DOMText     $element Text node to process; it is shortened in place when matches are wrapped
       * @param DOMDocument $dom     The document
       *
       * @return array Nodes created while marking (the "text:span" elements and the trailing text nodes)
       */
      protected function replace(DOMText &$element, DOMDocument &$dom)
      {
          /** @var string $content */
          $content = $element->textContent ?? $element->nodeValue;

          # NOTE(review): the text is searched after stripslashes(), but the returned
          # offsets are applied to the node's own text. In search-only mode that text is
          # left untouched, so offsets may be shifted when it contains backslashes - confirm.
          $search = new SearchAndDisplace(
              stripslashes($content),
              [
                  'searchers' => $this->searchers,
              ],
              $this->searchOnly,
              true
          );
          $changed = $search->execute();

          $replacementNodes = [];
          if (!$changed) {
              return $replacementNodes;
          }

          if ($this->searchOnly) {
              # Search-only results are the index groups themselves.
              $indexes = $changed;
          } else {
              # Otherwise the result carries the rewritten text plus the index groups.
              $element->textContent = $changed['content'];
              $indexes = $changed['indexes'] ?? [];
          }

          # The changed indexes are filed into arrays based on which searcher they belong to.
          # That does not matter here: flatten them into one list so they can be sorted.
          # (Guarded, because array_merge() must not be called without arguments.)
          $groups = array_values($indexes);
          $indexes = $groups ? array_merge(...$groups) : [];

          # Process from the end of the text towards the start, so that splitting never
          # shifts the offsets of the ranges that are still to be handled.
          usort($indexes, function ($first, $second) {
              return $second['start'] <=> $first['start'];
          });

          foreach ($indexes as $index) {
              # 'end' is the last changed character, so the tail starts one character later.
              # NOTE(review): splitText() counts characters; assumes the searcher reports
              # character (not byte) offsets - confirm.
              $lastNode = $element->splitText($index['end'] + 1);
              $changedNode = $element->splitText($index['start']);

              # splitText() returns false for an offset beyond the text; skip such a range
              # instead of dereferencing false.
              if (!$changedNode instanceof DOMText) {
                  continue;
              }

              # The changed text has to live inside an element so the mark style can be applied.
              # The text node is moved into an empty span rather than passed as createElement()'s
              # value, because that value is not escaped for "&" and would be cut at the first one.
              $span = $dom->createElement('text:span');
              $span->setAttribute('text:style-name', 'mark');
              $changedNode->parentNode->replaceChild($span, $changedNode);
              $span->appendChild($changedNode);

              $replacementNodes[] = $span;
              if ($lastNode instanceof DOMText) {
                  $replacementNodes[] = $lastNode;
              }
          }

          if (!$this->markedStyleCreated) {
              $this->createMarkedStyle($dom);
              $this->markedStyleCreated = true;
          }

          return $replacementNodes;
      }

      /**
       * Create marked style for browser preview: a "mark" text style with a yellow
       * background, appended to the first "automatic-styles" element.
       *
       * @param DOMDocument $dom The document
       *
       * @return void
       *
       * @throws \RuntimeException When the document has no "automatic-styles" element
       */
      private function createMarkedStyle($dom)
      {
          $automaticStyles = $dom->getElementsByTagName('automatic-styles')->item(0);
          if ($automaticStyles === null) {
              throw new \RuntimeException('The document has no "automatic-styles" element to hold the mark style.');
          }

          $properties = $dom->createElement('style:text-properties');
          $properties->setAttribute("officeooo:rsid", '0014890a');
          $properties->setAttribute("fo:background-color", '#ffff00');

          $style = $dom->createElement("style:style");
          $style->setAttribute("style:name", 'mark');
          $style->setAttribute("style:family", 'text');
          $style->appendChild($properties);

          $automaticStyles->appendChild($style);
      }

      /**
       * Remove marked style used in browser and convert XML file to original file type
       *
       * Only a "style:style" element named "mark" is removed (the last one found in the
       * first "automatic-styles" element); a document without it is left as it is.
       *
       * @param $type file type
       * @param $file absolute file path
       *
       * @return string $path
       *
       * @throws \RuntimeException When the XML file cannot be loaded
       */
      public static function prepareForDownload($type, $file)
      {
          $dom = new DOMDocument();
          if ($dom->load($file) === false) {
              throw new \RuntimeException("Unable to load XML document: $file");
          }

          $automaticStyles = $dom->getElementsByTagName('automatic-styles')->item(0);
          if ($automaticStyles !== null) {
              # Search backwards: the mark style is appended, so it is normally the last child.
              for ($node = $automaticStyles->lastChild; $node !== null; $node = $node->previousSibling) {
                  if (
                      $node instanceof \DOMElement
                      && $node->nodeName === 'style:style'
                      && $node->getAttribute('style:name') === 'mark'
                  ) {
                      $automaticStyles->removeChild($node);
                      break;
                  }
              }
          }

          $dom->save($file);

          return Convertor::convert($type, $file, true);
      }
  }