Repo for the search and displace core module including the interface to select files and search and displace operations to run on them.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

240 lines
7.9 KiB

namespace App\SearchDisplace;
use App\SearchDisplace\Documents\DocumentFile;
use Illuminate\Support\Facades\Storage;
use App\SearchDisplace\Convertor\Convertor;
use DOMDocument;
use DOMNode;
use DOMText;
class SearchAndDisplaceXML
/** Declared by the original class; not assigned anywhere in the visible code. */
protected $file;

/**
 * Directory of the document, used as the path segment below "contracts/".
 *
 * The constructor used to create this as an undeclared (dynamic) property,
 * which is deprecated as of PHP 8.2. Dynamic properties are public, so the
 * declaration stays public to keep existing external reads working.
 */
public $fileDirectory;

/** Searchers forwarded to SearchAndDisplace for each text node. */
protected $searchers;

/** Storage disk ('local') used to resolve absolute file paths. */
protected $storage;

/** When truthy, replace() keeps the node text and treats the search result as the index list. */
protected $searchOnly;

/** Flag flipped to true by replace() once the first match has been marked. */
protected $markedStyleCreated;

/**
 * @param mixed $file       Document directory name (stored in $fileDirectory).
 * @param mixed $searchers  Searchers to apply to the document text.
 * @param mixed $searchOnly Truthy to only locate matches instead of displacing them.
 */
public function __construct($file, $searchers, $searchOnly)
{
    $this->fileDirectory = $file;
    $this->searchers = $searchers;
    $this->storage = Storage::disk('local');
    $this->searchOnly = $searchOnly;
    $this->markedStyleCreated = false;
}
/**
 * Apply search-and-displace to the document and fix up image paths in the
 * HTML preview that sits next to the generated XML.
 *
 * @return string File name (without extension) of the search-displaced XML.
 */
public function execute()
{
    $appliedXmlPath = $this->applySD();
    $parts = pathinfo($appliedXmlPath);

    // The HTML preview lives in the same directory as the generated XML.
    $previewPath = $parts['dirname'] . '/document_sdapplied.html';
    DocumentFile::updateImagesPath($previewPath, $this->fileDirectory);

    return $parts['filename'];
}
/**
 * Convert a search-displaced XML file to HTML for browser preview.
 *
 * @param mixed $file Passed through unchanged to Convertor::convert().
 * @return void
 */
protected function convertSearchDisplacedXMLToHTML($file)
{
    Convertor::convert('html', $file);
}
/**
 * Load the document XML, run search-and-displace over every "body" element
 * and write the result next to the source as document_sdapplied.xml.
 *
 * @return string Absolute path of the written document_sdapplied.xml.
 */
protected function applySD()
{
    $directory = $this->storage->path('contracts/' . $this->fileDirectory);
    $outputPath = $directory . '/document_sdapplied.xml';

    $dom = new \DOMDocument();
    $dom->load($directory . '/document.xml');

    // processElement() walks each body recursively down to its text nodes.
    foreach ($dom->getElementsByTagName('body') as $body) {
        $this->processElement($body, $dom);
    }

    $dom->save($outputPath);

    return $outputPath;
}
/**
 * Recursively walk a node and run replace() on every text node beneath it.
 *
 * replace() keeps the original DOMText and inserts new sibling nodes after
 * it. Those new siblings are reached later by the same foreach over
 * childNodes, so they are remembered and skipped; otherwise they would be
 * processed again and the loop might never terminate.
 *
 * @param DOMNode     $element Node to walk; DOMText nodes and nodes without children are ignored.
 * @param DOMDocument $dom     Document passed on to replace() for creating nodes.
 * @return void
 */
public function processElement(DOMNode &$element, DOMDocument &$dom)
{
    if ($element instanceof DOMText || !$element->hasChildNodes()) {
        return;
    }

    // Nodes created by replace() for this element's children.
    $replacements = [];

    foreach ($element->childNodes as $child) {
        // Strict comparison checks object identity. A loose in_array() compares
        // objects structurally and can treat a different node as "already
        // created", silently skipping content that was never processed.
        if (in_array($child, $replacements, true)) {
            continue;
        }

        if ($child instanceof DOMText) {
            // Replace the text node (if it contains matching text) and remember the new nodes.
            $replacements = array_merge($replacements, $this->replace($child, $dom));
            continue;
        }

        // Any other node type: descend into it.
        $this->processElement($child, $dom);
    }
}
* Apply SD on document's paragraph
* @param DOMNode $element DOM element
* @param DOMDocument $dom The document
* @return array
// Runs the configured searchers over one text node. For every reported change
// the node is split in three: the text before the match stays in $element, the
// match goes into a new "text:span" with style name 'mark', and the text after
// the match goes into a second new "text:span". Returns the span nodes created
// here so that processElement() can skip them.
// NOTE(review): this listing appears to be missing lines (closing braces, the
// remaining SearchAndDisplace constructor arguments, the body of the
// empty($changes) guard and whatever follows the markedStyleCreated check).
// Confirm against the complete file before changing any logic below.
protected function replace(DOMText &$element, DOMDocument &$dom)
/** @var string $content */
$content = $element->textContent ?? $element->nodeValue;
$search = new SearchAndDisplace(
'searchers' => $this->searchers,
$changed = $search->execute();
// Collects every node inserted into the document by this call.
$replacementNodes = [];
if($changed) {
if($this->searchOnly) {
// Search-only mode: the text is left as-is and the result itself is the index list.
$content = $element->textContent;
$indexes = $changed;
} else {
// Displace mode: the result carries the rewritten text and the index list separately.
$content = $changed['content'];
$indexes = $changed['indexes'];
// $indexes is keyed by searcher; each entry is a list of changes with 'start' and 'end' offsets.
foreach($indexes as $searcher => $changes) {
if(empty($changes)) {
foreach($changes as $change) {
// 'end' is treated as inclusive: the match length is end - start + 1 and the tail starts at end + 1.
// NOTE(review): substr() works on bytes; presumably the offsets are byte offsets — confirm for multibyte text.
// NOTE(review): every iteration slices the same $content, but $element is shortened by the first
// iteration; with more than one change per node this looks like it would duplicate text — confirm.
$firstContent = substr($content, 0, $change['start']);
$changedContent = substr($content, $change['start'], $change['end'] - $change['start'] + 1);
$lastContent = substr($content, $change['end'] + 1);
// $firstNode = $dom->createElement("text:span", $firstContent);
// NOTE(review): assigning textContent stores the string as literal text, so htmlspecialchars()
// here may end up double-escaping characters such as "&" — confirm.
$element->textContent = htmlspecialchars($firstContent);
$changedNode = $dom->createElement("text:span", htmlspecialchars($changedContent));
$changedNode->setAttribute('text:style-name', 'mark');
$lastNode = $dom->createElement("text:span", htmlspecialchars($lastContent));
// Add the changed and last nodes after the current (element) node
// $element->parentNode->insertBefore($firstNode, $element->nextSibling);
# element->parentNode->insertBefore(... $element->nextSibling) inserts a new node before the node AFTER this one
# So we need to add the `last` node first, and then the `changed` node BEFORE the last.
$element->parentNode->insertBefore($lastNode, $element->nextSibling);
$element->parentNode->insertBefore($changedNode, $element->nextSibling);
$replacementNodes[] = $changedNode;
$replacementNodes[] = $lastNode;
// The flag is set on the first marked match; the statement(s) guarded together with it are not fully visible here.
if(!$this->markedStyleCreated) {
$this->markedStyleCreated = true;
return $replacementNodes;
* Create marked style for browser preview
// Builds a "style:style" element named 'mark' in the 'text' family, plus a
// 'style:text-properties' element carrying a yellow (#ffff00) background.
// NOTE(review): no visible line attaches $child to $style or $style to the
// document; those statements are presumably missing from this listing — confirm.
private function createMarkedStyle($dom)
$style = $dom->createElement("style:style");
// 'mark' is the style name that replace() assigns to changed spans via text:style-name.
$style->setAttribute("style:name", 'mark');
$style->setAttribute("style:family", 'text');
$child = $dom->createElement('style:text-properties');
$child->setAttribute("officeooo:rsid", '0014890a');
$child->setAttribute("fo:background-color", '#ffff00');
* Remove marked style used in browser and convert XML file to original file type
* @param $type file type
* @param $file absolute file path
* @return string $path
// Returns the result of Convertor::convert($type, $file, true).
// NOTE(review): the visible lines never load $file into $dom, so as shown the
// 'automatic-styles' lookup runs on an empty document and $style is unused.
// The load / remove / save steps implied by the comment below are presumably
// missing from this listing — confirm against the complete file.
public static function prepareForDownload($type, $file)
// remove marked style from XML
$dom = new DOMDocument();
$style = $dom->getElementsByTagName('automatic-styles')->item(0);
return Convertor::convert($type, $file, true);