Repo for the search and displace core module including the interface to select files and search and displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

219 lines
7.2 KiB

<?php
namespace App\SearchDisplace;
use App\SearchDisplace\Documents\DocumentFile;
use Illuminate\Support\Facades\Storage;
use App\SearchDisplace\Convertor\Convertor;
use DOMDocument;
use DOMNode;
use DOMText;
/**
 * Runs the configured searchers over the text nodes of an XML document and
 * wraps every match in a highlighted `text:span`, so the result can be
 * converted to HTML for preview.
 *
 * Input is read from `contracts/<fileDirectory>/document.xml` on the local
 * storage disk; the processed copy is written next to it as
 * `document_sdapplied.xml`.
 */
class SearchAndDisplaceXML
{
    /** Name of the temporary style used to highlight matches. */
    const MARK_STYLE_NAME = 'mark';

    /**
     * Kept for backward compatibility with code that may read it; mirrors $fileDirectory.
     *
     * @var string
     */
    protected $file;

    /**
     * Directory (relative to `contracts/`) that holds the document.
     *
     * @var string
     */
    protected $fileDirectory;

    /** @var mixed Searchers forwarded unchanged to SearchAndDisplace. */
    protected $searchers;

    /** @var mixed Local storage disk obtained from the Storage facade. */
    protected $storage;

    /** @var mixed When truthy, matches are only highlighted and the text is left unchanged. */
    protected $searchOnly;

    /** @var bool Whether the highlight style has already been added to the document. */
    protected $markedStyleCreated;

    /**
     * @param string $file       Directory of the document, relative to `contracts/`
     * @param mixed  $searchers  Searchers forwarded to SearchAndDisplace
     * @param mixed  $searchOnly Truthy to highlight matches without replacing text
     */
    public function __construct($file, $searchers, $searchOnly)
    {
        // Declare-and-assign instead of creating a dynamic property
        // (dynamic properties are deprecated as of PHP 8.2).
        $this->file = $file;
        $this->fileDirectory = $file;
        $this->searchers = $searchers;
        $this->storage = Storage::disk('local');
        $this->searchOnly = $searchOnly;
        $this->markedStyleCreated = false;
    }

    /**
     * Apply the searchers to the document, convert the result to HTML and
     * fix up the image paths of the generated HTML.
     *
     * @return string File name (without extension) of the processed XML file
     */
    public function execute()
    {
        $processedXml = $this->applySD();
        $parts = pathinfo($processedXml);

        $this->convertSearchDisplacedXMLToHTML($processedXml);
        DocumentFile::updateImagesPath(
            $parts['dirname'] . '/document_sdapplied.html',
            $this->fileDirectory
        );

        return $parts['filename'];
    }

    /**
     * Convert (Search displaced) XML to HTML for browser preview
     *
     * @param string $file Absolute path of the processed XML file
     *
     * @return void
     */
    protected function convertSearchDisplacedXMLToHTML($file)
    {
        Convertor::convert('html', $file);
    }

    /**
     * Read the XML document, run SD over the text below every `body` element
     * and save the result as `document_sdapplied.xml`.
     *
     * @return string Absolute path of the saved, processed XML file
     */
    protected function applySD()
    {
        $dom = new DOMDocument();
        $directory = $this->storage->path("contracts/$this->fileDirectory");
        $dom->load($directory . "/document.xml");

        foreach ($dom->getElementsByTagName('body') as $body) {
            $this->processElement($body, $dom);
        }

        $target = $directory . "/document_sdapplied.xml";
        $dom->save($target);

        return $target;
    }

    /**
     * Recursively walk an element and apply SD to every text node below it.
     *
     * Replacing a text node splits it into several sibling nodes. Those new
     * siblings show up in the live child list while it is being iterated, so
     * they are remembered and skipped to avoid processing them again.
     *
     * @param DOMNode     $element Node whose descendants are processed
     * @param DOMDocument $dom     The owning document
     *
     * @return void
     */
    public function processElement(DOMNode &$element, DOMDocument &$dom)
    {
        if ($element instanceof DOMText || !$element->hasChildNodes()) {
            return;
        }

        $replacements = [];

        foreach ($element->childNodes as $child) {
            // Strict comparison: only the very same node objects must be skipped.
            // A loose comparison of objects does not test identity and could
            // skip unrelated siblings of the same class.
            if (in_array($child, $replacements, true)) {
                continue;
            }

            if ($child instanceof DOMText) {
                $replacements = array_merge($replacements, $this->replace($child, $dom));
            } else {
                $this->processElement($child, $dom);
            }
        }
    }

    /**
     * Apply SD to a single text node and highlight every match.
     *
     * @param DOMText     $element Text node to search
     * @param DOMDocument $dom     The document
     *
     * @return array Nodes created while splitting $element (highlight spans and
     *               the trailing text nodes); empty when nothing matched
     */
    protected function replace(DOMText &$element, DOMDocument &$dom)
    {
        /** @var string $content */
        $content = $element->textContent ?? $element->nodeValue;

        // NOTE(review): the search runs on the stripslashes()'d text while the
        // returned offsets are applied to the node's text — confirm the offsets
        // still line up when the text contains backslashes.
        $search = new SearchAndDisplace(
            stripslashes($content),
            [
                'searchers' => $this->searchers,
            ],
            $this->searchOnly,
            true
        );
        $changed = $search->execute();

        if (!$changed) {
            return [];
        }

        if ($this->searchOnly) {
            $indexes = $changed;
        } else {
            $element->textContent = $changed['content'];
            $indexes = $changed['indexes'];
        }

        // The indexes are grouped per searcher; flatten them into one list.
        $indexes = $indexes ? array_merge(...array_values($indexes)) : [];

        // Work from the end of the text towards the start so that earlier
        // offsets stay valid while the node is being split.
        usort($indexes, function ($first, $second) {
            return $second['start'] <=> $first['start'];
        });

        $replacementNodes = [];

        foreach ($indexes as $index) {
            // 'end' is the last changed character, so split one character after it.
            $lastNode = $element->splitText($index['end'] + 1);
            $changedNode = $element->splitText($index['start']);

            // Wrap the matched text in a span carrying the highlight style.
            // The text node itself is moved into the span: passing the text as
            // the value argument of createElement() would not escape it and
            // breaks on characters such as '&'.
            $span = $dom->createElement('text:span');
            $span->setAttribute('text:style-name', self::MARK_STYLE_NAME);
            $changedNode->parentNode->replaceChild($span, $changedNode);
            $span->appendChild($changedNode);

            $replacementNodes[] = $span;
            $replacementNodes[] = $lastNode;
        }

        if (!$this->markedStyleCreated) {
            $this->createMarkedStyle($dom);
            $this->markedStyleCreated = true;
        }

        return $replacementNodes;
    }

    /**
     * Create marked style for browser preview
     *
     * Appends a text style named 'mark' with a yellow background to the
     * document's first `automatic-styles` element. Does nothing when the
     * document has no such element.
     *
     * @param DOMDocument $dom The document
     *
     * @return void
     */
    private function createMarkedStyle($dom)
    {
        $automaticStyles = $dom->getElementsByTagName('automatic-styles')->item(0);
        if ($automaticStyles === null) {
            return;
        }

        $properties = $dom->createElement('style:text-properties');
        $properties->setAttribute("officeooo:rsid", '0014890a');
        $properties->setAttribute("fo:background-color", '#ffff00');

        $style = $dom->createElement("style:style");
        $style->setAttribute("style:name", self::MARK_STYLE_NAME);
        $style->setAttribute("style:family", 'text');
        $style->appendChild($properties);

        $automaticStyles->appendChild($style);
    }

    /**
     * Remove marked style used in browser and convert XML file to original file type
     *
     * Only a style whose `style:name` is 'mark' is removed; when the document
     * contains no such style (e.g. nothing matched) the file is left untouched.
     *
     * @param $type file type
     * @param $file absolute file path
     *
     * @return string $path
     */
    public static function prepareForDownload($type, $file)
    {
        $dom = new DOMDocument();
        $dom->load($file);

        $automaticStyles = $dom->getElementsByTagName('automatic-styles')->item(0);

        if ($automaticStyles !== null) {
            // The mark style is appended last, so search from the end.
            for ($node = $automaticStyles->lastChild; $node !== null; $node = $node->previousSibling) {
                if (
                    $node->nodeType === XML_ELEMENT_NODE &&
                    $node->getAttribute('style:name') === self::MARK_STYLE_NAME
                ) {
                    $automaticStyles->removeChild($node);
                    $dom->save($file);
                    break;
                }
            }
        }

        return Convertor::convert($type, $file, true);
    }
}