Browse Source

Fixes to the search & displace functionality

master
Radu Liviu Carjan 2 years ago
parent
commit
d8ac0c8433
  1. 3
      .env.example
  2. 22
      app/SearchDisplace/Convertor/Convertor.php
  3. 79
      app/SearchDisplace/SearchAndDisplaceXML.php

3
.env.example

@ -23,3 +23,6 @@ SD_DUCKLING_URL=http://0.0.0.0:8000/parse
SD_INGEST_URL=http://localhost/ingest
WEBHOOK_CLIENT_SECRET=A5qayc2O53Vslw
# The config path is relative to the storage "app" directory (storage/app)
LIBREOFFICE_CONFIG_PATH=tmp/libreoffice

22
app/SearchDisplace/Convertor/Convertor.php

@ -5,6 +5,7 @@ namespace App\SearchDisplace\Convertor;
use Symfony\Component\Process\Process;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
/**
* Convert documents from formats supported by Libre Office
@ -31,7 +32,7 @@ class Convertor {
}
$env = [
'HOME' => storage_path('app/tmp/'),
'HOME' => storage_path('app/' . env('LIBREOFFICE_CONFIG_PATH', 'tmp/libreoffice')),
];
if ($extension == 'odt') {
@ -50,21 +51,6 @@ class Convertor {
$folder
);
// $process = new Process(
// [
// 'soffice',
// '--convert-to',
// $to,
// $original,
// '--outdir',
// $folder
// ], base_path(),
// [
// 'HOME' => base_path(),
// 'FILTER' => 'OpenDocument Text Flat XML'
// ]
// );
# We will run the process from a shell command line, which allows us to add parameters
# The "OpenDocument Text Flat XML" parameter contains whitespaces, so we will need to add that as
# a env variable parameter, otherwise the Process class will escape it and it will not work properly.
@ -76,8 +62,6 @@ class Convertor {
$process->run(function ($type, $buffer) {
if (Process::ERR === $type) {
Log::info("CONVERT ERROR: " . $buffer);
} else {
// Log::info("CONVERT OUTPUT: " . $buffer);
}
}, $env);
@ -85,6 +69,8 @@ class Convertor {
throw new ProcessFailedException($process);
}
Storage::deleteDirectory(env('LIBREOFFICE_CONFIG_PATH', 'app/tmp/libreoffice'));
return $path['filename'] . '.' . $to;
}
}

79
app/SearchDisplace/SearchAndDisplaceXML.php

@ -60,27 +60,8 @@ class SearchAndDisplaceXML
$dom->load($filePath . "/document.xml");
// foreach($dom->getElementsByTagName('p') as $p) {
foreach($dom->getElementsByTagName('body') as $p) {
// if(
// !$p instanceof DOMText &&
// count($p->childNodes) > 0 &&
// isset($p->parentNode->tagName) &&
// $p->parentNode->tagName !== "table:table-cell"
// ) {
// $replacements = [];
// foreach($p->childNodes as $child) {
// if (in_array($child, $replacements)) {
// continue;
// }
// if (!$child instanceof DOMText) {
// continue;
// }
// $replacements = array_merge($replacements, $this->replace($child, $dom));
// }
// }
$this->processElement($p, $dom);
foreach($dom->getElementsByTagName('body') as $body) {
$this->processElement($body, $dom);
}
$dom->save($filePath . "/document_sdapplied.xml");
@ -126,7 +107,7 @@ class SearchAndDisplaceXML
/**
* Apply SD on document's paragraph
*
* @param DOMNode $element DOM element
* @param DOMText $element DOM element
* @param DOMDocument $dom The document
*
* @return array
@ -154,40 +135,38 @@ class SearchAndDisplaceXML
$content = $element->textContent;
$indexes = $changed;
} else {
$content = $changed['content'];
$element->textContent = $content = $changed['content'];
$indexes = $changed['indexes'];
}
foreach($indexes as $searcher => $changes) {
if(empty($changes)) {
continue;
}
foreach($changes as $change) {
$firstContent = substr($content, 0, $change['start']);
$changedContent = substr($content, $change['start'], $change['end'] - $change['start'] + 1);
$lastContent = substr($content, $change['end'] + 1);
// $firstNode = $dom->createElement("text:span", $firstContent);
$element->textContent = htmlspecialchars($firstContent);
# The changed indexes are grouped into arrays by the searcher they belong to.
# This doesn't concern us here. Merge all the changed indexes into a single array, so we can sort them
$indexes = array_merge( ...array_values($indexes)); # Unpack and merge the arrays into a single array
$changedNode = $dom->createElement("text:span", htmlspecialchars($changedContent));
$changedNode->setAttribute('text:style-name', 'mark');
# Sort all the indexes in descending order
usort($indexes, function($first, $second) {
return $second['start'] - $first['start'];
});
$lastNode = $dom->createElement("text:span", htmlspecialchars($lastContent));
// Add the changed and last nodes after the current (element) node
// $element->parentNode->insertBefore($firstNode, $element->nextSibling);
# element->parentNode->insertBefore(... $element->nextSibling) inserts a new node before the node AFTER this one
# So we need to add the `last` node first, and then the `changed` node BEFORE the last.
$element->parentNode->insertBefore($lastNode, $element->nextSibling);
$element->parentNode->insertBefore($changedNode, $element->nextSibling);
foreach($indexes as $index) {
$content = $element->textContent;
$replacementNodes[] = $changedNode;
$replacementNodes[] = $lastNode;
}
# Split the element at the specified indexes
# (end needs to be + 1, since end is where the changed text finishes),
# so we need to split the text one character after it ends
$lastNode = $element->splitText($index['end'] + 1);
$changedNode = $element->splitText($index['start']);
# DOMText::splitText creates DOMText nodes.
# The changed node needs to be a DOMElement so we can add the mark style
# Create an element with the same content, then replace the node.
$changedNodeElement = $dom->createElement("text:span", $changedNode->textContent);
$changedNodeElement->setAttribute('text:style-name', 'mark');
$changedNode->parentNode->replaceChild($changedNodeElement, $changedNode);
$changedNode = $changedNodeElement;
$replacementNodes[] = $changedNode;
$replacementNodes[] = $lastNode;
}
if(!$this->markedStyleCreated) {

Loading…
Cancel
Save