Browse Source

WIP Display document preview from original file

master
Alex Puiu 2 years ago
parent
commit
b893f55220
  1. 33
      app/Http/Controllers/FileController.php
  2. 38
      app/Http/Controllers/SearchAndDisplaceController.php
  3. 35
      app/SearchDisplace/Convertor/Convertor.php
  4. 37
      app/SearchDisplace/Documents/DocumentFile.php
  5. 4
      app/SearchDisplace/Ingest/HandleReceivedDocument.php
  6. 4
      app/SearchDisplace/Ingest/SendDocument.php
  7. 4
      app/SearchDisplace/SearchAndDisplace.php
  8. 98
      app/SearchDisplace/SearchAndDisplaceJSON.php
  9. 129
      app/SearchDisplace/SearchAndDisplaceXML.php
  10. 8
      resources/js/components/ProcessFile/ProcessFile.ts

33
app/Http/Controllers/FileController.php

@ -7,14 +7,13 @@ use Illuminate\Http\JsonResponse;
use Illuminate\Support\Facades\Storage;
use Illuminate\Http\UploadedFile;
use GuzzleHttp\Exception\BadResponseException;
use App\SearchDisplace\SearchAndDisplaceXML;
use Symfony\Component\Process\Process;
use App\SearchDisplace\Convertor\Convertor;
class FileController extends Controller
{
/**
 * Prepare the locations this controller works with.
 *
 * Sets the contracts directory path (resolved through storage_path()) and
 * the 'local' storage disk used for the file operations in this controller.
 */
public function __construct()
{
// Path prefix under which each uploaded document gets its own folder.
$this->directoryPath = storage_path("app/contracts/");
// Disk handle reused by the methods below instead of the Storage facade default.
$this->storage = Storage::disk('local');
}
/**
@ -46,14 +45,13 @@ class FileController extends Controller
'name' => $file->getClientOriginalName()
]);
$originalFile = $fileId . "/{$fileId}." . $file->extension();
$originalFile = $fileId . "/document." . $file->extension();
Storage::disk('local')->put("contracts/{$originalFile}", file_get_contents($file)); // keep the original file
$this->storage->put("contracts/{$originalFile}", file_get_contents($file)); // keep the original file
$process = new Process(['soffice', '--convert-to', 'xml', $this->directoryPath . $originalFile, '--outdir', $this->directoryPath . $fileId]);
$process->run();
$xml = Convertor::convert('xml', $this->storage->path("contracts/" . $originalFile));
Storage::delete("contracts/{$originalFile}");
$this->storage->delete("contracts/{$originalFile}");
return response()->json([
'id' => $fileId,
@ -77,14 +75,12 @@ class FileController extends Controller
public function convert(): JsonResponse
{
$file = (object) request()->input('file');
$searchers = request()->input('searchers');
$xml = $this->storage->path("contracts/{$file->id}/document_sdapplied.xml");
$xml = $this->directoryPath . "{$file->id}/{$file->id}.xml";
(new SearchAndDisplaceXML($xml, $searchers, $file->type))->execute();
$original = Convertor::convert($file->type, $xml, true);
return response()->json([
'path' => "tmp/{$file->id}.{$file->type}"
'path' => "tmp/$original"
]);
}
@ -95,21 +91,20 @@ class FileController extends Controller
*/
public function download(string $path)
{
return Storage::download($path);
return $this->storage->download($path);
}
/**
* Delete a file currently in progress
* Clear contracts and tmp directory
*
* @param string $id
* @return JsonResponse
*/
public function delete(string $id): JsonResponse
{
Storage::deleteDirectory("contracts/${id}");
$tmpFiles = Storage::allFiles("tmp");
$success = Storage::delete($tmpFiles);
$this->storage->deleteDirectory("contracts/${id}");
$tmpFiles = $this->storage->allFiles("tmp");
$success = $this->storage->delete($tmpFiles);
return response()->json(['success' => $success]);
}

38
app/Http/Controllers/SearchAndDisplaceController.php

@ -3,10 +3,18 @@
namespace App\Http\Controllers;
use App\SearchDisplace\Documents\DocumentFile;
use App\SearchDisplace\SearchAndDisplaceJSON;
use Illuminate\Support\Facades\Storage;
use App\SearchDisplace\SearchAndDisplaceXML;
class SearchAndDisplaceController extends Controller
{
protected $storage;
/**
 * Bind the controller to the 'local' storage disk.
 */
public function __construct()
{
// Kept on the instance so the actions can read and delete document files.
$this->storage = Storage::disk('local');
}
public function show($id)
{
$handler = new DocumentFile();
@ -14,9 +22,10 @@ class SearchAndDisplaceController extends Controller
try {
$result = $handler->getAfterIngest($id);
// if ($result['status'] !== 'processing') {
// $handler->destroy($id);
// }
// remove HTML file after receiving its content
if ($result['status'] !== 'processing') {
$this->storage->delete("contracts/$id/document.html");
}
return response()->json($result, 200);
} catch (\Exception $exception) {
@ -29,7 +38,7 @@ class SearchAndDisplaceController extends Controller
public function store()
{
request()->validate([
'file' => 'required', // String or file.
'file' => 'required', // file.
'searchers' => 'required|array',
'searchers.*.key' => 'required',
'searchers.*.type' => 'required|in:replace,displace',
@ -37,16 +46,23 @@ class SearchAndDisplaceController extends Controller
'searchOnly' => 'nullable|boolean'
]);
$file = request()->input('file');
$searchOnly = request()->input('searchOnly') ?? false;
$searchAndDisplace = new SearchAndDisplaceJSON(
request()->input('file'),
request()->input('searchers'),
$searchOnly
);
$changes = (new SearchAndDisplaceXML($file, request()->input('searchers'), $searchOnly))->execute();
try {
return response()->json($searchAndDisplace->execute(), 200);
$processedFile = "contracts/$file/document_sdapplied.html";
if($this->storage->exists($processedFile)) {
$processedFileContent = $this->storage->get($processedFile);
$this->storage->delete($processedFile);
return response()->json([
'content' => $processedFileContent,
'indexes' => []
], 200);
}
} catch (\Exception $exception) {
return response()->json([
'message' => $exception->getMessage(),

35
app/SearchDisplace/Convertor/Convertor.php

@ -0,0 +1,35 @@
<?php
namespace App\SearchDisplace\Convertor;
use Symfony\Component\Process\Process;
/**
 * Convert documents between the formats supported by LibreOffice by running
 * the `soffice` command-line tool.
 */
class Convertor
{
    /**
     * Convert a document with `soffice --convert-to`.
     *
     * @param string $to       Desired output format, passed verbatim to `--convert-to`.
     *                         Either a bare extension ("html") or the
     *                         "extension:FilterName[:options]" form.
     * @param string $document Absolute path of the file to convert.
     * @param bool   $tmp      If true the converted file is written to the
     *                         storage "app/tmp/" directory (for download);
     *                         otherwise it is written next to the source file.
     *
     * @return string File name (not a full path) of the converted file.
     */
    public static function convert($to, $document, $tmp = false)
    {
        $path = pathinfo($document);

        $folder = $tmp
            ? storage_path('app/tmp/')
            : $path['dirname'] . '/';

        $process = new Process(['soffice', '--convert-to', $to, $document, '--outdir', $folder]);
        // NOTE(review): the exit status is not checked, so a failed conversion
        // still returns a file name that may not exist on disk. Confirm whether
        // callers should be told about failures before making this throw.
        $process->run();

        // soffice names the output after the extension only; anything after the
        // first ':' is the filter name/options and is not part of the file name.
        $extension = explode(':', (string) $to, 2)[0];

        return $path['filename'] . '.' . $extension;
    }
}

37
app/SearchDisplace/Documents/DocumentFile.php

@ -3,6 +3,7 @@
namespace App\SearchDisplace\Documents;
use Illuminate\Support\Facades\Storage;
use DOMDocument;
class DocumentFile
{
@ -18,10 +19,10 @@ class DocumentFile
$path = $this->getPath($id);
// Ingest success.
if ($this->storage->exists("$path/document.json")) {
if ($this->storage->exists("$path/document.html")) {
return [
'status' => 'success',
'content' => $this->getDocumentContent($path),
'content' => $this->getDocumentContent($path, $id),
];
}
@ -50,29 +51,27 @@ class DocumentFile
return "contracts/$id";
}
protected function getDocumentContent($path)
protected function getDocumentContent($path, $id)
{
$content = json_decode($this->storage->get("$path/document.json"));
$document = $this->storage->path($path) . '/document.html';
return $this->convertToHTML($content);
$this->updateImagesPath($document, $id);
return file_get_contents($document);
}
protected function convertToHTML($elements)
public static function updateImagesPath($document, $id)
{
$html = '';
$url = url('/') . '/contracts-images';
foreach($elements as $key => $element) {
if($element->tag !== 'img') {
$html .= "<$element->tag style=\"$element->style\">$element->content</$element->tag>";
} else {
$src = $url . '/' . str_replace(' ', '%20', $element->src);
$html .= "<img $element->style src=\"$src\" alt=\"$element->details\">";
}
if($key !== array_key_last($elements))
$html .= '<br>';
$html = new DOMDocument();
$html->loadHTMLFile($document);
$url = url('/') . "/contracts-images/$id/";
foreach($html->getElementsByTagName('img') as $image) {
$src = $image->getAttribute('src');
$image->setAttribute('src', $url . $src);
}
return $html;
$html->saveHTMLFile($document);
}
}

4
app/SearchDisplace/Ingest/HandleReceivedDocument.php

@ -35,7 +35,7 @@ class HandleReceivedDocument
*/
public function handle()
{
if ($this->fileResultType === 'json') {
if ($this->fileResultType === 'html') {
$this->handleDocument();
return;
@ -58,7 +58,7 @@ class HandleReceivedDocument
// The file extension signals the success status; the lack of one signals the fail status.
if ($this->status === 'success') {
$fileName = $fileName . '.json';
$fileName = $fileName . '.html';
}
$dir = "contracts/$this->id";

4
app/SearchDisplace/Ingest/SendDocument.php

@ -23,10 +23,10 @@ class SendDocument
* @param string $fileResultType
* @throws \Exception
*/
public function execute($id, $document, string $fileResultType = 'json')
public function execute($id, $document, string $fileResultType = 'html')
{
try {
if ( ! in_array($fileResultType, ['md', 'original', 'json'])) {
if ($fileResultType !== 'html') {
throw new \Exception('Invalid file result type provided.');
}

4
app/SearchDisplace/SearchAndDisplace.php

@ -8,10 +8,10 @@ class SearchAndDisplace
{
protected $documentContent;
protected $info;
protected $searchOnly = false;
protected $searchOnly;
protected $isDocument = false;
public function __construct($documentContent, $info, $searchOnly = false, $isDocument = false)
public function __construct($documentContent, $info, $searchOnly, $isDocument = false)
{
$this->documentContent = $documentContent;
$this->info = $info;

98
app/SearchDisplace/SearchAndDisplaceJSON.php

@ -1,98 +0,0 @@
<?php
namespace App\SearchDisplace;
use Illuminate\Support\Facades\Storage;
use App\SearchDisplace\Output\HtmlOutput;
/**
 * Runs Search & Displace either on a raw string (search-only mode) or on the
 * ingested "document.json" of a stored contract.
 */
class SearchAndDisplaceJSON
{
    /** @var string|null Relative path "<id>/document.json"; set only when not in search-only mode. */
    protected $file;

    /** @var array Searchers forwarded to SearchAndDisplace. */
    protected $searchers;

    protected $searchOnly;

    /** @var mixed Raw content to search; set only in search-only mode. */
    protected $content;

    /** Storage disk holding the "contracts/" directory (previously an undeclared dynamic property). */
    protected $storage;

    /**
     * @param mixed $file       Raw content when $searchOnly is truthy, otherwise the
     *                          document directory name under "contracts/".
     * @param array $searchers  Searchers to apply.
     * @param mixed $searchOnly Truthy to treat $file as content and only search it.
     */
    public function __construct($file, $searchers, $searchOnly)
    {
        if ($searchOnly) {
            $this->content = $file;
        } else {
            $this->file = $file . '/document.json';
        }

        $this->searchers = $searchers;
        $this->storage = Storage::disk('local');
    }

    /**
     * Execute the search (and displace) operation.
     *
     * @return mixed Result of SearchAndDisplace::execute() in search-only mode,
     *               the HtmlOutput data for a stored document, or null when the
     *               document file is missing or its content could not be read.
     */
    public function execute()
    {
        if (isset($this->content)) {
            $search = new SearchAndDisplace(
                stripslashes($this->content),
                [
                    'searchers' => $this->searchers,
                ],
                true
            );

            return $search->execute();
        }

        if (! $this->storage->exists("contracts/$this->file")) {
            return;
        }

        try {
            $content = $this->getContent();
        } catch (\Exception $exception) {
            // Reading failed: log the reason and return nothing.
            \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage());

            return;
        }

        $searchDisplace = $this->applySD($content);
        $convert = new HtmlOutput($searchDisplace);

        return $convert->getData();
    }

    /**
     * Read and decode the stored document JSON.
     *
     * @return mixed Decoded JSON (objects, not associative arrays).
     */
    protected function getContent()
    {
        return json_decode($this->storage->get("contracts/$this->file"));
    }

    /**
     * Apply the searchers to every non-image element and collect the match indexes.
     *
     * @param iterable $elements Decoded document elements; each is read for
     *                           ->tag and ->content and may be modified in place.
     *
     * @return array ['content' => the elements, 'indexes' => matches grouped by key]
     */
    protected function applySD($elements)
    {
        $indexes = [];

        foreach ($elements as $element) {
            if ($element->tag === 'img') {
                continue;
            }

            $search = new SearchAndDisplace(
                stripslashes($element->content),
                [
                    'searchers' => $this->searchers,
                ],
                false,
                true
            );
            $changed = $search->execute();

            if (! $changed) {
                continue;
            }

            foreach ($changed['indexes'] as $key => $searcher) {
                foreach ($searcher as $change) {
                    // NOTE(review): this truthiness check also skips a match whose
                    // start offset is 0 - confirm whether that is intended.
                    if ($change['start']) {
                        $indexes[$key][] = $change;
                        $element->paragraph = $key;
                    }
                }
            }

            $element->content = $changed['content'];
        }

        return [
            'content' => $elements,
            'indexes' => $indexes
        ];
    }
}

129
app/SearchDisplace/SearchAndDisplaceXML.php

@ -2,57 +2,132 @@
namespace App\SearchDisplace;
use Symfony\Component\Process\Process;
use App\SearchDisplace\Documents\DocumentFile;
use Illuminate\Support\Facades\Storage;
use App\SearchDisplace\Convertor\Convertor;
class SearchAndDisplaceXML
{
protected $file;
protected $searchers;
protected $type;
protected $storage;
protected $searchOnly;
public function __construct($file, $searchers, $type = 'odt')
public function __construct($file, $searchers, $searchOnly)
{
$this->file = $file;
$this->fileDirectory = $file;
$this->searchers = $searchers;
$this->type = $type;
$this->storage = Storage::disk('local');
$this->searchOnly = $searchOnly;
}
public function execute()
{
$this->applySD();
$sdXML = $this->applySD();
$pathinfo = pathinfo($sdXML);
$this->convertToOriginalFileType();
$this->convertSearchDisplacedXMLToHTML($sdXML);
DocumentFile::updateImagesPath($pathinfo['dirname'] . '/document_sdapplied.html', $this->fileDirectory);
return $pathinfo['filename'];
}
protected function convertToOriginalFileType()
/**
* Convert (Search displaced) XML to HTML for browser preview
*
* @return void
*/
protected function convertSearchDisplacedXMLToHTML($file)
{
(new Process(['soffice', '--convert-to', $this->type, $this->file, '--outdir', storage_path('app/tmp/')]))->run();
Convertor::convert('html', $file);
}
/**
* Read XML document and send text contents to SD
*
* @return void
*/
protected function applySD()
{
$dom = new \DOMDocument();
$dom->load($this->file);
foreach($dom->getElementsByTagName('span') as $p) {
$search = new SearchAndDisplace(
stripslashes($p->textContent),
[
'searchers' => $this->searchers,
],
false,
true
);
$changed = $search->execute();
$filePath = $this->storage->path("contracts/$this->fileDirectory");
$dom->load($filePath . "/document.xml");
if(!$changed) {
continue;
}
foreach($dom->getElementsByTagName('p') as $p) {
if(count($p->childNodes) > 0 && isset($p->parentNode->tagName) && $p->parentNode->tagName !== "table:table-cell") {
foreach($p->childNodes as $child) {
if(isset($child->tagName) && $child->tagName === "text:span") {
$content = trim($child->textContent);
if($content == '') {
continue;
}
$p->textContent = $changed['content'];
$this->replace($content, $child, $dom);
}
}
} else {
$content = trim($p->textContent);
if($content == '') {
continue;
}
$this->replace($content, $p, $dom);
}
}
$dom->save($this->file);
$dom->save($filePath . "/document_sdapplied.xml");
return $filePath . "/document_sdapplied.xml";
}
/**
 * Apply SD on a document's paragraph and mark every match.
 *
 * The element's text becomes the text before the first match; each match is
 * appended as a "text:span" child whose text is wrapped in literal
 * "<mark>...</mark>", with the text between and after matches appended as
 * further "text:span" children.
 *
 * @param string $content paragraph content
 * @param \DOMNode $element DOM element holding that content
 * @param \DOMDocument $dom document used to create the new nodes
 *
 * @return void
 */
protected function replace($content, $element, $dom)
{
    $search = new SearchAndDisplace(
        stripslashes($content),
        [
            'searchers' => $this->searchers,
        ],
        $this->searchOnly,
        true
    );
    $result = $search->execute();

    if (!$result) {
        return;
    }

    if ($this->searchOnly) {
        // Search-only results are the indexes themselves; offsets are applied
        // to the element's current text.
        $content = $element->textContent;
        $indexes = $result;
    } else {
        $content = $result['content'];
        $indexes = $result['indexes'];
    }

    // Flatten the per-searcher lists so all matches can be processed in one pass.
    $changes = [];
    foreach ($indexes as $searcherChanges) {
        if (empty($searcherChanges)) {
            continue;
        }

        foreach ($searcherChanges as $change) {
            $changes[] = $change;
        }
    }

    if ($changes === []) {
        return;
    }

    usort($changes, static function ($a, $b) {
        return $a['start'] <=> $b['start'];
    });

    // createElement()'s value argument parses entity references (a bare "&"
    // breaks it), so the text is attached through a text node instead.
    $appendSpan = static function ($text) use ($element, $dom) {
        $span = $dom->createElement('text:span');
        if ($text !== '') {
            $span->appendChild($dom->createTextNode($text));
        }
        $element->appendChild($span);
    };

    // NOTE(review): offsets are applied with byte-wise substr() and 'end' is
    // treated as inclusive, as before - confirm against SearchAndDisplace.
    $cursor = 0;
    $leadingWritten = false;

    foreach ($changes as $change) {
        $start = (int) $change['start'];
        $end = (int) $change['end'];

        // Skip matches overlapping an already marked range.
        if ($start < $cursor) {
            continue;
        }

        $before = (string) substr($content, $cursor, $start - $cursor);

        if (!$leadingWritten) {
            // Assigning textContent drops all existing children, so it is done
            // exactly once, before any span is appended.
            $element->textContent = $before;
            $leadingWritten = true;
        } elseif ($before !== '') {
            $appendSpan($before);
        }

        $appendSpan("<mark>" . substr($content, $start, $end - $start + 1) . "</mark>");
        $cursor = $end + 1;
    }

    if (!$leadingWritten) {
        return;
    }

    // Trailing text after the last match (an empty span when there is none).
    $appendSpan((string) substr($content, $cursor));
}
}

8
resources/js/components/ProcessFile/ProcessFile.ts

@ -1,4 +1,3 @@
import marked from 'marked';
import {Vue, Component, Prop, Watch} from 'vue-property-decorator';
import {FileData} from '@/interfaces/FileData';
import { eventBus } from '@/app';
@ -357,7 +356,6 @@ export default class ProcessFile extends Vue {
private async runSearchersWithoutDisplacing() {
this.processing = true;
this.processedFileContent = '';
let searchers: Array<{ key: string; type: string; value: string; }> = [];
@ -370,10 +368,10 @@ export default class ProcessFile extends Vue {
});
try {
const response = await this.$api.filterDocument(this.fileContent, searchers, true);
const response = await this.$api.filterDocument(this.file.id, searchers, true);
this.processedFileContent = this.fileContent;
this.documentDiffIndexes = response;
this.processedFileContent = response.content;
// this.documentDiffIndexes = response;
this.createDiffPreview();
this.processing = false;

Loading…
Cancel
Save