From c73a1f87f97eb57fd8b1b086c9e04778adcfdeb2 Mon Sep 17 00:00:00 2001 From: Alex Puiu Date: Wed, 2 Mar 2022 17:02:47 +0200 Subject: [PATCH] Get document content from ingest as JSON. --- .../SearchAndDisplaceController.php | 12 +-- app/SearchDisplace/Documents/DocumentFile.php | 32 ++++-- .../Ingest/HandleReceivedDocument.php | 10 +- app/SearchDisplace/Ingest/SendDocument.php | 4 +- app/SearchDisplace/SearchAndDisplaceJSON.php | 98 +++++++++++++++++++ app/SearchDisplace/Searchers/Duckling.php | 4 +- .../js/components/ProcessFile/ProcessFile.ts | 15 +-- resources/js/services/ApiService.ts | 9 +- routes/web.php | 8 -- 9 files changed, 147 insertions(+), 45 deletions(-) create mode 100644 app/SearchDisplace/SearchAndDisplaceJSON.php diff --git a/app/Http/Controllers/SearchAndDisplaceController.php b/app/Http/Controllers/SearchAndDisplaceController.php index 0e7d03a..c360a99 100644 --- a/app/Http/Controllers/SearchAndDisplaceController.php +++ b/app/Http/Controllers/SearchAndDisplaceController.php @@ -3,7 +3,7 @@ namespace App\Http\Controllers; use App\SearchDisplace\Documents\DocumentFile; -use App\SearchDisplace\SearchAndDisplace; +use App\SearchDisplace\SearchAndDisplaceJSON; class SearchAndDisplaceController extends Controller { @@ -29,7 +29,7 @@ class SearchAndDisplaceController extends Controller public function store() { request()->validate([ - 'content' => 'required', // String or file. + 'file' => 'required', // String or file. 'searchers' => 'required|array', 'searchers.*.key' => 'required', 'searchers.*.type' => 'required|in:replace,displace', @@ -39,11 +39,9 @@ class SearchAndDisplaceController extends Controller $searchOnly = request()->input('searchOnly') ?? false; - $searchAndDisplace = new SearchAndDisplace( - stripslashes(request()->input('content')), - [ - 'searchers' => request()->input('searchers'), - ], + $searchAndDisplace = new SearchAndDisplaceJSON( + request()->input('file'), + request()->input('searchers'), $searchOnly ); diff --git a/app/SearchDisplace/Documents/DocumentFile.php b/app/SearchDisplace/Documents/DocumentFile.php index 45fb1ec..37b878d 100644 --- a/app/SearchDisplace/Documents/DocumentFile.php +++ b/app/SearchDisplace/Documents/DocumentFile.php @@ -18,10 +18,10 @@ class DocumentFile $path = $this->getPath($id); // Ingest success. - if ($this->storage->exists("$path/document.md")) { + if ($this->storage->exists("$path/document.json")) { return [ 'status' => 'success', - 'content' => $this->getDocumentContent($id, $path), + 'content' => $this->getDocumentContent($path), ]; } @@ -50,15 +50,29 @@ class DocumentFile return "contracts/$id"; } - protected function getDocumentContent($id, $path) + protected function getDocumentContent($path) { - $content = $this->storage->get("$path/document.md"); + $content = json_decode($this->storage->get("$path/document.json")); - $imageFullPath = url('/') . '/contracts-images/' . $id . '/'; - $imageFullPath = str_replace( ' ', '%20', $imageFullPath); + return $this->convertToHTML($content); + } + + protected function convertToHTML($elements) + { + $html = ''; + $url = url('/') . '/contracts-images'; + foreach($elements as $key => $element) { + if($element->tag !== 'img') { + $html .= "<$element->tag style=\"$element->style\">$element->contenttag>"; + } else { + $src = $url . '/' . str_replace(' ', '%20', $element->src); + $html .= "style src=\"$src\" alt=\"$element->details\">"; + } + + if($key !== array_key_last($elements)) + $html .= '
'; + } - // @TODO Use preg_replace to find correctly formatted images and any wild cards for the image caption. -// return str_replace('![](./', '![](' . $imageFullPath, $content); - return str_replace('](./', '](' . $imageFullPath, $content); + return $html; } } diff --git a/app/SearchDisplace/Ingest/HandleReceivedDocument.php b/app/SearchDisplace/Ingest/HandleReceivedDocument.php index 17778ca..d25792a 100644 --- a/app/SearchDisplace/Ingest/HandleReceivedDocument.php +++ b/app/SearchDisplace/Ingest/HandleReceivedDocument.php @@ -35,8 +35,8 @@ class HandleReceivedDocument */ public function handle() { - if ($this->fileResultType === 'md') { - $this->handleDocumentMD(); + if ($this->fileResultType === 'json') { + $this->handleDocument(); return; } @@ -47,10 +47,10 @@ class HandleReceivedDocument return; } - $this->handleDocumentJson(); + // $this->handleDocumentJson(); } - protected function handleDocumentMD() + protected function handleDocument() { $storage = Storage::disk('local'); @@ -58,7 +58,7 @@ class HandleReceivedDocument // The .md extension signals the success status, the lack of signals the fail status. if ($this->status === 'success') { - $fileName = $fileName . '.md'; + $fileName = $fileName . '.json'; } $dir = "contracts/$this->id"; diff --git a/app/SearchDisplace/Ingest/SendDocument.php b/app/SearchDisplace/Ingest/SendDocument.php index da0b063..e9935e9 100644 --- a/app/SearchDisplace/Ingest/SendDocument.php +++ b/app/SearchDisplace/Ingest/SendDocument.php @@ -23,10 +23,10 @@ class SendDocument * @param string $fileResultType * @throws \Exception */ - public function execute($id, $document, string $fileResultType = 'md') + public function execute($id, $document, string $fileResultType = 'json') { try { - if ( ! in_array($fileResultType, ['md', 'original'])) { + if ( ! in_array($fileResultType, ['md', 'original', 'json'])) { throw new \Exception('Invalid file result type provided.'); } diff --git a/app/SearchDisplace/SearchAndDisplaceJSON.php b/app/SearchDisplace/SearchAndDisplaceJSON.php new file mode 100644 index 0000000..8f7469e --- /dev/null +++ b/app/SearchDisplace/SearchAndDisplaceJSON.php @@ -0,0 +1,98 @@ +file = $file . '/document.json'; + $this->searchers = $searchers; + + $this->storage = Storage::disk('local'); + } + + public function execute() + { + if(! $this->storage->exists("contracts/$this->file")) { + return; + } + + try { + $content = $this->getContent(); + } catch (\Exception $exception) { + \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage()); + + return; + } + + $sd = $this->applySD($content); + + return [ + 'content' => $this->convertToHTML($sd['content']), + 'indexes' => $sd['indexes'] + ]; + } + + protected function getContent() + { + return json_decode($this->storage->get("contracts/$this->file")); + } + + protected function applySD($elements) + { + $indexes = []; + + foreach($elements as $element) { + if($element->tag === 'img') + continue; + + $search = new SearchAndDisplace( + stripslashes($element->content), + [ + 'searchers' => $this->searchers, + ], + false, + true + ); + + $changed = $search->execute(); + + if($changed) { + $indexes = $changed['indexes']; + $element->content = $changed['content']; + } + } + + return [ + 'content' => $elements, + 'indexes' => $indexes + ]; + } + + protected function convertToHTML($elements) + { + $html = ''; + $url = url('/') . '/contracts-images'; + + foreach($elements as $key => $element) { + if($element->tag !== 'img') { + $html .= "<$element->tag style=\"$element->style\">$element->contenttag>"; + } else { + $src = $url . '/' . str_replace(' ', '%20', $element->src); + $html .= "style src=\"$src\" alt=\"$element->details\">"; + } + + if($key !== array_key_last($elements)) { + $html .= '
'; + } + } + + return $html; + } +} \ No newline at end of file diff --git a/app/SearchDisplace/Searchers/Duckling.php b/app/SearchDisplace/Searchers/Duckling.php index 38a82b1..f9a219b 100644 --- a/app/SearchDisplace/Searchers/Duckling.php +++ b/app/SearchDisplace/Searchers/Duckling.php @@ -10,8 +10,8 @@ class Duckling public function __construct() { - // $this->url = env('SD_DUCKLING_URL'); - $this->url = 'host.docker.internal:5000/parse'; + $this->url = env('SD_DUCKLING_URL'); + // $this->url = 'host.docker.internal:5000/parse'; } public function execute($content, $dimensions) diff --git a/resources/js/components/ProcessFile/ProcessFile.ts b/resources/js/components/ProcessFile/ProcessFile.ts index 3bea862..7030aa3 100644 --- a/resources/js/components/ProcessFile/ProcessFile.ts +++ b/resources/js/components/ProcessFile/ProcessFile.ts @@ -113,24 +113,24 @@ export default class ProcessFile extends Vue { } /** - * MD-to-HTML compiled file content + * HTML compiled file content */ get compiledFileContent(): string { - return marked(this.fileContent); + return this.fileContent; } /** - * MD-to-HTML compiled processed file content + * HTML compiled processed file content */ get compiledProcessedFileContent(): string { - return marked(this.processedFileContent); + return this.processedFileContent; } /** * MD-to-HTML compiled processed file content with diff highlight */ get compiledProcessedFileContentPreview(): string { - return marked(this.processedFileContentPreview); + return this.processedFileContentPreview; } public changeRoute(url: string) { @@ -205,6 +205,7 @@ export default class ProcessFile extends Vue { public async uploadFile(event: any): Promise { localStorage.setItem('searchers', JSON.stringify(this.selectedSearchers)); localStorage.setItem('searchersOptions', JSON.stringify(this.searchersOptions)); + this.toggleUploadDialog(false); this.$confirm.require({ message: 'You will lose any progress on the current uploaded document. Are you sure you want to proceed?', @@ -337,7 +338,7 @@ export default class ProcessFile extends Vue { }); try { - const response = await this.$api.filterDocument(this.fileContent, searchers); + const response = await this.$api.filterDocument(this.file.id, searchers); this.processedFileContent = response.content; this.documentDiffIndexes = response.indexes; @@ -428,7 +429,7 @@ export default class ProcessFile extends Vue { }); }); - let response = await this.$api.convertFile(this.processedFileContent, {id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers); + let response = await this.$api.convertFile({id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers); window.open(`${window.location.origin}/file/download/` + response.path); } diff --git a/resources/js/services/ApiService.ts b/resources/js/services/ApiService.ts index 4197f83..f72175e 100644 --- a/resources/js/services/ApiService.ts +++ b/resources/js/services/ApiService.ts @@ -116,15 +116,15 @@ export default class ApiService { * @param {boolean} searchOnly Whether or not to also displace the content (default yes) */ public async filterDocument( - content: string, + file: string, searchers: Array<{ key: string; type: string; value: string; }>, - searchOnly: boolean = false + searchOnly: boolean = false, ) { try { let response = await axios.post( this.apiRoutes.searchAndDisplace, { - 'content': content, + 'file': file, 'searchers': searchers, 'searchOnly': searchOnly } @@ -145,7 +145,7 @@ export default class ApiService { * * @returns */ - public async convertFile(content: string, file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) { + public async convertFile(file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) { try { let response = await axios.post( this.apiRoutes.fileDownload, @@ -154,7 +154,6 @@ export default class ApiService { 'id': file.id, 'type': (file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length) !== 'pdf') ? 'odt' : 'pdf' }, - 'content': content, 'searchers': searchers } ); diff --git a/routes/web.php b/routes/web.php index 6c290e7..a12a3ed 100644 --- a/routes/web.php +++ b/routes/web.php @@ -15,14 +15,6 @@ use Illuminate\Support\Facades\Route; */ Route::get('/', 'HomeController@index'); -// Route::get('/', function() { -// $dom = new DOMDocument(); -// $dom->load(storage_path() . '/doc.xml'); - -// $dom->getElementsByTagName('p')->item(0)->nodeValue = 'changed'; - -// $dom->save(storage_path() . '/changed.xml'); -// }); Route::get('/file/download/{path}', [ FileController::class,