Browse Source

Get document content from ingest as JSON.

master
Alex Puiu 2 years ago
parent
commit
c73a1f87f9
  1. 12
      app/Http/Controllers/SearchAndDisplaceController.php
  2. 32
      app/SearchDisplace/Documents/DocumentFile.php
  3. 10
      app/SearchDisplace/Ingest/HandleReceivedDocument.php
  4. 4
      app/SearchDisplace/Ingest/SendDocument.php
  5. 98
      app/SearchDisplace/SearchAndDisplaceJSON.php
  6. 4
      app/SearchDisplace/Searchers/Duckling.php
  7. 15
      resources/js/components/ProcessFile/ProcessFile.ts
  8. 9
      resources/js/services/ApiService.ts
  9. 8
      routes/web.php

12
app/Http/Controllers/SearchAndDisplaceController.php

@ -3,7 +3,7 @@
namespace App\Http\Controllers; namespace App\Http\Controllers;
use App\SearchDisplace\Documents\DocumentFile; use App\SearchDisplace\Documents\DocumentFile;
use App\SearchDisplace\SearchAndDisplace;
use App\SearchDisplace\SearchAndDisplaceJSON;
class SearchAndDisplaceController extends Controller class SearchAndDisplaceController extends Controller
{ {
@ -29,7 +29,7 @@ class SearchAndDisplaceController extends Controller
public function store() public function store()
{ {
request()->validate([ request()->validate([
'content' => 'required', // String or file.
'file' => 'required', // String or file.
'searchers' => 'required|array', 'searchers' => 'required|array',
'searchers.*.key' => 'required', 'searchers.*.key' => 'required',
'searchers.*.type' => 'required|in:replace,displace', 'searchers.*.type' => 'required|in:replace,displace',
@ -39,11 +39,9 @@ class SearchAndDisplaceController extends Controller
$searchOnly = request()->input('searchOnly') ?? false; $searchOnly = request()->input('searchOnly') ?? false;
$searchAndDisplace = new SearchAndDisplace(
stripslashes(request()->input('content')),
[
'searchers' => request()->input('searchers'),
],
$searchAndDisplace = new SearchAndDisplaceJSON(
request()->input('file'),
request()->input('searchers'),
$searchOnly $searchOnly
); );

32
app/SearchDisplace/Documents/DocumentFile.php

@ -18,10 +18,10 @@ class DocumentFile
$path = $this->getPath($id); $path = $this->getPath($id);
// Ingest success. // Ingest success.
if ($this->storage->exists("$path/document.md")) {
if ($this->storage->exists("$path/document.json")) {
return [ return [
'status' => 'success', 'status' => 'success',
'content' => $this->getDocumentContent($id, $path),
'content' => $this->getDocumentContent($path),
]; ];
} }
@ -50,15 +50,29 @@ class DocumentFile
return "contracts/$id"; return "contracts/$id";
} }
protected function getDocumentContent($id, $path)
protected function getDocumentContent($path)
{ {
$content = $this->storage->get("$path/document.md");
$content = json_decode($this->storage->get("$path/document.json"));
$imageFullPath = url('/') . '/contracts-images/' . $id . '/';
$imageFullPath = str_replace( ' ', '%20', $imageFullPath);
return $this->convertToHTML($content);
}
protected function convertToHTML($elements)
{
$html = '';
$url = url('/') . '/contracts-images';
foreach($elements as $key => $element) {
if($element->tag !== 'img') {
$html .= "<$element->tag style=\"$element->style\">$element->content</$element->tag>";
} else {
$src = $url . '/' . str_replace(' ', '%20', $element->src);
$html .= "<img $element->style src=\"$src\" alt=\"$element->details\">";
}
if($key !== array_key_last($elements))
$html .= '<br>';
}
// @TODO Use preg_replace to find correctly formatted images and any wild cards for the image caption.
// return str_replace('![](./', '![](' . $imageFullPath, $content);
return str_replace('](./', '](' . $imageFullPath, $content);
return $html;
} }
} }

10
app/SearchDisplace/Ingest/HandleReceivedDocument.php

@ -35,8 +35,8 @@ class HandleReceivedDocument
*/ */
public function handle() public function handle()
{ {
if ($this->fileResultType === 'md') {
$this->handleDocumentMD();
if ($this->fileResultType === 'json') {
$this->handleDocument();
return; return;
} }
@ -47,10 +47,10 @@ class HandleReceivedDocument
return; return;
} }
$this->handleDocumentJson();
// $this->handleDocumentJson();
} }
protected function handleDocumentMD()
protected function handleDocument()
{ {
$storage = Storage::disk('local'); $storage = Storage::disk('local');
@ -58,7 +58,7 @@ class HandleReceivedDocument
// The .json extension signals the success status; its absence signals the fail status. // The .json extension signals the success status; its absence signals the fail status.
if ($this->status === 'success') { if ($this->status === 'success') {
$fileName = $fileName . '.md';
$fileName = $fileName . '.json';
} }
$dir = "contracts/$this->id"; $dir = "contracts/$this->id";

4
app/SearchDisplace/Ingest/SendDocument.php

@ -23,10 +23,10 @@ class SendDocument
* @param string $fileResultType * @param string $fileResultType
* @throws \Exception * @throws \Exception
*/ */
public function execute($id, $document, string $fileResultType = 'md')
public function execute($id, $document, string $fileResultType = 'json')
{ {
try { try {
if ( ! in_array($fileResultType, ['md', 'original'])) {
if ( ! in_array($fileResultType, ['md', 'original', 'json'])) {
throw new \Exception('Invalid file result type provided.'); throw new \Exception('Invalid file result type provided.');
} }

98
app/SearchDisplace/SearchAndDisplaceJSON.php

@ -0,0 +1,98 @@
<?php
namespace App\SearchDisplace;
use Illuminate\Support\Facades\Storage;
/**
 * Applies the configured searchers to an ingested document stored as
 * "contracts/<file>/document.json" on the local disk and renders the
 * (possibly displaced) elements as an HTML string.
 *
 * The JSON document is expected to decode to a list of objects exposing
 * "tag", "style" and "content" (or, for tag "img", "style", "src" and
 * "details").
 */
class SearchAndDisplaceJSON
{
    /** @var string Document path relative to the "contracts" directory. */
    protected $file;

    /** @var array Searcher definitions forwarded to SearchAndDisplace. */
    protected $searchers;

    /** @var bool Whether the searchers should only search, without displacing. */
    protected $searchOnly;

    /** @var mixed Storage disk returned by Storage::disk('local'). */
    protected $storage;

    /**
     * @param string $file       Document directory/id below "contracts".
     * @param array  $searchers  Searcher definitions.
     * @param bool   $searchOnly Optional; callers already pass it as a third
     *                           argument, which used to be silently dropped.
     */
    public function __construct($file, $searchers, $searchOnly = false)
    {
        $this->file = $file . '/document.json';
        $this->searchers = $searchers;
        $this->searchOnly = (bool) $searchOnly;
        $this->storage = Storage::disk('local');
    }

    /**
     * Load the document, run the searchers and build the response payload.
     *
     * @return array|null ['content' => string HTML, 'indexes' => mixed], or
     *                    null when the document is missing or unreadable.
     */
    public function execute()
    {
        if (! $this->storage->exists("contracts/$this->file")) {
            return;
        }

        try {
            $content = $this->getContent();
        } catch (\Exception $exception) {
            \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage());

            return;
        }

        $sd = $this->applySD($content);

        return [
            'content' => $this->convertToHTML($sd['content']),
            'indexes' => $sd['indexes'],
        ];
    }

    /**
     * Read and decode the stored JSON document.
     *
     * @return array List of element objects.
     *
     * @throws \JsonException            When the file is not valid JSON.
     * @throws \UnexpectedValueException When the JSON is not a list of elements.
     */
    protected function getContent()
    {
        // JSON_THROW_ON_ERROR makes malformed JSON reach the caller's catch
        // block instead of silently yielding null.
        $elements = json_decode(
            (string) $this->storage->get("contracts/$this->file"),
            false,
            512,
            JSON_THROW_ON_ERROR
        );

        if (! is_array($elements)) {
            throw new \UnexpectedValueException('Document JSON must decode to a list of elements.');
        }

        return $elements;
    }

    /**
     * Run the searchers over the content of every non-image element.
     *
     * @param array $elements Decoded element objects; their "content" is
     *                        updated in place.
     *
     * @return array ['content' => array of elements, 'indexes' => mixed]
     */
    protected function applySD($elements)
    {
        $indexes = [];

        foreach ($elements as $element) {
            // Images carry no searchable text.
            if ($element->tag === 'img') {
                continue;
            }

            $search = new SearchAndDisplace(
                stripslashes($element->content),
                [
                    'searchers' => $this->searchers,
                ],
                // Third argument is the search-only flag; it used to be hard-coded to false.
                $this->searchOnly,
                // NOTE(review): meaning of this fourth flag is not visible here — confirm.
                true
            );

            $changed = $search->execute();

            if ($changed) {
                // NOTE(review): each element overwrites the previous indexes, so
                // only the last processed element's indexes are returned. Kept
                // as-is because the expected merged shape is unknown — confirm.
                $indexes = $changed['indexes'] ?? $indexes;
                $element->content = $changed['content'] ?? $element->content;
            }
        }

        return [
            'content' => $elements,
            'indexes' => $indexes,
        ];
    }

    /**
     * Render the elements as HTML, separated by "<br>".
     *
     * @param array $elements Decoded element objects.
     *
     * @return string
     */
    protected function convertToHTML($elements)
    {
        $html = '';
        $url = url('/') . '/contracts-images';
        // Loop-invariant: compute the last key once.
        $lastKey = array_key_last($elements);

        foreach ($elements as $key => $element) {
            $style = $element->style ?? '';

            if ($element->tag !== 'img') {
                // Tag and content are emitted verbatim: content may already
                // contain markup produced by the searchers.
                $styleAttribute = $this->escapeAttribute($style);
                $html .= "<$element->tag style=\"$styleAttribute\">$element->content</$element->tag>";
            } else {
                $src = $this->escapeAttribute($url . '/' . str_replace(' ', '%20', $element->src ?? ''));
                $alt = $this->escapeAttribute($element->details ?? '');
                // For images "style" is emitted outside of any attribute, so it
                // is left verbatim (presumably raw attributes — verify against ingest).
                $html .= "<img $style src=\"$src\" alt=\"$alt\">";
            }

            if ($key !== $lastKey) {
                $html .= '<br>';
            }
        }

        return $html;
    }

    /**
     * Escape a value for use inside a double-quoted HTML attribute without
     * double-encoding entities that are already present.
     *
     * @param mixed $value
     *
     * @return string
     */
    protected function escapeAttribute($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    }
}

4
app/SearchDisplace/Searchers/Duckling.php

@ -10,8 +10,8 @@ class Duckling
public function __construct() public function __construct()
{ {
// $this->url = env('SD_DUCKLING_URL');
$this->url = 'host.docker.internal:5000/parse';
$this->url = env('SD_DUCKLING_URL');
// $this->url = 'host.docker.internal:5000/parse';
} }
public function execute($content, $dimensions) public function execute($content, $dimensions)

15
resources/js/components/ProcessFile/ProcessFile.ts

@ -113,24 +113,24 @@ export default class ProcessFile extends Vue {
} }
/** /**
* MD-to-HTML compiled file content
* HTML compiled file content
*/ */
get compiledFileContent(): string { get compiledFileContent(): string {
return marked(this.fileContent);
return this.fileContent;
} }
/** /**
* MD-to-HTML compiled processed file content
* HTML compiled processed file content
*/ */
get compiledProcessedFileContent(): string { get compiledProcessedFileContent(): string {
return marked(this.processedFileContent);
return this.processedFileContent;
} }
/** /**
* HTML compiled processed file content with diff highlight * HTML compiled processed file content with diff highlight
*/ */
get compiledProcessedFileContentPreview(): string { get compiledProcessedFileContentPreview(): string {
return marked(this.processedFileContentPreview);
return this.processedFileContentPreview;
} }
public changeRoute(url: string) { public changeRoute(url: string) {
@ -205,6 +205,7 @@ export default class ProcessFile extends Vue {
public async uploadFile(event: any): Promise<void> { public async uploadFile(event: any): Promise<void> {
localStorage.setItem('searchers', JSON.stringify(this.selectedSearchers)); localStorage.setItem('searchers', JSON.stringify(this.selectedSearchers));
localStorage.setItem('searchersOptions', JSON.stringify(this.searchersOptions)); localStorage.setItem('searchersOptions', JSON.stringify(this.searchersOptions));
this.toggleUploadDialog(false);
this.$confirm.require({ this.$confirm.require({
message: 'You will lose any progress on the current uploaded document. Are you sure you want to proceed?', message: 'You will lose any progress on the current uploaded document. Are you sure you want to proceed?',
@ -337,7 +338,7 @@ export default class ProcessFile extends Vue {
}); });
try { try {
const response = await this.$api.filterDocument(this.fileContent, searchers);
const response = await this.$api.filterDocument(this.file.id, searchers);
this.processedFileContent = response.content; this.processedFileContent = response.content;
this.documentDiffIndexes = response.indexes; this.documentDiffIndexes = response.indexes;
@ -428,7 +429,7 @@ export default class ProcessFile extends Vue {
}); });
}); });
let response = await this.$api.convertFile(this.processedFileContent, {id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers);
let response = await this.$api.convertFile({id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers);
window.open(`${window.location.origin}/file/download/` + response.path); window.open(`${window.location.origin}/file/download/` + response.path);
} }

9
resources/js/services/ApiService.ts

@ -116,15 +116,15 @@ export default class ApiService {
* @param {boolean} searchOnly Whether or not to also displace the content (default yes) * @param {boolean} searchOnly Whether or not to also displace the content (default yes)
*/ */
public async filterDocument( public async filterDocument(
content: string,
file: string,
searchers: Array<{ key: string; type: string; value: string; }>, searchers: Array<{ key: string; type: string; value: string; }>,
searchOnly: boolean = false
searchOnly: boolean = false,
) { ) {
try { try {
let response = await axios.post( let response = await axios.post(
this.apiRoutes.searchAndDisplace, this.apiRoutes.searchAndDisplace,
{ {
'content': content,
'file': file,
'searchers': searchers, 'searchers': searchers,
'searchOnly': searchOnly 'searchOnly': searchOnly
} }
@ -145,7 +145,7 @@ export default class ApiService {
* *
* @returns * @returns
*/ */
public async convertFile(content: string, file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) {
public async convertFile(file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) {
try { try {
let response = await axios.post( let response = await axios.post(
this.apiRoutes.fileDownload, this.apiRoutes.fileDownload,
@ -154,7 +154,6 @@ export default class ApiService {
'id': file.id, 'id': file.id,
'type': (file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length) !== 'pdf') ? 'odt' : 'pdf' 'type': (file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length) !== 'pdf') ? 'odt' : 'pdf'
}, },
'content': content,
'searchers': searchers 'searchers': searchers
} }
); );

8
routes/web.php

@ -15,14 +15,6 @@ use Illuminate\Support\Facades\Route;
*/ */
Route::get('/', 'HomeController@index'); Route::get('/', 'HomeController@index');
// Route::get('/', function() {
// $dom = new DOMDocument();
// $dom->load(storage_path() . '/doc.xml');
// $dom->getElementsByTagName('p')->item(0)->nodeValue = 'changed';
// $dom->save(storage_path() . '/changed.xml');
// });
Route::get('/file/download/{path}', [ Route::get('/file/download/{path}', [
FileController::class, FileController::class,

Loading…
Cancel
Save