Browse Source

Get document content from ingest as JSON.

master
Alex Puiu 2 years ago
parent
commit
c73a1f87f9
  1. 12
      app/Http/Controllers/SearchAndDisplaceController.php
  2. 32
      app/SearchDisplace/Documents/DocumentFile.php
  3. 10
      app/SearchDisplace/Ingest/HandleReceivedDocument.php
  4. 4
      app/SearchDisplace/Ingest/SendDocument.php
  5. 98
      app/SearchDisplace/SearchAndDisplaceJSON.php
  6. 4
      app/SearchDisplace/Searchers/Duckling.php
  7. 15
      resources/js/components/ProcessFile/ProcessFile.ts
  8. 9
      resources/js/services/ApiService.ts
  9. 8
      routes/web.php

12
app/Http/Controllers/SearchAndDisplaceController.php

@ -3,7 +3,7 @@
namespace App\Http\Controllers;
use App\SearchDisplace\Documents\DocumentFile;
use App\SearchDisplace\SearchAndDisplace;
use App\SearchDisplace\SearchAndDisplaceJSON;
class SearchAndDisplaceController extends Controller
{
@ -29,7 +29,7 @@ class SearchAndDisplaceController extends Controller
public function store()
{
request()->validate([
'content' => 'required', // String or file.
'file' => 'required', // String or file.
'searchers' => 'required|array',
'searchers.*.key' => 'required',
'searchers.*.type' => 'required|in:replace,displace',
@ -39,11 +39,9 @@ class SearchAndDisplaceController extends Controller
$searchOnly = request()->input('searchOnly') ?? false;
$searchAndDisplace = new SearchAndDisplace(
stripslashes(request()->input('content')),
[
'searchers' => request()->input('searchers'),
],
$searchAndDisplace = new SearchAndDisplaceJSON(
request()->input('file'),
request()->input('searchers'),
$searchOnly
);

32
app/SearchDisplace/Documents/DocumentFile.php

@ -18,10 +18,10 @@ class DocumentFile
$path = $this->getPath($id);
// Ingest success.
if ($this->storage->exists("$path/document.md")) {
if ($this->storage->exists("$path/document.json")) {
return [
'status' => 'success',
'content' => $this->getDocumentContent($id, $path),
'content' => $this->getDocumentContent($path),
];
}
@ -50,15 +50,29 @@ class DocumentFile
return "contracts/$id";
}
protected function getDocumentContent($id, $path)
protected function getDocumentContent($path)
{
$content = $this->storage->get("$path/document.md");
$content = json_decode($this->storage->get("$path/document.json"));
$imageFullPath = url('/') . '/contracts-images/' . $id . '/';
$imageFullPath = str_replace( ' ', '%20', $imageFullPath);
return $this->convertToHTML($content);
}
protected function convertToHTML($elements)
{
$html = '';
$url = url('/') . '/contracts-images';
foreach($elements as $key => $element) {
if($element->tag !== 'img') {
$html .= "<$element->tag style=\"$element->style\">$element->content</$element->tag>";
} else {
$src = $url . '/' . str_replace(' ', '%20', $element->src);
$html .= "<img $element->style src=\"$src\" alt=\"$element->details\">";
}
if($key !== array_key_last($elements))
$html .= '<br>';
}
// @TODO Use preg_replace to find correctly formatted images and any wild cards for the image caption.
// return str_replace('![](./', '![](' . $imageFullPath, $content);
return str_replace('](./', '](' . $imageFullPath, $content);
return $html;
}
}

10
app/SearchDisplace/Ingest/HandleReceivedDocument.php

@ -35,8 +35,8 @@ class HandleReceivedDocument
*/
public function handle()
{
if ($this->fileResultType === 'md') {
$this->handleDocumentMD();
if ($this->fileResultType === 'json') {
$this->handleDocument();
return;
}
@ -47,10 +47,10 @@ class HandleReceivedDocument
return;
}
$this->handleDocumentJson();
// $this->handleDocumentJson();
}
protected function handleDocumentMD()
protected function handleDocument()
{
$storage = Storage::disk('local');
@ -58,7 +58,7 @@ class HandleReceivedDocument
// The .json extension signals the success status; its absence signals the failure status.
if ($this->status === 'success') {
$fileName = $fileName . '.md';
$fileName = $fileName . '.json';
}
$dir = "contracts/$this->id";

4
app/SearchDisplace/Ingest/SendDocument.php

@ -23,10 +23,10 @@ class SendDocument
* @param string $fileResultType
* @throws \Exception
*/
public function execute($id, $document, string $fileResultType = 'md')
public function execute($id, $document, string $fileResultType = 'json')
{
try {
if ( ! in_array($fileResultType, ['md', 'original'])) {
if ( ! in_array($fileResultType, ['md', 'original', 'json'])) {
throw new \Exception('Invalid file result type provided.');
}

98
app/SearchDisplace/SearchAndDisplaceJSON.php

@ -0,0 +1,98 @@
<?php
namespace App\SearchDisplace;
use Illuminate\Support\Facades\Storage;
/**
 * Runs search-and-displace over an ingested document stored as JSON and
 * renders the processed elements back to HTML.
 *
 * The document is read from "contracts/<file>/document.json" on the local
 * storage disk and is expected to decode to a list of element objects. The
 * properties read here are `tag`, `style` and `content` for text elements,
 * and `tag`, `style`, `src` and `details` for images.
 */
class SearchAndDisplaceJSON
{
    /** @var string Path of the document JSON, relative to the "contracts" directory. */
    protected $file;

    /** @var array Searchers forwarded to every SearchAndDisplace run. */
    protected $searchers;

    /** @var mixed Search-only flag forwarded to every SearchAndDisplace run. */
    protected $searchOnly;

    /** @var mixed Storage disk used to read the document (declared to avoid a dynamic property). */
    protected $storage;

    /**
     * @param string $file       Document directory name under "contracts/".
     * @param array  $searchers  Searchers to apply to each text element.
     * @param mixed  $searchOnly Optional flag passed through to SearchAndDisplace. It was
     *                           previously dropped silently when callers supplied it.
     */
    public function __construct($file, $searchers, $searchOnly = false)
    {
        $this->file = $file . '/document.json';
        $this->searchers = $searchers;
        $this->searchOnly = $searchOnly;
        $this->storage = Storage::disk('local');
    }

    /**
     * Process the stored document.
     *
     * @return array|null ['content' => string (HTML), 'indexes' => mixed], or null when the
     *                    document is missing or cannot be read as a list of elements.
     */
    public function execute()
    {
        if (! $this->storage->exists("contracts/$this->file")) {
            return null;
        }

        try {
            $content = $this->getContent();
        } catch (\Exception $exception) {
            \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage());

            return null;
        }

        $sd = $this->applySD($content);

        return [
            'content' => $this->convertToHTML($sd['content']),
            'indexes' => $sd['indexes'],
        ];
    }

    /**
     * Read and decode the document.
     *
     * @return array Decoded list of element objects.
     *
     * @throws \JsonException            When the stored file is not valid JSON.
     * @throws \UnexpectedValueException When the JSON does not decode to an array.
     */
    protected function getContent()
    {
        $raw = (string) $this->storage->get("contracts/$this->file");

        // Throw on malformed JSON instead of returning null, so execute() can log and bail out.
        $decoded = json_decode($raw, false, 512, JSON_THROW_ON_ERROR);

        // convertToHTML() relies on array_key_last(), so anything but an array is unusable.
        if (! is_array($decoded)) {
            throw new \UnexpectedValueException(
                "Expected a JSON array of elements in contracts/$this->file."
            );
        }

        return $decoded;
    }

    /**
     * Apply the searchers to the content of every non-image element, in place.
     *
     * @param array $elements Decoded element objects.
     *
     * @return array ['content' => array (the same elements), 'indexes' => mixed]
     */
    protected function applySD($elements)
    {
        $indexes = [];

        foreach ($elements as $element) {
            // Images carry no searchable text.
            if ($element->tag === 'img') {
                continue;
            }

            $search = new SearchAndDisplace(
                stripslashes($element->content),
                [
                    'searchers' => $this->searchers,
                ],
                $this->searchOnly,
                true // NOTE(review): meaning of this flag is not visible here; kept as in the original.
            );

            $changed = $search->execute();

            if ($changed) {
                // NOTE(review): only the indexes of the last changed element survive the loop;
                // confirm whether they should be accumulated per element instead.
                $indexes = $changed['indexes'];
                $element->content = $changed['content'];
            }
        }

        return [
            'content' => $elements,
            'indexes' => $indexes,
        ];
    }

    /**
     * Render the elements as an HTML string, separated by <br>.
     *
     * NOTE(review): element values are interpolated without escaping; confirm that the
     * ingest output is trusted or already HTML-safe.
     *
     * @param array $elements Element objects to render.
     *
     * @return string
     */
    protected function convertToHTML($elements)
    {
        $html = '';
        $url = url('/') . '/contracts-images';
        $lastKey = array_key_last($elements);

        foreach ($elements as $key => $element) {
            if ($element->tag !== 'img') {
                $html .= "<$element->tag style=\"$element->style\">$element->content</$element->tag>";
            } else {
                // Only spaces are percent-encoded in the image path.
                $src = $url . '/' . str_replace(' ', '%20', $element->src);
                $html .= "<img $element->style src=\"$src\" alt=\"$element->details\">";
            }

            if ($key !== $lastKey) {
                $html .= '<br>';
            }
        }

        return $html;
    }
}

4
app/SearchDisplace/Searchers/Duckling.php

@ -10,8 +10,8 @@ class Duckling
public function __construct()
{
// $this->url = env('SD_DUCKLING_URL');
$this->url = 'host.docker.internal:5000/parse';
$this->url = env('SD_DUCKLING_URL');
// $this->url = 'host.docker.internal:5000/parse';
}
public function execute($content, $dimensions)

15
resources/js/components/ProcessFile/ProcessFile.ts

@ -113,24 +113,24 @@ export default class ProcessFile extends Vue {
}
/**
* MD-to-HTML compiled file content
* HTML compiled file content
*/
get compiledFileContent(): string {
return marked(this.fileContent);
return this.fileContent;
}
/**
* MD-to-HTML compiled processed file content
* HTML compiled processed file content
*/
get compiledProcessedFileContent(): string {
return marked(this.processedFileContent);
return this.processedFileContent;
}
/**
* HTML compiled processed file content with diff highlight
*/
get compiledProcessedFileContentPreview(): string {
return marked(this.processedFileContentPreview);
return this.processedFileContentPreview;
}
public changeRoute(url: string) {
@ -205,6 +205,7 @@ export default class ProcessFile extends Vue {
public async uploadFile(event: any): Promise<void> {
localStorage.setItem('searchers', JSON.stringify(this.selectedSearchers));
localStorage.setItem('searchersOptions', JSON.stringify(this.searchersOptions));
this.toggleUploadDialog(false);
this.$confirm.require({
message: 'You will lose any progress on the current uploaded document. Are you sure you want to proceed?',
@ -337,7 +338,7 @@ export default class ProcessFile extends Vue {
});
try {
const response = await this.$api.filterDocument(this.fileContent, searchers);
const response = await this.$api.filterDocument(this.file.id, searchers);
this.processedFileContent = response.content;
this.documentDiffIndexes = response.indexes;
@ -428,7 +429,7 @@ export default class ProcessFile extends Vue {
});
});
let response = await this.$api.convertFile(this.processedFileContent, {id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers);
let response = await this.$api.convertFile({id: this.file.id, name: this.file.file_name || 'filename.odt'}, searchers);
window.open(`${window.location.origin}/file/download/` + response.path);
}

9
resources/js/services/ApiService.ts

@ -116,15 +116,15 @@ export default class ApiService {
* @param {boolean} searchOnly Whether or not to also displace the content (default yes)
*/
public async filterDocument(
content: string,
file: string,
searchers: Array<{ key: string; type: string; value: string; }>,
searchOnly: boolean = false
searchOnly: boolean = false,
) {
try {
let response = await axios.post(
this.apiRoutes.searchAndDisplace,
{
'content': content,
'file': file,
'searchers': searchers,
'searchOnly': searchOnly
}
@ -145,7 +145,7 @@ export default class ApiService {
*
* @returns
*/
public async convertFile(content: string, file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) {
public async convertFile(file: {id: string, name: string}, searchers: Array<{ key: string; type: string; value: string; }>) {
try {
let response = await axios.post(
this.apiRoutes.fileDownload,
@ -154,7 +154,6 @@ export default class ApiService {
'id': file.id,
'type': (file.name.substring(file.name.lastIndexOf('.') + 1, file.name.length) !== 'pdf') ? 'odt' : 'pdf'
},
'content': content,
'searchers': searchers
}
);

8
routes/web.php

@ -15,14 +15,6 @@ use Illuminate\Support\Facades\Route;
*/
Route::get('/', 'HomeController@index');
// Route::get('/', function() {
// $dom = new DOMDocument();
// $dom->load(storage_path() . '/doc.xml');
// $dom->getElementsByTagName('p')->item(0)->nodeValue = 'changed';
// $dom->save(storage_path() . '/changed.xml');
// });
Route::get('/file/download/{path}', [
FileController::class,

Loading…
Cancel
Save