Browse Source

Minor fix to OCR files

master
Radu Liviu Carjan 2 years ago
parent
commit
b7ebd686df
  1. 12
      app/SearchDisplace/Convertor/Convertor.php
  2. 14
      app/SearchDisplace/Ingest/HandleReceivedDocument.php
  3. 22
      app/SearchDisplace/SearchAndDisplaceFromFiles.php

12
app/SearchDisplace/Convertor/Convertor.php

@ -4,6 +4,7 @@ namespace App\SearchDisplace\Convertor;
use Symfony\Component\Process\Process;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Illuminate\Support\Facades\Log;
/**
* Convert documents from formats supported by LibreOffice
@ -28,6 +29,17 @@ class Convertor {
$folder = storage_path('app/tmp/');
}
Log::info('Running `soffice` to convert "' . $original . '" to "' . $to . '". Output folder: "' . $folder . '"');
Log::info(
'COMMAND: ' .
'soffice ' .
'--convert-to ' .
$to . ' ' .
$original . ' ' .
'--outdir ' .
$folder
);
$process = new Process(
[
'soffice',

14
app/SearchDisplace/Ingest/HandleReceivedDocument.php

@ -9,6 +9,8 @@ use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use \Illuminate\Contracts\Filesystem\Filesystem;
use App\SearchDisplace\Convertor\Convertor;
class HandleReceivedDocument
{
@ -17,6 +19,7 @@ class HandleReceivedDocument
protected $fileResultType;
protected $documentFormat;
protected $status;
protected Filesystem $storage;
public function __construct($payload)
{
@ -28,6 +31,8 @@ class HandleReceivedDocument
if (isset($payload['data']['document_format'])) {
$this->documentFormat = $payload['data']['document_format'];
}
$this->storage = Storage::disk('local');
}
/**
@ -58,6 +63,7 @@ class HandleReceivedDocument
// The .html extension signals the success status; its absence signals the fail status.
if ($this->status === 'success') {
$intermediateFileName = $fileName . '.odt';
$fileName = $fileName . '.html';
}
@ -68,6 +74,11 @@ class HandleReceivedDocument
$storage->put("$dir/$fileName", $this->content['document']);
// An HTML document cannot be converted directly to XML, so we convert it to ODT first, then to XML.
Convertor::convert('odt', $this->storage->path("$dir/$fileName"));
Convertor::convert('xml', $this->storage->path("$dir/$intermediateFileName"));
$this->storage->delete("$dir/$intermediateFileName");
foreach ($this->content['images'] as $image) {
$name = $image['name'];
$type = $image['type'];
@ -81,7 +92,8 @@ class HandleReceivedDocument
// Emit event so other sections of the app can work on it.
IngestDocumentReceived::dispatch($this->id);
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info('Exception: ' . $exception->getTraceAsString());
Log::error('Exception: ' . $exception->getMessage());
Log::error($exception->getTraceAsString());
}
}

22
app/SearchDisplace/SearchAndDisplaceFromFiles.php

@ -30,7 +30,7 @@ class SearchAndDisplaceFromFiles
}
try {
$documentContent = $this->storage->get("$this->directoryPath/document.md");
$documentContent = $this->storage->get("$this->directoryPath/document.html");
$searchersContent = json_decode($this->storage->get($this->infoFilePath), true);
$documentPath = $searchersContent['document_path'];
@ -38,15 +38,19 @@ class SearchAndDisplaceFromFiles
$this->storage->put($this->infoFilePath, json_encode($searchers[0]['content']));
$searchAndDisplace = new SearchAndDisplace($documentContent, [
'searchers' => [
[
'key' => $this->id,
'type' => $searchers[0]['type'],
'value' => $searchers[0]['value'],
]
$searchAndDisplace = new SearchAndDisplace(
$documentContent,
[
'searchers' => [
[
'key' => $this->id,
'type' => $searchers[0]['type'],
'value' => $searchers[0]['value'],
]
],
],
]);
false
);
$result = $searchAndDisplace->execute();

Loading…
Cancel
Save