<?php
namespace App\SearchDisplace;
use App\SearchDisplace\Ingest\SendDataToRecreateDocument; use App\SearchDisplace\Ingest\SendDocument; use Illuminate\Http\File; use Illuminate\Http\UploadedFile; use Illuminate\Support\Facades\Storage;
/**
 * Runs Search & Displace against an original document by sending it through
 * the ingest service and rebuilding it with the displaced text.
 *
 * Per-document state lives on the "local" storage disk:
 *  - "contracts/<id>/searchers.json" holds the searchers and the CLI output path;
 *  - a finished document is any file directly under "contracts" whose path
 *    starts with "contracts/<id>-document".
 */
class SearchAndDisplaceOriginalDocument
{
    /**
     * Registers a document for processing and sends it to ingest.
     *
     * @param UploadedFile|string $document Uploaded file, or a filesystem path when invoked from the CLI.
     * @param mixed               $searchers Searchers to persist for the later applySD() call.
     * @return string Generated id in the form "<unix timestamp>_<file name without extension>".
     * @throws \Exception
     */
    public function start($document, $searchers)
    {
        $storeResultAtPath = '';

        if ($document instanceof UploadedFile) {
            $fileName = pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);
        } else {
            // From CLI.
            $document = new File($document);
            // Strip only the trailing extension. The previous str_replace() removed
            // every occurrence of ".<ext>" (and every dot when there was no extension).
            $fileName = pathinfo($document->getFilename(), PATHINFO_FILENAME);
            $storeResultAtPath = $document->getPath();
        }

        $id = time() . '_' . $fileName;

        $this->storeSearchers($id, $searchers, $storeResultAtPath);
        $this->sendDocumentToIngest($id, $document, $fileName);

        return $id;
    }

    /**
     * Applies the stored searchers to ingested data and sends the result back to be rebuilt.
     *
     * Any \Exception raised while searching, remapping or sending is logged
     * (info level) and NOT rethrown, so callers never see those failures.
     *
     * @param string $id             Identifier returned by start().
     * @param array  $contents       Must contain a 'document' key holding a JSON string; the decoded
     *                               value is expected to expose ['contents']['text'] and
     *                               ['contents']['elements'].
     * @param mixed  $documentFormat Stored under 'document_format' in the payload that is sent back.
     * @return void
     */
    public function applySD($id, $contents, $documentFormat)
    {
        $data = json_decode($contents['document'], true);

        try {
            $searchAndDisplace = new SearchAndDisplace(
                $data['contents']['text'],
                [
                    'searchers' => $this->getSearchers($id),
                ]
            );

            $result = $searchAndDisplace->execute();

            // Replace the text and shift the element ranges accordingly.
            $data['contents'] = $this->applyResultsOnIngestData($data['contents'], $result);
            $data['document_format'] = $documentFormat;

            $this->sendDataToIngestToRebuild($id, $data);
        } catch (\Exception $exception) {
            \Illuminate\Support\Facades\Log::info('========================');
            \Illuminate\Support\Facades\Log::info('Exception - SearchAndDisplaceOriginalDocument@applySD');
            \Illuminate\Support\Facades\Log::info($exception->getMessage());
            \Illuminate\Support\Facades\Log::info($exception->getTraceAsString());
            \Illuminate\Support\Facades\Log::info('========================');
        }
    }

    /**
     * Deletes the document's working directory ("contracts/<id>").
     *
     * After this call hasFailed() reports true unless a finished document exists.
     *
     * @param string $id
     * @return void
     */
    public function onIngestFail($id)
    {
        Storage::disk('local')->deleteDirectory("contracts/$id");
    }

    /**
     * Tells whether processing has failed: neither the working directory
     * nor a finished document exists for the id.
     *
     * @param string $id
     * @return bool
     */
    public function hasFailed($id)
    {
        if (Storage::disk('local')->exists("contracts/$id")) {
            return false;
        }

        return $this->findDocumentById($id) === null;
    }

    /**
     * Tells whether processing is still running: the working directory exists
     * but no finished document has been found yet.
     *
     * @param string $id
     * @return bool
     */
    public function isInProgress($id)
    {
        return Storage::disk('local')->exists("contracts/$id")
            && $this->findDocumentById($id) === null;
    }

    /**
     * Returns the absolute path of the finished document.
     *
     * Same outcomes as checking hasFailed() and then isInProgress(), but the
     * contracts directory is scanned only once.
     *
     * @param string $id
     * @return string
     * @throws \Exception When the document has failed or is still processing.
     */
    public function getDownloadPath($id)
    {
        $storage = Storage::disk('local');
        $documentPath = $this->findDocumentById($id);

        if ($documentPath === null) {
            // No finished document: the working directory tells "running" apart from "failed".
            if ($storage->exists("contracts/$id")) {
                throw new \Exception('Document is still processing.');
            }

            throw new \Exception('Document has failed.');
        }

        return $storage->path($documentPath);
    }

    /**
     * Finds the first file directly under "contracts" whose path starts with
     * "contracts/<id>-document".
     *
     * @param string $id
     * @return string|null Storage-relative path, or null when no file matches.
     */
    protected function findDocumentById($id)
    {
        $prefix = "contracts/$id-document";
        $prefixLength = strlen($prefix);

        foreach (Storage::disk('local')->files('contracts') as $contractFile) {
            if (strncmp($contractFile, $prefix, $prefixLength) === 0) {
                return $contractFile;
            }
        }

        return null;
    }

    /**
     * Writes the displaced text into the ingest data and shifts every element's
     * range so it still points at the same content in the new text.
     *
     * @param array $ingestData Needs 'text' and 'elements'; each element needs
     *                          integer 'range_start' and 'range_end'.
     * @param array $sdResult   Needs 'content' (the new text) and 'indexes', a map of
     *                          searcher => list of entries with 'original_start',
     *                          'original_end', 'start' and 'end'.
     * @return array The updated ingest data.
     */
    protected function applyResultsOnIngestData($ingestData, $sdResult)
    {
        $ingestData['text'] = $sdResult['content'];

        // Flatten the replacements of all searchers, keyed by their original start.
        $indexes = [];
        foreach ($sdResult['indexes'] as $searcherIndexes) {
            foreach ($searcherIndexes as $index) {
                $indexes[$index['original_start']] = $index;
            }
        }

        // PHP arrays keep insertion order, so keying alone does not sort anything.
        // The early "break" below is only valid when the replacements are ascending.
        ksort($indexes);

        // Accumulated length change caused by all previous elements.
        $lastOffset = 0;

        foreach ($ingestData['elements'] as $elementIndex => $element) {
            // Length change caused by replacements inside this element.
            $currentOffset = 0;

            foreach ($indexes as $i => $index) {
                if ($index['original_start'] > $element['range_end']) {
                    // Sorted input: every remaining replacement lies after this element.
                    break;
                }

                if ($index['original_end'] < $element['range_start']) {
                    continue;
                }

                if (
                    $index['original_start'] >= $element['range_start']
                    && $index['original_end'] <= $element['range_end']
                ) {
                    // How much longer (or shorter) the replacement is than the original match.
                    $endDifference = ($index['end'] - $index['original_end'])
                        - ($index['start'] - $index['original_start']);

                    $ingestData['elements'][$elementIndex]['range_end'] += $endDifference;
                    $currentOffset += $endDifference;

                    // Consumed: no later element needs to look at it again.
                    unset($indexes[$i]);
                }
            }

            $ingestData['elements'][$elementIndex]['range_start'] += $lastOffset;
            $ingestData['elements'][$elementIndex]['range_end'] += $lastOffset;

            $lastOffset += $currentOffset;
        }

        return $ingestData;
    }

    /**
     * Creates the working directory and persists the searchers together with
     * the path where the result should be stored.
     *
     * @param string      $id
     * @param mixed       $searchers
     * @param string|null $storeResultAtPath Stored as '' when null.
     * @return void
     */
    protected function storeSearchers($id, $searchers, $storeResultAtPath)
    {
        $storage = Storage::disk('local');
        $directory = "contracts/$id";

        $storage->makeDirectory($directory);

        $storage->put("$directory/searchers.json", json_encode([
            'searchers' => $searchers,
            'document_path' => $storeResultAtPath ?? '',
        ]));
    }

    /**
     * Returns the searchers that storeSearchers() persisted for the id.
     *
     * @param string $id
     * @return mixed The 'searchers' entry of searchers.json.
     * @throws \Exception When searchers.json is missing or empty.
     */
    protected function getSearchers($id)
    {
        return $this->readSearchersFile($id)['searchers'];
    }

    /**
     * Returns the 'document_path' entry that storeSearchers() persisted for the id.
     *
     * @param string $id
     * @return string
     * @throws \Exception When searchers.json is missing or empty.
     */
    public function getStoreAtPathFromJsonFile($id)
    {
        return $this->readSearchersFile($id)['document_path'];
    }

    /**
     * Reads and decodes "contracts/<id>/searchers.json".
     *
     * @param string $id
     * @return mixed Decoded JSON (an associative array when written by storeSearchers()).
     * @throws \Exception When the file content is missing or empty.
     */
    private function readSearchersFile($id)
    {
        $raw = Storage::disk('local')->get("contracts/$id/searchers.json");

        if ( ! $raw) {
            throw new \Exception('Searchers do not exist.');
        }

        return json_decode($raw, true);
    }

    /**
     * Hands the document to the ingest service via SendDocument.
     *
     * @param string            $id
     * @param UploadedFile|File $document
     * @param string            $fileName File name without extension, sent as 'name'.
     * @return void
     * @throws \Exception
     */
    protected function sendDocumentToIngest($id, $document, $fileName)
    {
        $sendDocument = new SendDocument();

        $sendDocument->execute($id, [
            'path' => $document->getRealPath(),
            'type' => $document->getMimeType(),
            'name' => $fileName
        ], 'original');
    }

    /**
     * Sends the updated ingest data back via SendDataToRecreateDocument.
     *
     * @param string $id
     * @param array  $data
     * @return void
     * @throws \GuzzleHttp\Exception\GuzzleException
     */
    protected function sendDataToIngestToRebuild($id, $data)
    {
        $handler = new SendDataToRecreateDocument();

        $handler->execute($id, $data);
    }
}
|