Repository for the Search and Displace core module, including the interface used to select files and the search-and-displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

207 lines
5.6 KiB

<?php
namespace App\SearchDisplace;
use App\SearchDisplace\Ingest\SendDataToRecreateDocument;
use App\SearchDisplace\Ingest\SendDocument;
use Illuminate\Support\Facades\Storage;
/**
 * Coordinates a search-and-displace run over an uploaded document.
 *
 * Flow visible in this class:
 *  - start() stores the searchers under "contracts/{id}" on the local disk and
 *    hands the upload to SendDocument.
 *  - applySD() receives the parsed payload, runs the stored searchers over its
 *    text, shifts the element ranges accordingly and hands the result to
 *    SendDataToRecreateDocument.
 *  - hasFailed() / isInProgress() / getDownloadPath() derive the job state from
 *    the existence of "contracts/{id}" and "contracts/{id}-document.docx".
 *
 * NOTE(review): the "-document.docx" file is not written by this class;
 * presumably another component stores it once the document is rebuilt.
 */
class SearchAndDisplaceOriginalDocument
{
    /**
     * Starts processing an uploaded document.
     *
     * The id is the current Unix timestamp, an underscore and the client file
     * name without its extension.
     *
     * @param mixed $document Upload object; must provide getClientOriginalName(),
     *                        getRealPath() and getMimeType().
     * @param mixed $searchers Searchers definition; stored as JSON for applySD().
     * @return string The generated document id.
     * @throws \Exception
     */
    public function start($document, $searchers)
    {
        $id = time() . '_' . pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);

        $this->storeSearchers($id, $searchers);
        $this->sendDocumentToIngest($id, $document);

        return $id;
    }

    /**
     * Applies the stored searchers to the parsed document and sends the result
     * to be rebuilt.
     *
     * Failures raised while processing are logged and the job is marked as
     * failed (its storage directory is removed) so that isInProgress() does not
     * keep reporting a job that will never complete.
     *
     * @param string $id Document id returned by start().
     * @param array $contents Payload whose 'document' entry is a JSON string
     *                        containing a 'contents' structure with 'text'.
     */
    public function applySD($id, $contents)
    {
        $data = json_decode($contents['document'], true);

        try {
            if (! is_array($data) || ! isset($data['contents']['text'])) {
                throw new \Exception('Ingest payload does not contain the document contents.');
            }

            $searchAndDisplace = new SearchAndDisplace(
                $data['contents']['text'],
                [
                    'searchers' => $this->getSearchers($id),
                ]
            );
            $result = $searchAndDisplace->execute();

            // Replace the text and realign the element ranges with it.
            $data['contents'] = $this->applyResultsOnIngestData($data['contents'], $result);

            $this->sendDataToIngestToRebuild($id, $data);
        } catch (\Exception $exception) {
            \Illuminate\Support\Facades\Log::error($exception->getMessage(), ['id' => $id]);

            // Without this the directory stays in place and the job looks "in progress" forever.
            $this->onIngestFail($id);
        }
    }

    /**
     * Marks a job as failed by deleting its "contracts/{id}" directory.
     *
     * @param string $id
     */
    public function onIngestFail($id)
    {
        Storage::disk('local')->deleteDirectory($this->directoryFor($id));
    }

    /**
     * A job has failed when neither its directory nor its processed document exists.
     *
     * @param string $id
     * @return bool
     */
    public function hasFailed($id)
    {
        $storage = Storage::disk('local');

        return ! $storage->exists($this->directoryFor($id))
            && ! $storage->exists($this->processedDocumentPath($id));
    }

    /**
     * A job is in progress while its directory exists but the processed document does not.
     *
     * @param string $id
     * @return bool
     */
    public function isInProgress($id)
    {
        $storage = Storage::disk('local');

        return $storage->exists($this->directoryFor($id))
            && ! $storage->exists($this->processedDocumentPath($id));
    }

    /**
     * Returns the local disk path of the processed document.
     *
     * @param string $id
     * @return string
     * @throws \Exception When the job has failed or is still in progress.
     */
    public function getDownloadPath($id)
    {
        if ($this->hasFailed($id) || $this->isInProgress($id)) {
            throw new \Exception('Document is not processed.');
        }

        return Storage::disk('local')->path($this->processedDocumentPath($id));
    }

    /**
     * Writes the displaced text into the ingest data and shifts element ranges.
     *
     * Each match carries its position in the original text
     * ('original_start' / 'original_end') and in the new text ('start' / 'end').
     * For every element, matches lying fully inside its original range change
     * its 'range_end' by the match's length difference; the accumulated
     * difference of all previous elements shifts both 'range_start' and
     * 'range_end'.
     *
     * Matches sharing the same 'original_start' overwrite each other (last one
     * wins), and matches spanning an element boundary are not applied.
     *
     * @param array $ingestData Contents structure with 'text' and 'elements'.
     * @param array $sdResult Result with 'content' and 'indexes' (matches grouped per searcher).
     * @return array The updated ingest data.
     */
    protected function applyResultsOnIngestData($ingestData, $sdResult)
    {
        $ingestData['text'] = $sdResult['content'];

        if (empty($ingestData['elements'])) {
            return $ingestData;
        }

        // Flatten the per-searcher matches, keyed by their original start.
        $indexes = [];
        foreach ($sdResult['indexes'] as $searcherIndexes) {
            foreach ($searcherIndexes as $index) {
                $indexes[$index['original_start']] = $index;
            }
        }

        // PHP arrays keep insertion order, so keying alone does not sort them.
        // The early "break" below relies on ascending original_start.
        ksort($indexes);

        $lastOffset = 0;

        foreach ($ingestData['elements'] as $elementIndex => $element) {
            $currentOffset = 0;

            foreach ($indexes as $i => $index) {
                // Sorted ASC: every remaining match starts after this element.
                if ($index['original_start'] > $element['range_end']) {
                    break;
                }

                if ($index['original_end'] < $element['range_start']) {
                    continue;
                }

                if (
                    $index['original_start'] >= $element['range_start'] &&
                    $index['original_end'] <= $element['range_end']
                ) {
                    // Length difference between the replacement and the original match.
                    $endDifference = ($index['end'] - $index['original_end']) -
                        ($index['start'] - $index['original_start']);

                    $ingestData['elements'][$elementIndex]['range_end'] += $endDifference;
                    $currentOffset += $endDifference;

                    // A consumed match never needs to be inspected again.
                    unset($indexes[$i]);
                }
            }

            $ingestData['elements'][$elementIndex]['range_start'] += $lastOffset;
            $ingestData['elements'][$elementIndex]['range_end'] += $lastOffset;

            $lastOffset += $currentOffset;
        }

        return $ingestData;
    }

    /**
     * Creates the job directory and stores the searchers in it as JSON.
     *
     * @param string $id
     * @param mixed $searchers
     * @throws \Exception When the searchers cannot be encoded as JSON.
     */
    protected function storeSearchers($id, $searchers)
    {
        $encoded = json_encode($searchers);

        if ($encoded === false) {
            throw new \Exception('Searchers could not be encoded: ' . json_last_error_msg());
        }

        $storage = Storage::disk('local');
        $directory = $this->directoryFor($id);

        $storage->makeDirectory($directory);
        $storage->put("$directory/searchers.json", $encoded);
    }

    /**
     * Loads the searchers stored by storeSearchers().
     *
     * @param string $id
     * @return mixed The decoded searchers (JSON objects become associative arrays).
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     * @throws \Exception When the file is empty, missing or not valid JSON.
     */
    protected function getSearchers($id)
    {
        $raw = Storage::disk('local')->get($this->directoryFor($id) . '/searchers.json');

        if ( ! $raw) {
            throw new \Exception('Searchers do not exist.');
        }

        $searchers = json_decode($raw, true);

        if (json_last_error() !== JSON_ERROR_NONE) {
            throw new \Exception('Searchers could not be decoded: ' . json_last_error_msg());
        }

        return $searchers;
    }

    /**
     * Hands the uploaded document to SendDocument.
     *
     * Passes the upload's real path, MIME type and client file name, with the
     * literal 'original' as third argument (its meaning is defined by SendDocument).
     *
     * @param string $id
     * @param mixed $document
     * @throws \Exception
     */
    protected function sendDocumentToIngest($id, $document)
    {
        $sendDocument = new SendDocument();

        $sendDocument->execute($id, [
            'path' => $document->getRealPath(),
            'type' => $document->getMimeType(),
            'name' => $document->getClientOriginalName()
        ], 'original');
    }

    /**
     * Hands the updated ingest data to SendDataToRecreateDocument.
     *
     * @param string $id
     * @param array $data
     * @throws \GuzzleHttp\Exception\GuzzleException
     */
    protected function sendDataToIngestToRebuild($id, $data)
    {
        $handler = new SendDataToRecreateDocument();

        $handler->execute($id, $data);
    }

    /**
     * Storage directory of a job, relative to the local disk root.
     *
     * @param string $id
     * @return string
     */
    private function directoryFor($id)
    {
        return "contracts/$id";
    }

    /**
     * Path of the processed document, relative to the local disk root.
     *
     * @TODO Set document extension.
     *
     * @param string $id
     * @return string
     */
    private function processedDocumentPath($id)
    {
        return $this->directoryFor($id) . '-document.docx';
    }
}