Repository for the Search and Displace core module, including the interface for selecting files and choosing the search-and-displace operations to run on them.
https://searchanddisplace.com
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
<?php
namespace App\SearchDisplace;
use App\SearchDisplace\Ingest\SendDocument; use Illuminate\Support\Facades\Storage;
/**
 * Coordinates search-and-displace processing of an uploaded document.
 *
 * Per-document state lives on the "local" storage disk under "contracts/{id}":
 * "searchers.json" holds the searchers chosen at upload time, and the presence
 * of a "document" entry is what this class treats as "processing finished".
 */
class SearchAndDisplaceOriginalDocument
{
    /**
     * Register an uploaded document and hand it over to Ingest.
     *
     * The identifier is the current Unix timestamp, an underscore, and the
     * client-supplied file name without its extension.
     *
     * @param mixed $document  Uploaded file; must expose getClientOriginalName(),
     *                         getRealPath() and getMimeType().
     * @param mixed $searchers JSON-encodable searcher definitions.
     * @return string The generated document identifier.
     * @throws \Exception
     */
    public function start($document, $searchers)
    {
        $id = time() . '_' . pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);

        $this->storeSearchers($id, $searchers);
        $this->sendDocumentToIngest($id, $document);

        return $id;
    }

    /**
     * Run search-and-displace over the text of an ingested document.
     *
     * This is best-effort: any \Exception (malformed payload, missing searchers,
     * failure inside SearchAndDisplace) is logged and swallowed, never rethrown.
     *
     * @param mixed $id       Document identifier returned by start().
     * @param mixed $contents Array-like value whose 'document' entry is a JSON string
     *                        containing a 'contents' structure with a 'text' entry.
     * @return void
     */
    public function applySD($id, $contents)
    {
        try {
            $data = json_decode($contents['document'] ?? '', true);

            // json_decode() yields null on invalid JSON; fail with a clear message
            // instead of reading offsets of a non-array further down.
            if (! is_array($data) || ! isset($data['contents']['text'])) {
                throw new \Exception('Ingest document is missing or is not valid JSON.');
            }

            $searchAndDisplace = new SearchAndDisplace(
                $data['contents']['text'],
                [
                    'searchers' => $this->getSearchers($id),
                ]
            );

            $result = $searchAndDisplace->execute();

            // Update text.
            $data['contents'] = $this->applyResultsOnIngestData($data['contents'], $result);

            \Illuminate\Support\Facades\Log::info($data['contents']);

            // TODO: send to Ingest to recreate doc.
        } catch (\Exception $exception) {
            // These are failures, so record them at error level.
            \Illuminate\Support\Facades\Log::error($exception->getMessage());
            \Illuminate\Support\Facades\Log::error($exception->getTraceAsString());
        }
    }

    /**
     * Discard everything stored for a document whose ingestion failed.
     *
     * @param mixed $id
     * @return void
     */
    public function onIngestFail($id)
    {
        $this->disk()->deleteDirectory($this->directory($id));
    }

    /**
     * Tell whether the document's storage directory is gone.
     *
     * @param mixed $id
     * @return bool True when "contracts/{id}" does not exist.
     */
    public function hasFailed($id)
    {
        return ! $this->disk()->exists($this->directory($id));
    }

    /**
     * Tell whether the processed document has not been stored yet.
     *
     * Note that this is also true when the whole directory is missing.
     *
     * @param mixed $id
     * @return bool True when "contracts/{id}/document" does not exist.
     */
    public function isInProgress($id)
    {
        return ! $this->disk()->exists($this->directory($id) . '/document');
    }

    /**
     * Download the processed document.
     *
     * @param mixed $id
     * @return \Symfony\Component\HttpFoundation\StreamedResponse
     * @throws \Exception When the document has failed or is still in progress.
     */
    public function streamFile($id)
    {
        if ($this->hasFailed($id) || $this->isInProgress($id)) {
            throw new \Exception('Document is not processed.');
        }

        return $this->disk()->download($this->directory($id) . '/document');
    }

    /**
     * Apply a search-and-displace result to the Ingest "contents" structure.
     *
     * Replaces 'text' with the displaced content and adjusts each element's
     * 'range_start' / 'range_end' for the matches that fall entirely inside it.
     *
     * @param array $ingestData Has a 'text' entry and, optionally, an 'elements' list whose
     *                          items carry 'range_start' and 'range_end'.
     * @param array $sdResult   Has a 'content' entry and, optionally, an 'indexes' map of
     *                          searcher => list of matches; each match carries
     *                          'original_start', 'original_end' and 'end'.
     * @return array The updated ingest data.
     */
    protected function applyResultsOnIngestData($ingestData, $sdResult)
    {
        $ingestData['text'] = $sdResult['content'];

        // Flatten the per-searcher matches into a single list keyed by original start.
        $indexes = [];

        foreach ($sdResult['indexes'] ?? [] as $searcherIndexes) {
            foreach ($searcherIndexes as $index) {
                $indexes[$index['original_start']] = $index;
            }
        }

        // PHP arrays keep insertion order, so keying by start position does not sort
        // them. The early `break` below is only valid on an ascending list.
        ksort($indexes);

        // Total growth contributed by all elements processed so far.
        $startOffset = 0;

        // Write each element back by key: iterating by value alone would silently
        // discard every range adjustment.
        foreach ($ingestData['elements'] ?? [] as $key => $element) {
            // Matches are expressed in original coordinates, so compare them against
            // the element's original bounds, not the bounds being adjusted below.
            $originalRangeStart = $element['range_start'];
            $originalRangeEnd = $element['range_end'];

            // Growth contributed by the matches inside this element.
            $currentStartOffset = 0;

            foreach ($indexes as $i => $index) {
                if ($index['original_start'] > $originalRangeEnd) {
                    break;
                }

                if ($index['original_end'] < $originalRangeStart) {
                    continue;
                }

                if (
                    $index['original_start'] >= $originalRangeStart
                    && $index['original_end'] <= $originalRangeEnd
                ) {
                    $endDifference = $index['end'] - $index['original_end'];

                    $element['range_end'] += $endDifference;
                    $currentStartOffset += $endDifference;

                    // A match belongs to at most one element.
                    unset($indexes[$i]);
                }
            }

            // NOTE(review): 'range_end' is not shifted by $startOffset, so an element
            // following a grown one keeps an end relative to the old text. Whether it
            // should be depends on whether 'end' is a per-match delta or an absolute
            // position in the displaced text; confirm against SearchAndDisplace.
            $element['range_start'] += $startOffset;

            $startOffset += $currentStartOffset;

            $ingestData['elements'][$key] = $element;
        }

        return $ingestData;
    }

    /**
     * Persist the searchers chosen for a document as "searchers.json".
     *
     * @param mixed $id
     * @param mixed $searchers JSON-encodable searcher definitions.
     * @return void
     */
    protected function storeSearchers($id, $searchers)
    {
        $directory = $this->directory($id);

        $this->disk()->makeDirectory($directory);
        $this->disk()->put("$directory/searchers.json", json_encode($searchers));
    }

    /**
     * Load the searchers previously stored for a document.
     *
     * @param mixed $id
     * @return mixed The decoded "searchers.json" content (JSON objects become associative arrays).
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     * @throws \Exception When the stored file is empty or unreadable.
     */
    protected function getSearchers($id)
    {
        $searchers = $this->disk()->get($this->directory($id) . '/searchers.json');

        if (! $searchers) {
            throw new \Exception('Searchers do not exist.');
        }

        return json_decode($searchers, true);
    }

    /**
     * Send the uploaded document to Ingest, tagged as 'original'.
     *
     * @param mixed $id
     * @param mixed $document Uploaded file; must expose getRealPath(), getMimeType()
     *                        and getClientOriginalName().
     * @return void
     * @throws \Exception
     */
    protected function sendDocumentToIngest($id, $document)
    {
        $sendDocument = new SendDocument();

        $sendDocument->execute($id, [
            'path' => $document->getRealPath(),
            'type' => $document->getMimeType(),
            'name' => $document->getClientOriginalName(),
        ], 'original');
    }

    /**
     * The storage disk holding all per-document state.
     */
    private function disk()
    {
        return Storage::disk('local');
    }

    /**
     * Storage directory, relative to the disk root, for the given document.
     */
    private function directory($id)
    {
        return "contracts/$id";
    }
}
|