getClientOriginalName(), PATHINFO_FILENAME);
        } else {
            // From CLI.
            $document = new File($document);
            $fileName = str_replace('.' . $document->getExtension(), '', $document->getFilename());
            $storeResultAtPath = $document->getPath();
        }

        $id = time() . '_' . $fileName;

        $this->storeSearchers($id, $searchers, $storeResultAtPath);
        $this->sendDocumentToIngest($id, $document, $fileName);

        return $id;
    }

    /**
     * Runs search-and-displace over the ingested text and sends the updated data to be rebuilt.
     *
     * The text is read from $contents['document'] (a JSON string) under 'contents' => 'text'.
     * Any \Exception raised while processing is caught and logged at info level; it is not
     * rethrown to the caller.
     *
     * @param $id
     * @param $contents Array whose 'document' entry is the JSON-encoded ingest data.
     * @param $documentFormat Value stored under 'document_format' in the data sent to rebuild.
     * @throws \GuzzleHttp\Exception\GuzzleException
     */
    public function applySD($id, $contents, $documentFormat)
    {
        $data = json_decode($contents['document'], true);

        try {
            $searchAndDisplace = new SearchAndDisplace(
                $data['contents']['text'],
                [
                    'searchers' => $this->getSearchers($id),
                ]
            );

            $result = $searchAndDisplace->execute();

            // Update the text and the element ranges, then record the requested format.
            $data['contents'] = $this->applyResultsOnIngestData($data['contents'], $result);
            $data['document_format'] = $documentFormat;

            $this->sendDataToIngestToRebuild($id, $data);
        } catch (\Exception $exception) {
            // NOTE(review): nothing here removes "contracts/$id", so a failure at this point
            // presumably leaves isInProgress() reporting true - confirm whether onIngestFail()
            // should be called.
            \Illuminate\Support\Facades\Log::info('========================');
            \Illuminate\Support\Facades\Log::info('Exception - SearchAndDisplaceOriginalDocument@applySD');
            \Illuminate\Support\Facades\Log::info($exception->getMessage());
            \Illuminate\Support\Facades\Log::info($exception->getTraceAsString());
            \Illuminate\Support\Facades\Log::info('========================');
        }
    }

    /**
     * Deletes the "contracts/$id" working directory from the local disk.
     *
     * @param $id
     */
    public function onIngestFail($id)
    {
        $storage = Storage::disk('local');
        $directory = "contracts/$id";

        $storage->deleteDirectory($directory);
    }

    /**
     * Tells whether processing has failed for the given id.
     *
     * A document counts as failed when neither its "contracts/$id" directory nor a
     * result file (see findDocumentById) exists on the local disk.
     *
     * @param $id
     * @return bool
     */
    public function hasFailed($id)
    {
        $storage = Storage::disk('local');
        $basePath = "contracts/$id";

        // Short-circuits exactly like the original guard clauses: the file listing is only
        // scanned when the directory is missing.
        return ! $storage->exists($basePath) && $this->findDocumentById($id) === null;
    }

    /**
     * Tells whether the given id is still being processed.
     *
     * That is the case while the "contracts/$id" directory exists and no result file
     * (see findDocumentById) has been found yet.
     *
     * @param $id
     * @return bool
     */
    public function isInProgress($id)
    {
        $storage = Storage::disk('local');
        $basePath = "contracts/$id";

        return $storage->exists($basePath) && $this->findDocumentById($id) === null;
    }

    /**
     * Returns the path, as resolved by the local disk, of the result file for the given id.
     *
     * @param $id
     * @return string
     * @throws \Exception When no result file exists: 'Document is still processing.' if the
     *                    "contracts/$id" directory is present, 'Document has failed.' otherwise.
     */
    public function getDownloadPath($id)
    {
        $storage = Storage::disk('local');

        // Look the document up once; hasFailed() and isInProgress() would each list the
        // directory again only to reach the same conclusions.
        $documentPath = $this->findDocumentById($id);

        if ($documentPath !== null) {
            return $storage->path($documentPath);
        }

        if ($storage->exists("contracts/$id")) {
            throw new \Exception('Document is still processing.');
        }

        throw new \Exception('Document has failed.');
    }

    /**
     * Finds the first file directly under "contracts" whose path starts with
     * "contracts/$id-document".
     *
     * @param $id
     * @return string|null The matching path as returned by the disk listing, or null if none.
     */
    protected function findDocumentById($id)
    {
        $storage = Storage::disk('local');
        $documentPartialPath = "contracts/$id-document";
        $prefixLength = strlen($documentPartialPath);

        foreach ($storage->files('contracts') as $contractFile) {
            if (strncmp($contractFile, $documentPartialPath, $prefixLength) === 0) {
                return $contractFile;
            }
        }

        return null;
    }

    /**
     * Applies a search-and-displace result to the ingest data.
     *
     * Replaces 'text' with the displaced content and shifts the 'range_start' / 'range_end'
     * of every element so the ranges account for the length changes of the replacements.
     *
     * @param $ingestData Array with a 'text' entry and an 'elements' list; each element has
     *                    'range_start' and 'range_end'.
     * @param $sdResult Array with 'content' and 'indexes'; 'indexes' maps a searcher to a list
     *                  of entries with 'start', 'end', 'original_start' and 'original_end'.
     * @return array The updated ingest data.
     */
    protected function applyResultsOnIngestData($ingestData, $sdResult)
    {
        $ingestData['text'] = $sdResult['content'];

        // Flatten the per-searcher indexes into one map keyed by original start. Entries that
        // share the same original start overwrite one another (the last one wins).
        $indexes = [];

        foreach ($sdResult['indexes'] as $searcher => $searcherIndexes) {
            foreach ($searcherIndexes as $index) {
                $indexes[$index['original_start']] = $index;
            }
        }

        // PHP arrays keep insertion order, so the keys must be sorted explicitly. The early
        // "break" below is only valid when the indexes are visited in ascending order.
        ksort($indexes);

        // Total length change contributed by all previously visited elements.
        $lastOffset = 0;

        foreach ($ingestData['elements'] as $elementIndex => $element) {
            // Length change contributed by replacements inside the current element.
            $currentOffset = 0;

            foreach ($indexes as $i => $index) {
                if ($index['original_start'] > $element['range_end']) {
                    // This index and every later one start after the element.
                    break;
                }

                if ($index['original_end'] < $element['range_start']) {
                    continue;
                }

                if (
                    $index['original_start'] >= $element['range_start']
                    && $index['original_end'] <= $element['range_end']
                ) {
                    // New length minus original length of this replacement.
                    $endDifference = ($index['end'] - $index['original_end'])
                        - ($index['start'] - $index['original_start']);

                    $ingestData['elements'][$elementIndex]['range_end'] += $endDifference;
                    $currentOffset += $endDifference;

                    // Consumed; later elements do not need to look at it again.
                    unset($indexes[$i]);
                }
            }

            $ingestData['elements'][$elementIndex]['range_start'] += $lastOffset;
            $ingestData['elements'][$elementIndex]['range_end'] += $lastOffset;

            $lastOffset += $currentOffset;
        }

        return $ingestData;
    }

    /**
     * Creates the "contracts/$id" directory and writes searchers.json into it.
     *
     * The JSON file holds the searchers under 'searchers' and the result location under
     * 'document_path' (an empty string when $storeResultAtPath is null).
     *
     * @param $id
     * @param $searchers
     * @param $storeResultAtPath
     */
    protected function storeSearchers($id, $searchers, $storeResultAtPath)
    {
        $storage = Storage::disk('local');
        $directory = "contracts/$id";

        $storage->makeDirectory($directory);

        $storage->put("$directory/searchers.json", json_encode([
            'searchers' => $searchers,
            'document_path' => $storeResultAtPath ?? '',
        ]));
    }

    /**
     * Returns the 'searchers' entry stored in searchers.json for the given id.
     *
     * @param $id
     * @return mixed
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     * @throws \Exception When the stored file content is empty.
     */
    protected function getSearchers($id)
    {
        return $this->readSearchersFile($id)['searchers'];
    }

    /**
     * Returns the 'document_path' entry stored in searchers.json for the given id.
     *
     * @param $id
     * @return mixed
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     * @throws \Exception When the stored file content is empty.
     */
    public function getStoreAtPathFromJsonFile($id)
    {
        return $this->readSearchersFile($id)['document_path'];
    }

    /**
     * Reads "contracts/$id/searchers.json" from the local disk and decodes it.
     *
     * @param $id
     * @return mixed Result of json_decode() in associative mode.
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     * @throws \Exception When the file content is empty or otherwise falsy.
     */
    private function readSearchersFile($id)
    {
        $storage = Storage::disk('local');
        $directory = "contracts/$id";

        $searchers = $storage->get("$directory/searchers.json");

        if ( ! $searchers) {
            throw new \Exception('Searchers do not exist.');
        }

        return json_decode($searchers, true);
    }

    /**
     * Passes the document's real path, MIME type and name to SendDocument::execute(),
     * with 'original' as the third argument.
     *
     * @param $id
     * @param $document Object providing getRealPath() and getMimeType().
     * @param $fileName Name sent along with the document.
     * @throws \Exception
     */
    protected function sendDocumentToIngest($id, $document, $fileName)
    {
        $sendDocument = new SendDocument();

        $sendDocument->execute($id, [
            'path' => $document->getRealPath(),
            'type' => $document->getMimeType(),
            'name' => $fileName
        ], 'original');
    }

    /**
     * Passes the updated data to SendDataToRecreateDocument::execute().
     *
     * @param $id
     * @param $data
     * @throws \GuzzleHttp\Exception\GuzzleException
     */
    protected function sendDataToIngestToRebuild($id, $data)
    {
        $handler = new SendDataToRecreateDocument();
        $handler->execute($id, $data);
    }
}