From 516d06599b034a1a85f21724d29fa278892f5786 Mon Sep 17 00:00:00 2001 From: Orzu Ionut Date: Thu, 9 Sep 2021 16:15:12 +0300 Subject: [PATCH] Apply SD on original document from CLI command. Add performance analyzer for CLI ran operations. --- app/Console/Commands/RunSearchDisplace.php | 132 +++++++++++++----- app/Events/SDAppliedOnMarkdownDocument.php | 36 +++++ app/Events/SDAppliedOnOriginalDocument.php | 36 +++++ app/Listeners/AnalyzeOperationPerformance.php | 45 ++++++ app/Providers/EventServiceProvider.php | 11 ++ .../Ingest/HandleReceivedDocument.php | 24 +++- app/SearchDisplace/Ingest/SendDocument.php | 20 ++- app/SearchDisplace/SearchAndDisplace.php | 4 + .../SearchAndDisplaceFromFiles.php | 9 +- .../SearchAndDisplaceOriginalDocument.php | 53 +++++-- demo-cli/README.md | 30 +++- 11 files changed, 347 insertions(+), 53 deletions(-) create mode 100644 app/Events/SDAppliedOnMarkdownDocument.php create mode 100644 app/Events/SDAppliedOnOriginalDocument.php create mode 100644 app/Listeners/AnalyzeOperationPerformance.php diff --git a/app/Console/Commands/RunSearchDisplace.php b/app/Console/Commands/RunSearchDisplace.php index 760cabd..3bb28e0 100644 --- a/app/Console/Commands/RunSearchDisplace.php +++ b/app/Console/Commands/RunSearchDisplace.php @@ -3,7 +3,10 @@ namespace App\Console\Commands; use App\SearchDisplace\Ingest\SendDocument; +use App\SearchDisplace\SearchAndDisplaceOriginalDocument; use Illuminate\Console\Command; +use Illuminate\Support\Carbon; +use Illuminate\Support\Facades\Redis; use Illuminate\Support\Facades\Storage; class RunSearchDisplace extends Command @@ -15,7 +18,8 @@ class RunSearchDisplace extends Command */ protected $signature = 'sd:run {path : The document path} - {filters* : The filters which will be applied to the search}'; + {filters* : The filters which will be applied to the search} + {--original : Whether the operation will recreate the original document format}'; /** * The console command description. 
@@ -32,6 +36,8 @@ class RunSearchDisplace extends Command public function __construct() { parent::__construct(); + + // @TODO Add way to handle 'displace'.. Right now we are doing the 'replace' with ':'. } /** @@ -42,22 +48,16 @@ class RunSearchDisplace extends Command { $documentPath = $this->argument('path'); $searchers = $this->argument('filters'); - - $id = md5(uniqid(rand(), true)); - $pathDetails = pathinfo($documentPath); - - $resultedDocumentPath = $pathDetails['dirname'] . '/' . $pathDetails['filename'] . '-displaced.md'; + $original = $this->option('original'); try { - $this->storeSearchers($id, $searchers, $resultedDocumentPath); + $this->bootAnalyzer($documentPath); - $sendToIngest = new SendDocument(); - - $sendToIngest->execute($id, [ - 'path' => $documentPath, - 'name' => $pathDetails['basename'], - 'type' => $this->getFileMimeType($documentPath), - ]); + if ( ! $original) { + $this->runMarkdownOperation($documentPath, $searchers); + } else { + $this->runOriginalDocumentOperation($documentPath, $searchers); + } $this->info('Processing document..'); $this->info('After the processing will be done the result will show up at the same path as the input.'); @@ -66,6 +66,41 @@ class RunSearchDisplace extends Command } } + /** + * @param $documentPath + * @param $searchers + * @throws \Exception + */ + protected function runMarkdownOperation($documentPath, $searchers) + { + $id = md5(uniqid(rand(), true)); + $pathDetails = pathinfo($documentPath); + + $resultedDocumentPath = $pathDetails['dirname'] . '/' . $pathDetails['filename'] . 
'-displaced.md'; + + $this->storeSearchers($id, $searchers, $resultedDocumentPath); + + $sendToIngest = new SendDocument(); + + $sendToIngest->execute($id, [ + 'path' => $documentPath, + 'name' => $pathDetails['basename'], + 'type' => $this->getFileMimeType($documentPath), + ]); + } + + /** + * @param $documentPath + * @param $searchers + * @throws \Exception + */ + protected function runOriginalDocumentOperation($documentPath, $searchers) + { + $handler = new SearchAndDisplaceOriginalDocument(); + + $handler->start($documentPath, $this->getListOfSearchersAndActions($searchers)); + } + protected function storeSearchers($id, $searchers, $storeResultPath) { $data = [ @@ -78,6 +113,14 @@ class RunSearchDisplace extends Command $storage->put("searchers/$id.json", json_encode($data)); } + protected function bootAnalyzer($filePath) + { + $redis = Redis::connection(); + + $redis->set('analyze_performance_time', Carbon::now()->format('U')); + $redis->set('analyze_performance_path', pathinfo($filePath,PATHINFO_DIRNAME)); + } + protected function getSearchers($searchers) { if (count($searchers) === 1 && str_contains($searchers[0], '.json')) { @@ -93,18 +136,17 @@ class RunSearchDisplace extends Command $list = []; - foreach ($searchers as $searcher) { - $result = explode(':', $searcher); - - $searcherPath = 'searchers/' . $result[0] . '.json'; + foreach ($this->getListOfSearchersAndActions($searchers) as $searcherInfo) { + $searcherPath = 'searchers/' . $searcherInfo['key'] . '.json'; if ( ! $storage->exists($searcherPath)) { - throw new \Exception('Searcher does not exist'); + throw new \Exception('Searcher "' . $searcherInfo['key'] . '" does not exist'); } $list[] = [ 'content' => json_decode($storage->get($searcherPath), true), - 'replace_with' => count($result) > 1 ? 
$result[1] : '', + 'type' => $searcherInfo['type'], + 'value' => $searcherInfo['value'], ]; } @@ -113,18 +155,10 @@ class RunSearchDisplace extends Command protected function getSearchersFromFile($argument) { - $result = explode(':', $argument); + $searchersList = $this->getListOfSearchersAndActions([$argument]); + $searcherInfo = $searchersList[0]; - if (count($result) > 1) { - $path = $result[0]; - - $replaceWith = $result[1]; - } else { - $path = $argument; - $replaceWith = ''; - } - - $contents = file_get_contents($path); + $contents = file_get_contents($searcherInfo['key']); if ( ! $contents) { throw new \Exception('There is no data in the searcher JSON file.'); @@ -133,11 +167,45 @@ class RunSearchDisplace extends Command return [ [ 'content' => json_decode($contents), - 'replace_with' => $replaceWith, + 'type' => $searcherInfo['type'], + 'value' => $searcherInfo['value'], ], ]; } + protected function getListOfSearchersAndActions($searchers) + { + $searchersList = []; + + foreach ($searchers as $searcher) { + $replaceActionResult = explode(':', $searcher); + + $searcherKey = $replaceActionResult[0]; + $type = 'replace'; + $value = ''; + + if (count($replaceActionResult) === 1) { + $displaceActionResult = explode('+', $searcher); + + if (count($displaceActionResult) > 1) { + $searcherKey = $displaceActionResult[0]; + $type = 'displace'; + $value = $displaceActionResult[1]; + } + } else { + $value = $replaceActionResult[1]; + } + + $searchersList[] = [ + 'key' => $searcherKey, + 'type' => $type, + 'value' => $value, + ]; + } + + return $searchersList; + } + protected function getFileMimeType($file) { if (function_exists('finfo_file')) { $finfo = finfo_open(FILEINFO_MIME_TYPE); diff --git a/app/Events/SDAppliedOnMarkdownDocument.php b/app/Events/SDAppliedOnMarkdownDocument.php new file mode 100644 index 0000000..17f773a --- /dev/null +++ b/app/Events/SDAppliedOnMarkdownDocument.php @@ -0,0 +1,36 @@ +get('analyze_performance_path'); + $startedAt = 
$redis->get('analyze_performance_time'); + + if ( ! $directoryPath || ! $startedAt) { + return; + } + + $endedAt = Carbon::now()->format('U'); + + $data = 'Time elapsed in seconds: ' . ($endedAt - $startedAt) . "\n"; + + file_put_contents($directoryPath . '/sd_analyze_performance.txt', $data); + } +} diff --git a/app/Providers/EventServiceProvider.php b/app/Providers/EventServiceProvider.php index 3fd8270..987167c 100644 --- a/app/Providers/EventServiceProvider.php +++ b/app/Providers/EventServiceProvider.php @@ -3,6 +3,9 @@ namespace App\Providers; use App\Events\IngestDocumentReceived; +use App\Events\SDAppliedOnMarkdownDocument; +use App\Events\SDAppliedOnOriginalDocument; +use App\Listeners\AnalyzeOperationPerformance; use App\Listeners\RunSearchAndDisplaceOnDocument; use Illuminate\Foundation\Support\Providers\EventServiceProvider as ServiceProvider; @@ -17,6 +20,14 @@ class EventServiceProvider extends ServiceProvider IngestDocumentReceived::class => [ RunSearchAndDisplaceOnDocument::class, ], + + SDAppliedOnOriginalDocument::class => [ + AnalyzeOperationPerformance::class, + ], + + SDAppliedOnMarkdownDocument::class => [ + AnalyzeOperationPerformance::class, + ], ]; /** diff --git a/app/SearchDisplace/Ingest/HandleReceivedDocument.php b/app/SearchDisplace/Ingest/HandleReceivedDocument.php index e475e61..17778ca 100644 --- a/app/SearchDisplace/Ingest/HandleReceivedDocument.php +++ b/app/SearchDisplace/Ingest/HandleReceivedDocument.php @@ -3,9 +3,11 @@ namespace App\SearchDisplace\Ingest; use App\Events\IngestDocumentReceived; +use App\Events\SDAppliedOnOriginalDocument; use App\SearchDisplace\SearchAndDisplaceOriginalDocument; use GuzzleHttp\Client; use GuzzleHttp\Exception\ClientException; +use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Storage; class HandleReceivedDocument @@ -101,14 +103,30 @@ class HandleReceivedDocument } $filePath = "document.$this->documentFormat"; - $storeDocumentAtPath = 'contracts/' . $this->id . '-' . 
$filePath; + + $storeDocumentAtPath = $storage->path('contracts/' . $this->id . '-' . $filePath); + + try { + // Change the path where to store the document if the operation was run from CLI. + $storeAtPathFromJsonFile = (new SearchAndDisplaceOriginalDocument())->getStoreAtPathFromJsonFile($this->id); + + if ($storeAtPathFromJsonFile) { + $storeDocumentAtPath = $storeAtPathFromJsonFile . + '/' . + $this->id . + '-displaced.' . + $this->documentFormat; + } + } catch (\Exception $exception) { + \Illuminate\Support\Facades\Log::info('Exception thrown when tried reading the storeDocumentAtPath from JSON file.'); + } $client = new Client(); $url = env('SD_INGEST_URL') . '/recreate-document/' . $this->id . '?file_path=' . $filePath; try { $client->request('GET', $url, [ - 'sink' => $storage->path($storeDocumentAtPath), + 'sink' => $storeDocumentAtPath, ]); } catch (ClientException $clientException) { $error = json_decode($clientException->getResponse()->getBody()->getContents(), true); @@ -117,6 +135,8 @@ class HandleReceivedDocument } finally { $storage->deleteDirectory("contracts/$this->id"); } + + SDAppliedOnOriginalDocument::dispatch($this->id); } /** diff --git a/app/SearchDisplace/Ingest/SendDocument.php b/app/SearchDisplace/Ingest/SendDocument.php index a978032..da0b063 100644 --- a/app/SearchDisplace/Ingest/SendDocument.php +++ b/app/SearchDisplace/Ingest/SendDocument.php @@ -4,6 +4,7 @@ namespace App\SearchDisplace\Ingest; use GuzzleHttp\Client; use GuzzleHttp\Exception\ClientException; +use GuzzleHttp\Exception\GuzzleException; class SendDocument { @@ -14,9 +15,21 @@ class SendDocument $this->url = env('SD_INGEST_URL') . '/ingest'; } - public function execute($id, $document, $fileResultType = 'md') + /** + * + * + * @param $id + * @param $document + * @param string $fileResultType + * @throws \Exception + */ + public function execute($id, $document, string $fileResultType = 'md') { try { + if ( ! 
in_array($fileResultType, ['md', 'original'])) { + throw new \Exception('Invalid file result type provided.'); + } + $response = $this->sendRequest($id, $document, $fileResultType); if ($response['status'] === 'fail') { @@ -30,6 +43,8 @@ class SendDocument // The file in Ingest si in Processing state. } catch (\Exception $exception) { throw new \Exception($exception->getMessage()); + } catch (GuzzleException $exception) { + throw new \Exception($exception->getMessage()); } } @@ -38,8 +53,9 @@ class SendDocument * * @param $id * @param $document + * @param $fileResultType * @return mixed - * @throws \GuzzleHttp\Exception\GuzzleException + * @throws GuzzleException */ public function sendRequest($id, $document, $fileResultType) { diff --git a/app/SearchDisplace/SearchAndDisplace.php b/app/SearchDisplace/SearchAndDisplace.php index a3ad06b..f999bc0 100644 --- a/app/SearchDisplace/SearchAndDisplace.php +++ b/app/SearchDisplace/SearchAndDisplace.php @@ -17,6 +17,10 @@ class SearchAndDisplace $this->searchOnly = $searchOnly; } + /** + * @return array|mixed + * @throws \Exception + */ public function execute() { $searchResult = $this->search(); diff --git a/app/SearchDisplace/SearchAndDisplaceFromFiles.php b/app/SearchDisplace/SearchAndDisplaceFromFiles.php index 6291f7e..ceb006d 100644 --- a/app/SearchDisplace/SearchAndDisplaceFromFiles.php +++ b/app/SearchDisplace/SearchAndDisplaceFromFiles.php @@ -2,6 +2,7 @@ namespace App\SearchDisplace; +use App\Events\SDAppliedOnMarkdownDocument; use Illuminate\Support\Facades\Storage; class SearchAndDisplaceFromFiles @@ -23,8 +24,8 @@ class SearchAndDisplaceFromFiles public function execute() { + // The files don't exist, so we don't have to apply S&D. if ( ! $this->storage->exists($this->directoryPath) || ! $this->storage->exists($this->infoFilePath)) { - // Handle this case, must report result to user. 
return; } @@ -41,7 +42,8 @@ class SearchAndDisplaceFromFiles 'searchers' => [ [ 'key' => $this->id, - 'replace_with' => $searchers[0]['replace_with'], + 'type' => $searchers[0]['type'], + 'value' => $searchers[0]['value'], ] ], ]); @@ -49,8 +51,11 @@ class SearchAndDisplaceFromFiles $result = $searchAndDisplace->execute(); file_put_contents($documentPath, $result['content']); + + SDAppliedOnMarkdownDocument::dispatch($this->id); } catch (\Exception $exception) { \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage()); + \Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getTraceAsString()); return; } finally { diff --git a/app/SearchDisplace/SearchAndDisplaceOriginalDocument.php b/app/SearchDisplace/SearchAndDisplaceOriginalDocument.php index 8825fc3..7f82e7c 100644 --- a/app/SearchDisplace/SearchAndDisplaceOriginalDocument.php +++ b/app/SearchDisplace/SearchAndDisplaceOriginalDocument.php @@ -4,6 +4,8 @@ namespace App\SearchDisplace; use App\SearchDisplace\Ingest\SendDataToRecreateDocument; use App\SearchDisplace\Ingest\SendDocument; +use Illuminate\Http\File; +use Illuminate\Http\UploadedFile; use Illuminate\Support\Facades\Storage; class SearchAndDisplaceOriginalDocument @@ -14,10 +16,24 @@ class SearchAndDisplaceOriginalDocument */ public function start($document, $searchers) { - $id = time() . '_' . pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME); + $storeResultAtPath = ''; - $this->storeSearchers($id, $searchers); - $this->sendDocumentToIngest($id, $document); + if ($document instanceof UploadedFile) { + $fileName = pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME); + } else { + // From CLI. + + $document = new File($document); + + $fileName = str_replace('.' . $document->getExtension(), '', $document->getFilename()); + + $storeResultAtPath = $document->getPath(); + } + + $id = time() . '_' . 
$fileName; + + $this->storeSearchers($id, $searchers, $storeResultAtPath); + $this->sendDocumentToIngest($id, $document, $fileName); return $id; } @@ -51,7 +67,11 @@ class SearchAndDisplaceOriginalDocument $this->sendDataToIngestToRebuild($id, $data); } catch (\Exception $exception) { + \Illuminate\Support\Facades\Log::info('========================'); + \Illuminate\Support\Facades\Log::info('Exception - SearchAndDisplaceOriginalDocument@applySD'); \Illuminate\Support\Facades\Log::info($exception->getMessage()); + \Illuminate\Support\Facades\Log::info($exception->getTraceAsString()); + \Illuminate\Support\Facades\Log::info('========================'); } } @@ -177,13 +197,16 @@ class SearchAndDisplaceOriginalDocument return $ingestData; } - protected function storeSearchers($id, $searchers) + protected function storeSearchers($id, $searchers, $storeResultAtPath) { $storage = Storage::disk('local'); $directory = "contracts/$id"; $storage->makeDirectory($directory); - $storage->put("$directory/searchers.json", json_encode($searchers)); + $storage->put("$directory/searchers.json", json_encode([ + 'searchers' => $searchers, + 'document_path' => $storeResultAtPath ?? '', + ])); } /** @@ -203,7 +226,21 @@ class SearchAndDisplaceOriginalDocument throw new \Exception('Searchers do not exist.'); } - return json_decode($searchers, true); + return json_decode($searchers, true)['searchers']; + } + + public function getStoreAtPathFromJsonFile($id) + { + $storage = Storage::disk('local'); + $directory = "contracts/$id"; + + $searchers = $storage->get("$directory/searchers.json"); + + if ( ! 
$searchers) { + throw new \Exception('Searchers do not exist.'); + } + + return json_decode($searchers, true)['document_path']; } /** @@ -212,14 +249,14 @@ class SearchAndDisplaceOriginalDocument * @param $document * @throws \Exception */ - protected function sendDocumentToIngest($id, $document) + protected function sendDocumentToIngest($id, $document, $fileName) { $sendDocument = new SendDocument(); $sendDocument->execute($id, [ 'path' => $document->getRealPath(), 'type' => $document->getMimeType(), - 'name' => $document->getClientOriginalName() + 'name' => $fileName ], 'original'); } diff --git a/demo-cli/README.md b/demo-cli/README.md index 7de20a5..92427de 100644 --- a/demo-cli/README.md +++ b/demo-cli/README.md @@ -1,18 +1,30 @@ # Running S&D via CLI ### Command -`php artisan sd:run {path} {searchers*}` +`php artisan sd:run {path} {searchers*} {--original}` The command accepts two arguments: - path: The path to the document file on which the Search&Displace will run - searchers: This argument can be one of the following two types: - - file searchers: the argument must only have one group in the format 'path:replace_with', where path is the path to a **valid JSON** file - - inline searchers: the argument can have multiple groups of inline searchers in the format 'key: replace_with'. + - file searchers: the argument must only have one group in the format 'path:replace_with' or 'path+displace_with', +where path is the path to a **valid JSON** file + - inline searchers: the argument can have multiple groups of inline searchers in +the format 'key:replace_with' or 'key+displace_with'. The 'key' represents a valid searcher found in the 'storage/app/searchers' directory, without the '.json' extension. +- --original: This optional argument can be used if the resulted document should be in the original document format + and having the same structure and styles, otherwise the resulted document will be a Markdown file. 
+ +The ':replace_with' or '+displace_with' values are optional; omitting them removes the found text strings. -The 'replace_with' value is optional, not using it will remove the found text strings. +If the 'replace_with' value is used, for example 'X:example', then all text strings found will be replaced with +the 'example' text. -The resulted Markdown document will be created in the same directory as the input document file. +If the 'displace_with' value is used, for example 'Y+example', then all text strings found will be replaced with +text composed using the following format: '{DISPLACE_WITH} FOUND TEXT {/DISPLACE_WITH}', so in our example, if the +operation finds the text string 'placeholder', then the result will be '{example} placeholder {/example}'. + +The resulting Markdown document, or the document in the original format, will be created in the +same directory as the input document file, together with a performance analysis file ('sd_analyze_performance.txt'). ### Examples Note! These examples work when running the command from the root app directory, otherwise you have to @@ -25,12 +37,16 @@ input the correct paths in the command, including for the 'artisan' file. 
`php artisan sd:run ./demo-cli/demo_document.pdf ./demo-cli/demo_searcher.json:EMAIL` - Using valid searcher key (which exists in the directory 'storage/app/searchers') and removing all strings found - `php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher:EMAIL` + `php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher` - Using valid searcher key (which exists in the directory 'storage/app/searchers') and replacing all strings found with the string 'EMAIL' `php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher:EMAIL` +- Using valid JSON file searcher and displacing all strings found using 'EMAIL' + `php artisan sd:run ./demo-cli/demo_document.pdf ./demo-cli/demo_searcher.json+EMAIL` - +- Using valid searcher key (which exists in the directory 'storage/app/searchers') and +displacing all strings found using 'EMAIL' + `php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher+EMAIL`