Browse Source

Apply SD on original document from CLI command. Add performance analyzer for CLI ran operations.

master
Orzu Ionut 3 years ago
parent
commit
516d06599b
  1. 132
      app/Console/Commands/RunSearchDisplace.php
  2. 36
      app/Events/SDAppliedOnMarkdownDocument.php
  3. 36
      app/Events/SDAppliedOnOriginalDocument.php
  4. 45
      app/Listeners/AnalyzeOperationPerformance.php
  5. 11
      app/Providers/EventServiceProvider.php
  6. 24
      app/SearchDisplace/Ingest/HandleReceivedDocument.php
  7. 20
      app/SearchDisplace/Ingest/SendDocument.php
  8. 4
      app/SearchDisplace/SearchAndDisplace.php
  9. 9
      app/SearchDisplace/SearchAndDisplaceFromFiles.php
  10. 53
      app/SearchDisplace/SearchAndDisplaceOriginalDocument.php
  11. 30
      demo-cli/README.md

132
app/Console/Commands/RunSearchDisplace.php

@ -3,7 +3,10 @@
namespace App\Console\Commands;
use App\SearchDisplace\Ingest\SendDocument;
use App\SearchDisplace\SearchAndDisplaceOriginalDocument;
use Illuminate\Console\Command;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\Redis;
use Illuminate\Support\Facades\Storage;
class RunSearchDisplace extends Command
@ -15,7 +18,8 @@ class RunSearchDisplace extends Command
*/
protected $signature = 'sd:run
{path : The document path}
{filters* : The filters which will be applied to the search}';
{filters* : The filters which will be applied to the search}
{--original : Whether the operation will recreate the original document format}';
/**
* The console command description.
@ -32,6 +36,8 @@ class RunSearchDisplace extends Command
public function __construct()
{
parent::__construct();
// Filters are written as 'key:value' (replace) or 'key+value' (displace);
// they are parsed in getListOfSearchersAndActions().
}
/**
@ -42,22 +48,16 @@ class RunSearchDisplace extends Command
{
$documentPath = $this->argument('path');
$searchers = $this->argument('filters');
$id = md5(uniqid(rand(), true));
$pathDetails = pathinfo($documentPath);
$resultedDocumentPath = $pathDetails['dirname'] . '/' . $pathDetails['filename'] . '-displaced.md';
$original = $this->option('original');
try {
$this->storeSearchers($id, $searchers, $resultedDocumentPath);
$this->bootAnalyzer($documentPath);
$sendToIngest = new SendDocument();
$sendToIngest->execute($id, [
'path' => $documentPath,
'name' => $pathDetails['basename'],
'type' => $this->getFileMimeType($documentPath),
]);
if ( ! $original) {
$this->runMarkdownOperation($documentPath, $searchers);
} else {
$this->runOriginalDocumentOperation($documentPath, $searchers);
}
$this->info('Processing document..');
$this->info('After the processing will be done the result will show up at the same path as the input.');
@ -66,6 +66,41 @@ class RunSearchDisplace extends Command
}
}
/**
 * Run the Markdown variant of the operation.
 *
 * Generates a fresh id, stores the searchers together with the path where the
 * result should end up (the input path with its extension replaced by
 * "-displaced.md"), then passes the document to SendDocument.
 *
 * @param string $documentPath Path of the document to process.
 * @param array $searchers Filters to apply to the document.
 * @throws \Exception
 */
protected function runMarkdownOperation($documentPath, $searchers)
{
    $operationId = md5(uniqid(rand(), true));

    $info = pathinfo($documentPath);
    $outputPath = "{$info['dirname']}/{$info['filename']}-displaced.md";

    $this->storeSearchers($operationId, $searchers, $outputPath);

    (new SendDocument())->execute($operationId, [
        'path' => $documentPath,
        'name' => $info['basename'],
        'type' => $this->getFileMimeType($documentPath),
    ]);
}
/**
 * Run the variant of the operation that works on the original document, by
 * handing the document path and the parsed filter list to
 * SearchAndDisplaceOriginalDocument.
 *
 * @param string $documentPath Path of the document to process.
 * @param array $searchers Filters to apply to the document.
 * @throws \Exception
 */
protected function runOriginalDocumentOperation($documentPath, $searchers)
{
    $operation = new SearchAndDisplaceOriginalDocument();
    $actions = $this->getListOfSearchersAndActions($searchers);

    $operation->start($documentPath, $actions);
}
protected function storeSearchers($id, $searchers, $storeResultPath)
{
$data = [
@ -78,6 +113,14 @@ class RunSearchDisplace extends Command
$storage->put("searchers/$id.json", json_encode($data));
}
/**
 * Record the starting point of a run for the performance analyzer.
 *
 * Stores the current Unix timestamp and the directory of the given file in
 * Redis under the "analyze_performance_time" and "analyze_performance_path" keys.
 *
 * @param string $filePath Path of the document being processed.
 * @return void
 */
protected function bootAnalyzer($filePath)
{
    $connection = Redis::connection();

    $startedAt = Carbon::now()->format('U');
    $directory = pathinfo($filePath, PATHINFO_DIRNAME);

    $connection->set('analyze_performance_time', $startedAt);
    $connection->set('analyze_performance_path', $directory);
}
protected function getSearchers($searchers)
{
if (count($searchers) === 1 && str_contains($searchers[0], '.json')) {
@ -93,18 +136,17 @@ class RunSearchDisplace extends Command
$list = [];
foreach ($searchers as $searcher) {
$result = explode(':', $searcher);
$searcherPath = 'searchers/' . $result[0] . '.json';
foreach ($this->getListOfSearchersAndActions($searchers) as $searcherInfo) {
$searcherPath = 'searchers/' . $searcherInfo['key'] . '.json';
if ( ! $storage->exists($searcherPath)) {
throw new \Exception('Searcher does not exist');
throw new \Exception('Searcher "' . $searcherInfo['key'] . '" does not exist');
}
$list[] = [
'content' => json_decode($storage->get($searcherPath), true),
'replace_with' => count($result) > 1 ? $result[1] : '',
'type' => $searcherInfo['type'],
'value' => $searcherInfo['value'],
];
}
@ -113,18 +155,10 @@ class RunSearchDisplace extends Command
protected function getSearchersFromFile($argument)
{
$result = explode(':', $argument);
$searchersList = $this->getListOfSearchersAndActions([$argument]);
$searcherInfo = $searchersList[0];
if (count($result) > 1) {
$path = $result[0];
$replaceWith = $result[1];
} else {
$path = $argument;
$replaceWith = '';
}
$contents = file_get_contents($path);
$contents = file_get_contents($searcherInfo['key']);
if ( ! $contents) {
throw new \Exception('There is no data in the searcher JSON file.');
@ -133,11 +167,45 @@ class RunSearchDisplace extends Command
return [
[
'content' => json_decode($contents),
'replace_with' => $replaceWith,
'type' => $searcherInfo['type'],
'value' => $searcherInfo['value'],
],
];
}
/**
 * Parse the raw filter arguments into a list of searcher/action descriptors.
 *
 * Each argument has one of these forms:
 *  - "key"       => type "replace" with an empty value,
 *  - "key:value" => type "replace" with the given value,
 *  - "key+value" => type "displace" with the given value.
 *
 * A ':' anywhere in the argument takes precedence over '+'. Only the first
 * delimiter splits the argument, so the value itself may contain further
 * ':' or '+' characters (e.g. "link:http://example.com").
 *
 * @param iterable $searchers Raw filter strings.
 * @return array List of ['key' => string, 'type' => string, 'value' => string].
 */
protected function getListOfSearchersAndActions($searchers)
{
    $searchersList = [];

    foreach ($searchers as $searcher) {
        $type = 'replace';
        $value = '';

        // Limit of 2: split on the first delimiter only so the rest of the value stays intact.
        $replaceParts = explode(':', $searcher, 2);
        $searcherKey = $replaceParts[0];

        if (count($replaceParts) === 2) {
            $value = $replaceParts[1];
        } else {
            // No ':' present, so check whether this is a displace action instead.
            $displaceParts = explode('+', $searcher, 2);

            if (count($displaceParts) === 2) {
                $searcherKey = $displaceParts[0];
                $type = 'displace';
                $value = $displaceParts[1];
            }
        }

        $searchersList[] = [
            'key' => $searcherKey,
            'type' => $type,
            'value' => $value,
        ];
    }

    return $searchersList;
}
protected function getFileMimeType($file) {
if (function_exists('finfo_file')) {
$finfo = finfo_open(FILEINFO_MIME_TYPE);

36
app/Events/SDAppliedOnMarkdownDocument.php

@ -0,0 +1,36 @@
<?php
namespace App\Events;
use Illuminate\Broadcasting\Channel;
use Illuminate\Broadcasting\InteractsWithSockets;
use Illuminate\Broadcasting\PresenceChannel;
use Illuminate\Broadcasting\PrivateChannel;
use Illuminate\Contracts\Broadcasting\ShouldBroadcast;
use Illuminate\Foundation\Events\Dispatchable;
use Illuminate\Queue\SerializesModels;
/**
 * Event dispatched after Search & Displace has been applied on a Markdown document.
 */
class SDAppliedOnMarkdownDocument
{
    use Dispatchable, InteractsWithSockets, SerializesModels;

    /**
     * Identifier handed over by the dispatcher, or null when none was given.
     *
     * @var mixed
     */
    public $id;

    /**
     * Create a new event instance.
     *
     * The id is optional so that creating or dispatching the event without
     * arguments keeps working; the dispatcher passes the operation id, which
     * was previously discarded.
     *
     * @param mixed $id
     * @return void
     */
    public function __construct($id = null)
    {
        $this->id = $id;
    }

    /**
     * Get the channels the event should broadcast on.
     *
     * @return \Illuminate\Broadcasting\Channel|array
     */
    public function broadcastOn()
    {
        return new PrivateChannel('channel-name');
    }
}

36
app/Events/SDAppliedOnOriginalDocument.php

@ -0,0 +1,36 @@
<?php
namespace App\Events;
use Illuminate\Broadcasting\Channel;
use Illuminate\Broadcasting\InteractsWithSockets;
use Illuminate\Broadcasting\PresenceChannel;
use Illuminate\Broadcasting\PrivateChannel;
use Illuminate\Contracts\Broadcasting\ShouldBroadcast;
use Illuminate\Foundation\Events\Dispatchable;
use Illuminate\Queue\SerializesModels;
/**
 * Event dispatched after Search & Displace has been applied on a document
 * that is recreated in its original format.
 */
class SDAppliedOnOriginalDocument
{
    use Dispatchable, InteractsWithSockets, SerializesModels;

    /**
     * Identifier handed over by the dispatcher, or null when none was given.
     *
     * @var mixed
     */
    public $id;

    /**
     * Create a new event instance.
     *
     * The id is optional so that creating or dispatching the event without
     * arguments keeps working; the dispatcher passes the operation id, which
     * was previously discarded.
     *
     * @param mixed $id
     * @return void
     */
    public function __construct($id = null)
    {
        $this->id = $id;
    }

    /**
     * Get the channels the event should broadcast on.
     *
     * @return \Illuminate\Broadcasting\Channel|array
     */
    public function broadcastOn()
    {
        return new PrivateChannel('channel-name');
    }
}

45
app/Listeners/AnalyzeOperationPerformance.php

@ -0,0 +1,45 @@
<?php
namespace App\Listeners;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\Redis;
/**
 * Listener that reports how long an operation took.
 *
 * It reads the start time and target directory stored in Redis under the
 * "analyze_performance_time" and "analyze_performance_path" keys and writes
 * the elapsed seconds to "sd_analyze_performance.txt" in that directory.
 */
class AnalyzeOperationPerformance
{
    /**
     * Create the event listener.
     *
     * @return void
     */
    public function __construct()
    {
        //
    }

    /**
     * Handle the event.
     *
     * Does nothing when either Redis marker is missing.
     *
     * @param object $event
     * @return void
     */
    public function handle($event)
    {
        $redis = Redis::connection();

        $directoryPath = $redis->get('analyze_performance_path');
        $startedAt = $redis->get('analyze_performance_time');

        if ( ! $directoryPath || ! $startedAt) {
            return;
        }

        // One report per recorded start: drop the markers so that a later event which
        // was not preceded by a new start does not report against stale values.
        $redis->del('analyze_performance_path');
        $redis->del('analyze_performance_time');

        $endedAt = Carbon::now()->format('U');
        $data = 'Time elapsed in seconds: ' . ($endedAt - $startedAt) . "\n";

        file_put_contents($directoryPath . '/sd_analyze_performance.txt', $data);
    }
}

11
app/Providers/EventServiceProvider.php

@ -3,6 +3,9 @@
namespace App\Providers;
use App\Events\IngestDocumentReceived;
use App\Events\SDAppliedOnMarkdownDocument;
use App\Events\SDAppliedOnOriginalDocument;
use App\Listeners\AnalyzeOperationPerformance;
use App\Listeners\RunSearchAndDisplaceOnDocument;
use Illuminate\Foundation\Support\Providers\EventServiceProvider as ServiceProvider;
@ -17,6 +20,14 @@ class EventServiceProvider extends ServiceProvider
IngestDocumentReceived::class => [
RunSearchAndDisplaceOnDocument::class,
],
SDAppliedOnOriginalDocument::class => [
AnalyzeOperationPerformance::class,
],
SDAppliedOnMarkdownDocument::class => [
AnalyzeOperationPerformance::class,
],
];
/**

24
app/SearchDisplace/Ingest/HandleReceivedDocument.php

@ -3,9 +3,11 @@
namespace App\SearchDisplace\Ingest;
use App\Events\IngestDocumentReceived;
use App\Events\SDAppliedOnOriginalDocument;
use App\SearchDisplace\SearchAndDisplaceOriginalDocument;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
class HandleReceivedDocument
@ -101,14 +103,30 @@ class HandleReceivedDocument
}
$filePath = "document.$this->documentFormat";
$storeDocumentAtPath = 'contracts/' . $this->id . '-' . $filePath;
$storeDocumentAtPath = $storage->path('contracts/' . $this->id . '-' . $filePath);
try {
// Change the path where to store the document if the operation was run from CLI.
$storeAtPathFromJsonFile = (new SearchAndDisplaceOriginalDocument())->getStoreAtPathFromJsonFile($this->id);
if ($storeAtPathFromJsonFile) {
$storeDocumentAtPath = $storeAtPathFromJsonFile .
'/' .
$this->id .
'-displaced.' .
$this->documentFormat;
}
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info('Exception thrown when tried reading the storeDocumentAtPath from JSON file.');
}
$client = new Client();
$url = env('SD_INGEST_URL') . '/recreate-document/' . $this->id . '?file_path=' . $filePath;
try {
$client->request('GET', $url, [
'sink' => $storage->path($storeDocumentAtPath),
'sink' => $storeDocumentAtPath,
]);
} catch (ClientException $clientException) {
$error = json_decode($clientException->getResponse()->getBody()->getContents(), true);
@ -117,6 +135,8 @@ class HandleReceivedDocument
} finally {
$storage->deleteDirectory("contracts/$this->id");
}
SDAppliedOnOriginalDocument::dispatch($this->id);
}
/**

20
app/SearchDisplace/Ingest/SendDocument.php

@ -4,6 +4,7 @@ namespace App\SearchDisplace\Ingest;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\GuzzleException;
class SendDocument
{
@ -14,9 +15,21 @@ class SendDocument
$this->url = env('SD_INGEST_URL') . '/ingest';
}
public function execute($id, $document, $fileResultType = 'md')
/**
*
*
* @param $id
* @param $document
* @param string $fileResultType
* @throws \Exception
*/
public function execute($id, $document, string $fileResultType = 'md')
{
try {
if ( ! in_array($fileResultType, ['md', 'original'])) {
throw new \Exception('Invalid file result type provided.');
}
$response = $this->sendRequest($id, $document, $fileResultType);
if ($response['status'] === 'fail') {
@ -30,6 +43,8 @@ class SendDocument
// The file in Ingest is in Processing state.
} catch (\Exception $exception) {
throw new \Exception($exception->getMessage());
} catch (GuzzleException $exception) {
throw new \Exception($exception->getMessage());
}
}
@ -38,8 +53,9 @@ class SendDocument
*
* @param $id
* @param $document
* @param $fileResultType
* @return mixed
* @throws \GuzzleHttp\Exception\GuzzleException
* @throws GuzzleException
*/
public function sendRequest($id, $document, $fileResultType)
{

4
app/SearchDisplace/SearchAndDisplace.php

@ -17,6 +17,10 @@ class SearchAndDisplace
$this->searchOnly = $searchOnly;
}
/**
* @return array|mixed
* @throws \Exception
*/
public function execute()
{
$searchResult = $this->search();

9
app/SearchDisplace/SearchAndDisplaceFromFiles.php

@ -2,6 +2,7 @@
namespace App\SearchDisplace;
use App\Events\SDAppliedOnMarkdownDocument;
use Illuminate\Support\Facades\Storage;
class SearchAndDisplaceFromFiles
@ -23,8 +24,8 @@ class SearchAndDisplaceFromFiles
public function execute()
{
// The files don't exist, so we don't have to apply S&D.
if ( ! $this->storage->exists($this->directoryPath) || ! $this->storage->exists($this->infoFilePath)) {
// Handle this case, must report result to user.
return;
}
@ -41,7 +42,8 @@ class SearchAndDisplaceFromFiles
'searchers' => [
[
'key' => $this->id,
'replace_with' => $searchers[0]['replace_with'],
'type' => $searchers[0]['type'],
'value' => $searchers[0]['value'],
]
],
]);
@ -49,8 +51,11 @@ class SearchAndDisplaceFromFiles
$result = $searchAndDisplace->execute();
file_put_contents($documentPath, $result['content']);
SDAppliedOnMarkdownDocument::dispatch($this->id);
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getMessage());
\Illuminate\Support\Facades\Log::info('EXCEPTION: ' . $exception->getTraceAsString());
return;
} finally {

53
app/SearchDisplace/SearchAndDisplaceOriginalDocument.php

@ -4,6 +4,8 @@ namespace App\SearchDisplace;
use App\SearchDisplace\Ingest\SendDataToRecreateDocument;
use App\SearchDisplace\Ingest\SendDocument;
use Illuminate\Http\File;
use Illuminate\Http\UploadedFile;
use Illuminate\Support\Facades\Storage;
class SearchAndDisplaceOriginalDocument
@ -14,10 +16,24 @@ class SearchAndDisplaceOriginalDocument
*/
public function start($document, $searchers)
{
$id = time() . '_' . pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);
$storeResultAtPath = '';
$this->storeSearchers($id, $searchers);
$this->sendDocumentToIngest($id, $document);
if ($document instanceof UploadedFile) {
$fileName = pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);
} else {
// From CLI.
$document = new File($document);
$fileName = str_replace('.' . $document->getExtension(), '', $document->getFilename());
$storeResultAtPath = $document->getPath();
}
$id = time() . '_' . $fileName;
$this->storeSearchers($id, $searchers, $storeResultAtPath);
$this->sendDocumentToIngest($id, $document, $fileName);
return $id;
}
@ -51,7 +67,11 @@ class SearchAndDisplaceOriginalDocument
$this->sendDataToIngestToRebuild($id, $data);
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info('========================');
\Illuminate\Support\Facades\Log::info('Exception - SearchAndDisplaceOriginalDocument@applySD');
\Illuminate\Support\Facades\Log::info($exception->getMessage());
\Illuminate\Support\Facades\Log::info($exception->getTraceAsString());
\Illuminate\Support\Facades\Log::info('========================');
}
}
@ -177,13 +197,16 @@ class SearchAndDisplaceOriginalDocument
return $ingestData;
}
protected function storeSearchers($id, $searchers)
protected function storeSearchers($id, $searchers, $storeResultAtPath)
{
$storage = Storage::disk('local');
$directory = "contracts/$id";
$storage->makeDirectory($directory);
$storage->put("$directory/searchers.json", json_encode($searchers));
$storage->put("$directory/searchers.json", json_encode([
'searchers' => $searchers,
'document_path' => $storeResultAtPath ?? '',
]));
}
/**
@ -203,7 +226,21 @@ class SearchAndDisplaceOriginalDocument
throw new \Exception('Searchers do not exist.');
}
return json_decode($searchers, true);
return json_decode($searchers, true)['searchers'];
}
/**
 * Read the "document_path" entry from the searchers.json file stored for the given id.
 *
 * @param string $id Identifier whose "contracts/$id" directory holds the file.
 * @return string The stored path, or an empty string when the file has no such entry.
 * @throws \Exception When no contents could be read from the file.
 */
public function getStoreAtPathFromJsonFile($id)
{
    $storage = Storage::disk('local');
    $contents = $storage->get("contracts/$id/searchers.json");

    if ( ! $contents) {
        throw new \Exception('Searchers do not exist.');
    }

    $decoded = json_decode($contents, true);

    // A file holding only a bare list of searchers, or malformed JSON, has no
    // 'document_path' entry; treat that as "no custom path" instead of failing
    // on a missing index.
    if ( ! is_array($decoded)) {
        return '';
    }

    return $decoded['document_path'] ?? '';
}
/**
@ -212,14 +249,14 @@ class SearchAndDisplaceOriginalDocument
* @param $document
* @throws \Exception
*/
protected function sendDocumentToIngest($id, $document)
protected function sendDocumentToIngest($id, $document, $fileName)
{
$sendDocument = new SendDocument();
$sendDocument->execute($id, [
'path' => $document->getRealPath(),
'type' => $document->getMimeType(),
'name' => $document->getClientOriginalName()
'name' => $fileName
], 'original');
}

30
demo-cli/README.md

@ -1,18 +1,30 @@
# Running S&D via CLI
### Command
`php artisan sd:run {path} {searchers*}`
`php artisan sd:run {path} {searchers*} {--original}`
The command accepts two arguments and one option:
- path: The path to the document file on which the Search&Displace will run
- searchers: This argument can be one of the following two types:
- file searchers: the argument must only have one group in the format 'path:replace_with', where path is the path to a **valid JSON** file
- inline searchers: the argument can have multiple groups of inline searchers in the format 'key: replace_with'.
- file searchers: the argument must only have one group in the format 'path:replace_with' or 'path+displace_with',
where path is the path to a **valid JSON** file
- inline searchers: the argument can have multiple groups of inline searchers in
the format 'key:replace_with' or 'key+displace_with'.
The 'key' represents a valid searcher found in the 'storage/app/searchers' directory, without the '.json' extension.
- --original: This optional flag makes the command produce the resulting document in the original document format,
keeping the same structure and styles; without it, the resulting document will be a Markdown file.
The ':replace_with' or '+displace_with' values are optional, not using them will remove the found text strings.
The 'replace_with' value is optional, not using it will remove the found text strings.
If the 'replace_with' value is used, like for example 'X:example', then all text strings found will be replaced with
the 'example' text.
The resulted Markdown document will be created in the same directory as the input document file.
If the 'displace_with' value is used, like for example 'Y+example', then all text strings found will be replaced with
text composed using the following format: '{DISPLACE_WITH} FOUND TEXT {/DISPLACE_WITH}', so in our example if the
operation finds the text string 'placeholder', then the result will be '{example} placeholder {/example}'.
The resulting Markdown document, or the document in the original format, will be created in the
same directory as the input document file, together with an analysis result file.
### Examples
Note! These examples work when running the command from the root app directory, otherwise you have to
@ -25,12 +37,16 @@ input the correct paths in the command, including for the 'artisan' file.
`php artisan sd:run ./demo-cli/demo_document.pdf ./demo-cli/demo_searcher.json:EMAIL`
- Using valid searcher key (which exists in the directory 'storage/app/searchers') and removing all strings found
`php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher:EMAIL`
`php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher`
- Using valid searcher key (which exists in the directory 'storage/app/searchers') and replacing all strings found with the string 'EMAIL'
`php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher:EMAIL`
- Using valid JSON file searcher and displacing all strings found using 'EMAIL'
`php artisan sd:run ./demo-cli/demo_document.pdf ./demo-cli/demo_searcher.json+EMAIL`
- Using valid searcher key (which exists in the directory 'storage/app/searchers') and
displacing all strings found using 'EMAIL'
`php artisan sd:run ./demo-cli/demo_document.pdf demo_searcher+EMAIL`
Loading…
Cancel
Save