Repository for the Search and Displace core module, including the interface for selecting files and the search-and-displace operations to run on them.
https://searchanddisplace.com
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
308 lines
8.9 KiB
308 lines
8.9 KiB
<?php
|
|
|
|
namespace App\Console\Commands;
|
|
|
|
use App\Events\SDFailedToApply;
|
|
use App\Listeners\AnalyzeOperationPerformance;
|
|
use App\SearchDisplace\Ingest\SendDocument;
|
|
use App\SearchDisplace\SearchAndDisplaceOriginalDocument;
|
|
use Illuminate\Console\Command;
|
|
use Illuminate\Support\Carbon;
|
|
use Illuminate\Support\Facades\Redis;
|
|
use Illuminate\Support\Facades\Storage;
|
|
|
|
class RunSearchDisplace extends Command
|
|
{
|
|
/**
 * Artisan signature: the command name followed by its arguments and options.
 *
 * - path:       file or directory to process.
 * - filters:    one or more filters; each is `key`, `key:value` (replace) or
 *               `key+value` (displace). A single filter may instead name a
 *               `.json` searcher file.
 * - --original: produce the result in the original document format instead
 *               of Markdown.
 *
 * @var string
 */
protected $signature = 'sd:run
                        {path : The document path}
                        {filters* : The filters which will be applied to the search}
                        {--original : Whether the operation will recreate the original document format}';

/**
 * Short description shown in the Artisan command list.
 *
 * @var string
 */
protected $description = 'Run search and displace on document with filters.';
|
|
|
|
/**
 * Build the command instance.
 *
 * Nothing is configured here; construction is delegated entirely to the
 * parent Command constructor.
 *
 * @return void
 */
public function __construct()
{
    parent::__construct();
}
|
|
|
|
/**
 * Execute the console command.
 *
 * Reads the `path` and `filters` arguments and the `--original` option, then
 * hands the work to the directory handler when the path is a directory and
 * to the single-document handler otherwise.
 *
 * @return int 0 when the operation was started, 1 when an exception was raised
 */
public function handle()
{
    $documentPath = $this->argument('path');
    $searchers = $this->argument('filters');
    $original = $this->option('original');

    try {
        $isDirectory = is_dir($documentPath);

        if ($isDirectory) {
            $this->handleDirectory($documentPath, $original, $searchers);
        } else {
            $this->handleDocument($documentPath, $original, $searchers);
        }

        // The ternary must be parenthesised: '.' binds tighter than '?:', so
        // without the parentheses the whole message collapses to 'directory'.
        $this->info('Processing ' . ($isDirectory ? 'directory' : 'document') . '..');
        $this->info('After the processing will be done the result will show up at the same path as the input.');
    } catch (\Exception $exception) {
        // A failure is an error, not an informational event; keep the message
        // next to the trace so the log entry is usable on its own.
        \Illuminate\Support\Facades\Log::error(
            $exception->getMessage() . PHP_EOL . $exception->getTraceAsString()
        );

        $this->error('Something went wrong. (' . $exception->getMessage() . ')');

        // Non-zero exit code so shell scripts can detect the failure.
        return 1;
    }

    return 0;
}
|
|
|
|
/**
 * Run the operation on every file found (recursively) under a directory.
 *
 * The analyzer state is initialised once with the total file count. A failure
 * on one file does not stop the remaining files: it is reported on the
 * console and the SDFailedToApply event is dispatched.
 *
 * @param string $path      Directory to process
 * @param bool   $original  When true, use the original-document operation; otherwise Markdown
 * @param array  $searchers Raw filter arguments
 * @throws \Exception
 */
protected function handleDirectory($path, $original, $searchers)
{
    $allFiles = $this->getDirContents($path);

    $this->bootAnalyzer($path, count($allFiles));

    foreach ($allFiles as $file) {
        try {
            if ($original) {
                $this->runOriginalDocumentOperation($file, $searchers);
            } else {
                $this->runMarkdownOperation($file, $searchers);
            }
        } catch (\Exception $exception) {
            // Tell the user which file failed and why instead of dropping the
            // exception silently, then dispatch the failure event as before.
            $this->error('Failed to process "' . $file . '". (' . $exception->getMessage() . ')');

            SDFailedToApply::dispatch();
        }
    }
}
|
|
|
|
/**
 * Run the operation on a single document.
 *
 * The analyzer state is initialised with the document's parent directory and
 * a file count of one. A failure is reported on the console and the
 * SDFailedToApply event is dispatched; the exception is not re-thrown.
 *
 * @param string $documentPath Path of the document to process
 * @param bool   $original     When true, use the original-document operation; otherwise Markdown
 * @param array  $searchers    Raw filter arguments
 * @throws \Exception
 */
protected function handleDocument($documentPath, $original, $searchers)
{
    $path = pathinfo($documentPath, PATHINFO_DIRNAME);

    $this->bootAnalyzer($path, 1);

    try {
        if ($original) {
            $this->runOriginalDocumentOperation($documentPath, $searchers);
        } else {
            $this->runMarkdownOperation($documentPath, $searchers);
        }
    } catch (\Exception $exception) {
        // Surface the reason (e.g. an unknown searcher) instead of dropping
        // it silently, then dispatch the failure event as before.
        $this->error('Failed to process "' . $documentPath . '". (' . $exception->getMessage() . ')');

        SDFailedToApply::dispatch();
    }
}
|
|
|
|
/**
 * Start the Markdown operation for one document.
 *
 * Generates an id, stores the resolved searchers together with the result
 * path (`<dirname>/<filename>-displaced.md`) under that id, and passes the
 * document's path, base name and MIME type to SendDocument.
 *
 * @param string $documentPath Path of the document to process
 * @param array  $searchers    Raw filter arguments
 * @throws \Exception
 */
protected function runMarkdownOperation($documentPath, $searchers)
{
    // 32 hex characters, the same shape as the previous md5-based id, but
    // taken from the CSPRNG instead of rand()/uniqid().
    $id = bin2hex(random_bytes(16));
    $pathDetails = pathinfo($documentPath);

    $resultedDocumentPath = $pathDetails['dirname'] . '/' . $pathDetails['filename'] . '-displaced.md';

    $this->storeSearchers($id, $searchers, $resultedDocumentPath);

    $sendToIngest = new SendDocument();

    $sendToIngest->execute($id, [
        'path' => $documentPath,
        'name' => $pathDetails['basename'],
        'type' => $this->getFileMimeType($documentPath),
    ]);
}
|
|
|
|
/**
 * Start the original-format operation for one document.
 *
 * The raw filters are parsed into key/type/value entries and passed, with the
 * document path, to SearchAndDisplaceOriginalDocument::start().
 *
 * @param string $documentPath Path of the document to process
 * @param array  $searchers    Raw filter arguments
 * @throws \Exception
 */
protected function runOriginalDocumentOperation($documentPath, $searchers)
{
    $operation = new SearchAndDisplaceOriginalDocument();
    $parsedSearchers = $this->getListOfSearchersAndActions($searchers);

    $operation->start($documentPath, $parsedSearchers);
}
|
|
|
|
/**
 * Persist the resolved searchers and the result path for an operation.
 *
 * The data is written as JSON to `searchers/<id>.json` on the `local` disk.
 *
 * @param string $id              Operation id used as the file name
 * @param array  $searchers       Raw filter arguments
 * @param string $storeResultPath Path where the resulting document should be written
 * @throws \Exception When a searcher cannot be resolved
 */
protected function storeSearchers($id, $searchers, $storeResultPath)
{
    $payload = json_encode([
        'searchers' => $this->getSearchers($searchers),

        'document_path' => $storeResultPath,
    ]);

    Storage::disk('local')->put("searchers/$id.json", $payload);
}
|
|
|
|
/**
 * Write the performance-analysis state to Redis.
 *
 * Three keys are set: the current Unix timestamp, the path being processed
 * and the number of files still to be processed.
 *
 * @param string $path       Directory associated with this run
 * @param int    $filesCount Number of files in this run
 */
protected function bootAnalyzer($path, $filesCount)
{
    $connection = Redis::connection();

    $state = [
        'analyze_performance_time' => Carbon::now()->format('U'),
        'analyze_performance_path' => $path,
        'analyze_performance_remaining_files' => $filesCount,
    ];

    foreach ($state as $key => $value) {
        $connection->set($key, $value);
    }
}
|
|
|
|
/**
 * Resolve the raw filter arguments into searcher definitions.
 *
 * When exactly one filter is given and its key (the part before any `:` or
 * `+` action) contains `.json`, the searcher is read from that file.
 * Otherwise every filter is looked up by key in local storage.
 *
 * @param array $searchers Raw filter arguments
 * @return array List of ['content' => ..., 'type' => ..., 'value' => ...]
 * @throws \Exception When a searcher cannot be resolved
 */
protected function getSearchers($searchers)
{
    if (count($searchers) === 1) {
        $parsed = $this->getListOfSearchersAndActions($searchers);

        // Test only the key: a replacement value such as "email:report.json"
        // must not turn a stored searcher into a file lookup.
        if (str_contains($parsed[0]['key'], '.json')) {
            return $this->getSearchersFromFile($searchers[0]);
        }
    }

    return $this->getSearchersFromList($searchers);
}
|
|
|
|
/**
 * Load each named searcher from `searchers/<key>.json` on the `local` disk.
 *
 * @param array $searchers Raw filter arguments
 * @return array List of ['content' => decoded JSON (associative), 'type' => ..., 'value' => ...]
 * @throws \Exception When a searcher file does not exist
 */
protected function getSearchersFromList($searchers)
{
    $disk = Storage::disk('local');

    $resolved = [];

    foreach ($this->getListOfSearchersAndActions($searchers) as $entry) {
        $key = $entry['key'];
        $file = 'searchers/' . $key . '.json';

        if ($disk->exists($file)) {
            $resolved[] = [
                'content' => json_decode($disk->get($file), true),
                'type' => $entry['type'],
                'value' => $entry['value'],
            ];

            continue;
        }

        throw new \Exception('Searcher "' . $key . '" does not exist');
    }

    return $resolved;
}
|
|
|
|
/**
 * Load a single searcher definition from a JSON file on disk.
 *
 * The argument is parsed like any other filter; its key is used as the file
 * path and its action (type/value) is attached to the result.
 *
 * @param string $argument Raw filter argument, e.g. `/path/searcher.json:value`
 * @return array Single-element list of ['content' => decoded JSON, 'type' => ..., 'value' => ...]
 * @throws \Exception When the file is missing, unreadable, empty or not valid JSON
 */
protected function getSearchersFromFile($argument)
{
    $searchersList = $this->getListOfSearchersAndActions([$argument]);
    $searcherInfo = $searchersList[0];
    $filePath = $searcherInfo['key'];

    // Fail with a clear message instead of a file_get_contents() warning.
    if (! is_file($filePath) || ! is_readable($filePath)) {
        throw new \Exception('The searcher JSON file "' . $filePath . '" does not exist or is not readable.');
    }

    $contents = file_get_contents($filePath);

    if (! $contents || trim($contents) === '') {
        throw new \Exception('There is no data in the searcher JSON file.');
    }

    $decoded = json_decode($contents);

    // json_decode() returns null on malformed input without raising anything;
    // check explicitly so a broken file is not stored as a null searcher.
    if (json_last_error() !== JSON_ERROR_NONE) {
        throw new \Exception('The searcher JSON file is not valid JSON. (' . json_last_error_msg() . ')');
    }

    return [
        [
            'content' => $decoded,
            'type' => $searcherInfo['type'],
            'value' => $searcherInfo['value'],
        ],
    ];
}
|
|
|
|
/**
 * Parse raw filter arguments into key/type/value entries.
 *
 * Supported forms:
 *  - `key`        => type "replace", empty value
 *  - `key:value`  => type "replace" with the given value
 *  - `key+value`  => type "displace" with the given value
 *
 * Only the first separator splits the argument, so the value itself may
 * contain `:` or `+` (e.g. `url:http://example.com`).
 *
 * @param array $searchers Raw filter arguments
 * @return array List of ['key' => string, 'type' => string, 'value' => string]
 */
protected function getListOfSearchersAndActions($searchers)
{
    $searchersList = [];

    foreach ($searchers as $searcher) {
        $searcherKey = $searcher;
        $type = 'replace';
        $value = '';

        // Limit of 2: everything after the first ':' belongs to the value.
        $replaceParts = explode(':', $searcher, 2);

        if (count($replaceParts) === 2) {
            $searcherKey = $replaceParts[0];
            $value = $replaceParts[1];
        } else {
            // No replace action; look for a displace action instead.
            $displaceParts = explode('+', $searcher, 2);

            if (count($displaceParts) === 2) {
                $searcherKey = $displaceParts[0];
                $type = 'displace';
                $value = $displaceParts[1];
            }
        }

        $searchersList[] = [
            'key' => $searcherKey,
            'type' => $type,
            'value' => $value,
        ];
    }

    return $searchersList;
}
|
|
|
|
/**
 * Best-effort detection of a file's MIME type.
 *
 * Detection runs in up to three stages; a later stage is only consulted when
 * the result so far is missing or one of the generic types
 * `application/octet-stream` / `text/plain`:
 *  1. fileinfo (`finfo_file`), or `mime_content_type` when fileinfo is absent;
 *  2. the `file -b --mime-type` shell command;
 *  3. `exif_imagetype` for image files.
 *
 * @param string $file Path of the file to inspect
 * @return string|false Detected MIME type, or false when nothing could be determined
 */
protected function getFileMimeType($file)
{
    $genericTypes = ['application/octet-stream', 'text/plain'];
    $polyfill = 'upgradephp/ext/mime.php';
    $type = false;

    if (function_exists('finfo_file')) {
        $finfo = finfo_open(FILEINFO_MIME_TYPE);

        if ($finfo !== false) {
            $type = finfo_file($finfo, $file);
            finfo_close($finfo);
        }
    } else {
        // Load the polyfill only when it can actually be found; an
        // unconditional require_once is a fatal error when it is absent.
        if (! function_exists('mime_content_type') && stream_resolve_include_path($polyfill) !== false) {
            require_once $polyfill;
        }

        if (function_exists('mime_content_type')) {
            $type = mime_content_type($file);
        }
    }

    if ((! $type || in_array($type, $genericTypes, true)) && function_exists('exec')) {
        $secondOpinion = exec('file -b --mime-type ' . escapeshellarg($file), $output, $returnCode);

        if ($returnCode === 0 && $secondOpinion) {
            $type = $secondOpinion;
        }
    }

    if (! $type || in_array($type, $genericTypes, true)) {
        if (! function_exists('exif_imagetype') && stream_resolve_include_path($polyfill) !== false) {
            require_once $polyfill;
        }

        // exif_imagetype() raises a notice when it cannot read enough bytes;
        // skip files shorter than 12 bytes so tiny text files do not fail.
        if (function_exists('exif_imagetype') && is_file($file) && filesize($file) >= 12) {
            $exifImageType = exif_imagetype($file);

            if ($exifImageType !== false) {
                $type = image_type_to_mime_type($exifImageType);
            }
        }
    }

    return $type;
}
|
|
|
|
/**
 * Recursively collect the resolved paths of all files under a directory.
 *
 * Sub-directories are descended into and are not themselves listed. Entries
 * whose path cannot be resolved (for example a broken symbolic link) are
 * skipped.
 *
 * @param string $dir     Directory to scan
 * @param array  $results Accumulator, passed by reference during recursion
 * @return array List of resolved file paths
 * @throws \RuntimeException When a directory cannot be read
 */
protected function getDirContents($dir, &$results = [])
{
    $entries = scandir($dir);

    if ($entries === false) {
        throw new \RuntimeException('Unable to read directory "' . $dir . '".');
    }

    foreach ($entries as $entry) {
        if ($entry === '.' || $entry === '..') {
            continue;
        }

        $path = realpath($dir . DIRECTORY_SEPARATOR . $entry);

        // realpath() returns false when the entry cannot be resolved; adding
        // that to the list would queue a bogus "file" for processing.
        if ($path === false) {
            continue;
        }

        if (is_dir($path)) {
            $this->getDirContents($path, $results);
        } else {
            $results[] = $path;
        }
    }

    return $results;
}
|
|
}
|