Repository for the Search and Displace core module, including the interface for selecting files and the search-and-displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

308 lines
8.9 KiB

  1. <?php
  2. namespace App\Console\Commands;
  3. use App\Events\SDFailedToApply;
  4. use App\Listeners\AnalyzeOperationPerformance;
  5. use App\SearchDisplace\Ingest\SendDocument;
  6. use App\SearchDisplace\SearchAndDisplaceOriginalDocument;
  7. use Illuminate\Console\Command;
  8. use Illuminate\Support\Carbon;
  9. use Illuminate\Support\Facades\Redis;
  10. use Illuminate\Support\Facades\Storage;
  /**
   * Artisan command `sd:run`: runs search and displace on a single document or
   * on every file found recursively inside a directory.
   *
   * Each filter argument has one of three forms (see
   * getListOfSearchersAndActions()):
   *   - `key`        replace matches with an empty string,
   *   - `key:value`  replace matches with `value`,
   *   - `key+value`  displace matches using `value`.
   *
   * A single filter containing `.json` is treated as a path to a searcher
   * definition file instead of a stored searcher key.
   */
  class RunSearchDisplace extends Command
  {
      /**
       * The name and signature of the console command.
       *
       * @var string
       */
      protected $signature = 'sd:run
                              {path : The document path}
                              {filters* : The filters which will be applied to the search}
                              {--original : Whether the operation will recreate the original document format}';

      /**
       * The console command description.
       *
       * @var string
       */
      protected $description = 'Run search and displace on document with filters.';

      /**
       * Create a new command instance.
       *
       * @return void
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Execute the console command.
       *
       * Dispatches to directory or single-document handling depending on
       * whether the `path` argument is a directory.
       *
       * @return int 0 on success, 1 when an exception reached this method
       */
      public function handle()
      {
          $documentPath = $this->argument('path');
          $searchers = $this->argument('filters');
          $original = $this->option('original');

          try {
              $isDirectory = is_dir($documentPath);

              if ($isDirectory) {
                  $this->handleDirectory($documentPath, $original, $searchers);
              } else {
                  $this->handleDocument($documentPath, $original, $searchers);
              }

              // The ternary must be parenthesised: "." binds tighter than "?:",
              // so without the parentheses the whole message collapsed to
              // just "directory".
              $this->info('Processing ' . ($isDirectory ? 'directory' : 'document') . '..');
              $this->info('After the processing will be done the result will show up at the same path as the input.');
          } catch (\Exception $exception) {
              \Illuminate\Support\Facades\Log::info($exception->getTraceAsString());
              $this->error('Something went wrong. (' . $exception->getMessage() . ')');

              // Signal the failure to the calling shell / scheduler.
              return 1;
          }

          return 0;
      }

      /**
       * Runs the operation on every file found recursively under $path.
       *
       * A failure on one file does not stop the remaining files.
       *
       * @param string $path       Directory to scan
       * @param bool   $original   Whether to recreate the original document format
       * @param array  $searchers  Raw filter arguments
       * @throws \Exception
       */
      protected function handleDirectory($path, $original, $searchers)
      {
          $allFiles = $this->getDirContents($path);

          $this->bootAnalyzer($path, count($allFiles));

          foreach ($allFiles as $file) {
              $this->processDocument($file, $original, $searchers);
          }
      }

      /**
       * Runs the operation on a single document.
       *
       * @param string $documentPath  Path of the document
       * @param bool   $original      Whether to recreate the original document format
       * @param array  $searchers     Raw filter arguments
       * @throws \Exception
       */
      protected function handleDocument($documentPath, $original, $searchers)
      {
          $path = pathinfo($documentPath, PATHINFO_DIRNAME);

          $this->bootAnalyzer($path, 1);

          $this->processDocument($documentPath, $original, $searchers);
      }

      /**
       * Runs the markdown or original-format operation on one document and
       * reports (instead of silently swallowing) any exception it raises.
       *
       * @param string $documentPath  Path of the document
       * @param bool   $original      Whether to recreate the original document format
       * @param array  $searchers     Raw filter arguments
       */
      protected function processDocument($documentPath, $original, $searchers)
      {
          try {
              if ($original) {
                  $this->runOriginalDocumentOperation($documentPath, $searchers);
              } else {
                  $this->runMarkdownOperation($documentPath, $searchers);
              }
          } catch (\Exception $exception) {
              // Keep the failure visible: log it and tell the user why this
              // document was skipped before emitting the failure event.
              \Illuminate\Support\Facades\Log::error(
                  'Search and displace failed for "' . $documentPath . '": ' . $exception->getMessage()
              );
              $this->error('Failed to process "' . $documentPath . '". (' . $exception->getMessage() . ')');

              SDFailedToApply::dispatch();
          }
      }

      /**
       * Stores the resolved searchers under a fresh random ID and hands the
       * document to SendDocument with that ID.
       *
       * The result path recorded with the searchers is
       * "<dirname>/<filename>-displaced.md".
       *
       * @param string $documentPath  Path of the document
       * @param array  $searchers     Raw filter arguments
       * @throws \Exception
       */
      protected function runMarkdownOperation($documentPath, $searchers)
      {
          // 32 hex characters, same shape as the previous md5-based ID, but
          // drawn from the CSPRNG rather than rand()/uniqid().
          $id = bin2hex(random_bytes(16));

          $pathDetails = pathinfo($documentPath);
          $resultedDocumentPath = $pathDetails['dirname'] . '/' . $pathDetails['filename'] . '-displaced.md';

          $this->storeSearchers($id, $searchers, $resultedDocumentPath);

          $sendToIngest = new SendDocument();
          $sendToIngest->execute($id, [
              'path' => $documentPath,
              'name' => $pathDetails['basename'],
              'type' => $this->getFileMimeType($documentPath),
          ]);
      }

      /**
       * Starts SearchAndDisplaceOriginalDocument for the document with the
       * parsed list of searchers and actions.
       *
       * @param string $documentPath  Path of the document
       * @param array  $searchers     Raw filter arguments
       * @throws \Exception
       */
      protected function runOriginalDocumentOperation($documentPath, $searchers)
      {
          $handler = new SearchAndDisplaceOriginalDocument();
          $handler->start($documentPath, $this->getListOfSearchersAndActions($searchers));
      }

      /**
       * Writes the resolved searchers and the result path as JSON to
       * "searchers/<id>.json" on the local storage disk.
       *
       * @param string $id               Operation ID
       * @param array  $searchers        Raw filter arguments
       * @param string $storeResultPath  Path where the result should be stored
       * @throws \Exception
       */
      protected function storeSearchers($id, $searchers, $storeResultPath)
      {
          $data = [
              'searchers' => $this->getSearchers($searchers),
              'document_path' => $storeResultPath,
          ];

          $storage = Storage::disk('local');
          $storage->put("searchers/$id.json", json_encode($data));
      }

      /**
       * Records the start time (Unix timestamp), the path and the number of
       * files of this run in Redis under the "analyze_performance_*" keys.
       *
       * @param string $path        Directory the run operates in
       * @param int    $filesCount  Number of files to be processed
       */
      protected function bootAnalyzer($path, $filesCount)
      {
          $redis = Redis::connection();

          $redis->set('analyze_performance_time', Carbon::now()->format('U'));
          $redis->set('analyze_performance_path', $path);
          $redis->set('analyze_performance_remaining_files', $filesCount);
      }

      /**
       * Resolves the filter arguments into searcher definitions.
       *
       * A single argument containing ".json" is read as a searcher file; any
       * other input is looked up as stored searcher keys.
       *
       * @param array $searchers  Raw filter arguments
       * @return array
       * @throws \Exception
       */
      protected function getSearchers($searchers)
      {
          if (count($searchers) === 1 && str_contains($searchers[0], '.json')) {
              return $this->getSearchersFromFile($searchers[0]);
          }

          return $this->getSearchersFromList($searchers);
      }

      /**
       * Loads each referenced searcher from "searchers/<key>.json" on the
       * local storage disk.
       *
       * @param array $searchers  Raw filter arguments
       * @return array  List of ['content', 'type', 'value'] entries
       * @throws \Exception When a referenced searcher does not exist
       */
      protected function getSearchersFromList($searchers)
      {
          $storage = Storage::disk('local');
          $list = [];

          foreach ($this->getListOfSearchersAndActions($searchers) as $searcherInfo) {
              $searcherPath = 'searchers/' . $searcherInfo['key'] . '.json';

              if ( ! $storage->exists($searcherPath)) {
                  throw new \Exception('Searcher "' . $searcherInfo['key'] . '" does not exist');
              }

              $list[] = [
                  'content' => json_decode($storage->get($searcherPath), true),
                  'type' => $searcherInfo['type'],
                  'value' => $searcherInfo['value'],
              ];
          }

          return $list;
      }

      /**
       * Loads a single searcher definition from a JSON file on disk.
       *
       * @param string $argument  Filter argument whose key part is the file path
       * @return array  One-element list holding a ['content', 'type', 'value'] entry
       * @throws \Exception When the file is empty/unreadable or is not valid JSON
       */
      protected function getSearchersFromFile($argument)
      {
          $searchersList = $this->getListOfSearchersAndActions([$argument]);
          $searcherInfo = $searchersList[0];

          $contents = file_get_contents($searcherInfo['key']);

          if ( ! $contents) {
              throw new \Exception('There is no data in the searcher JSON file.');
          }

          $decoded = json_decode($contents);

          // Without this check a malformed file would be stored as a searcher
          // whose content is null.
          if (json_last_error() !== JSON_ERROR_NONE) {
              throw new \Exception('The searcher JSON file is not valid JSON. (' . json_last_error_msg() . ')');
          }

          return [
              [
                  'content' => $decoded,
                  'type' => $searcherInfo['type'],
                  'value' => $searcherInfo['value'],
              ],
          ];
      }

      /**
       * Parses the raw filter arguments into key / action type / value.
       *
       * "key:value" yields a "replace" action, "key+value" (only when no ":"
       * is present) a "displace" action, and a bare "key" a "replace" action
       * with an empty value.
       *
       * @param array $searchers  Raw filter arguments
       * @return array  List of ['key', 'type', 'value'] entries
       */
      protected function getListOfSearchersAndActions($searchers)
      {
          $searchersList = [];

          foreach ($searchers as $searcher) {
              // Limit the split to two parts so a value that itself contains
              // the separator is kept whole instead of being truncated.
              $replaceActionResult = explode(':', $searcher, 2);

              $searcherKey = $replaceActionResult[0];
              $type = 'replace';
              $value = '';

              if (count($replaceActionResult) === 1) {
                  $displaceActionResult = explode('+', $searcher, 2);

                  if (count($displaceActionResult) > 1) {
                      $searcherKey = $displaceActionResult[0];
                      $type = 'displace';
                      $value = $displaceActionResult[1];
                  }
              } else {
                  $value = $replaceActionResult[1];
              }

              $searchersList[] = [
                  'key' => $searcherKey,
                  'type' => $type,
                  'value' => $value,
              ];
          }

          return $searchersList;
      }

      /**
       * Detects the MIME type of a file, asking up to three sources.
       *
       * Order: fileinfo (or the upgradephp mime_content_type() fallback), then
       * the `file` command line tool, then exif_imagetype(). The later sources
       * are only consulted while the result is still empty or one of the
       * generic types "application/octet-stream" / "text/plain".
       *
       * @param string $file  Path of the file
       * @return string|false  Detected MIME type, or a falsy value if none was found
       */
      protected function getFileMimeType($file)
      {
          $genericTypes = ['application/octet-stream', 'text/plain'];

          if (function_exists('finfo_file')) {
              $finfo = finfo_open(FILEINFO_MIME_TYPE);

              // finfo_open() returns false when the magic database cannot be
              // loaded; passing that on to finfo_file() would be an error.
              if ($finfo) {
                  $type = finfo_file($finfo, $file);
                  finfo_close($finfo);
              } else {
                  $type = false;
              }
          } else {
              require_once 'upgradephp/ext/mime.php';
              $type = mime_content_type($file);
          }

          if ( ! $type || in_array($type, $genericTypes, true)) {
              $secondOpinion = exec('file -b --mime-type ' . escapeshellarg($file), $output, $returnCode);

              if ($returnCode === 0 && $secondOpinion) {
                  $type = $secondOpinion;
              }
          }

          // exif_imagetype() and image_type_to_mime_type() are native PHP
          // functions, so no extra include is needed; only the exif extension
          // has to be loaded, which is checked here.
          if (( ! $type || in_array($type, $genericTypes, true)) && function_exists('exif_imagetype')) {
              $exifImageType = exif_imagetype($file);

              if ($exifImageType !== false) {
                  $type = image_type_to_mime_type($exifImageType);
              }
          }

          return $type;
      }

      /**
       * Recursively collects the resolved paths of all files below $dir.
       *
       * @param string $dir      Directory to scan
       * @param array  $results  Accumulator, filled by reference during recursion
       * @return array  List of absolute file paths
       */
      protected function getDirContents($dir, &$results = array())
      {
          $entries = scandir($dir);

          // Unreadable directory: nothing to add.
          if ($entries === false) {
              return $results;
          }

          foreach ($entries as $entry) {
              if ($entry === '.' || $entry === '..') {
                  continue;
              }

              $path = realpath($dir . DIRECTORY_SEPARATOR . $entry);

              // realpath() returns false for entries that cannot be resolved
              // (e.g. a broken symlink); do not record false as a file path.
              if ($path === false) {
                  continue;
              }

              if (is_dir($path)) {
                  $this->getDirContents($path, $results);
              } else {
                  $results[] = $path;
              }
          }

          return $results;
      }
  }