Repo for the search and displace core module including the interface to select files and search and displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

191 lines
5.0 KiB

3 years ago
  1. <?php
  2. namespace App\SearchDisplace;
  3. use App\SearchDisplace\Ingest\SendDocument;
  4. use Illuminate\Support\Facades\Storage;
  /**
   * Drives the "search and displace" flow for an uploaded original document.
   *
   * Per document, a working directory "contracts/<id>" is kept on the "local"
   * storage disk. It holds the selected searchers ("searchers.json") and is
   * expected to receive the processed file ("document"), whose presence the
   * status helpers below check for.
   */
  class SearchAndDisplaceOriginalDocument
  {
      /**
       * Registers a document for processing: stores its searchers and sends the
       * file to Ingest.
       *
       * @param mixed $document Uploaded file object; must expose getClientOriginalName(),
       *                        getRealPath() and getMimeType() (presumably an UploadedFile
       *                        instance; confirm against the caller).
       * @param mixed $searchers Searchers definition; stored as JSON.
       * @return string The generated id: "<unix time>_<original file name without extension>".
       * @throws \Exception
       */
      public function start($document, $searchers)
      {
          $id = time() . '_' . pathinfo($document->getClientOriginalName(), PATHINFO_FILENAME);

          $this->storeSearchers($id, $searchers);
          $this->sendDocumentToIngest($id, $document);

          return $id;
      }

      /**
       * Applies the stored searchers on the text extracted by Ingest.
       *
       * Any exception raised while processing is caught and logged; nothing is
       * returned and nothing is rethrown.
       *
       * @param string $id Document id, as returned by start().
       * @param array $contents Must contain a 'document' entry holding a JSON string
       *                        with a 'contents' object that has a 'text' entry.
       * @return void
       */
      public function applySD($id, $contents)
      {
          $data = json_decode($contents['document'], true);

          try {
              // Fail with a clear message instead of running on a null text when
              // the payload is not the expected JSON structure.
              if ( ! is_array($data) || ! isset($data['contents']['text'])) {
                  throw new \Exception('Ingest document payload has no contents text.');
              }

              $searchAndDisplace = new SearchAndDisplace(
                  $data['contents']['text'],
                  [
                      'searchers' => $this->getSearchers($id),
                  ]
              );

              $result = $searchAndDisplace->execute();

              // Update text and element ranges.
              $data['contents'] = $this->applyResultsOnIngestData($data['contents'], $result);

              \Illuminate\Support\Facades\Log::info($data['contents']);

              // Send to Ingest to recreate doc.
              // NOTE(review): that step is not implemented here; the updated data is only logged.
          } catch (\Exception $exception) {
              // Failures are errors, not informational events.
              \Illuminate\Support\Facades\Log::error($exception->getMessage());
              \Illuminate\Support\Facades\Log::error($exception->getTraceAsString());
          }
      }

      /**
       * Removes the document's working directory after Ingest reported a failure.
       *
       * @param string $id
       * @return void
       */
      public function onIngestFail($id)
      {
          Storage::disk('local')->deleteDirectory($this->directoryFor($id));
      }

      /**
       * Tells whether processing failed, i.e. the working directory no longer exists.
       *
       * @param string $id
       * @return bool
       */
      public function hasFailed($id)
      {
          return ! Storage::disk('local')->exists($this->directoryFor($id));
      }

      /**
       * Tells whether the processed file is still missing from the working directory.
       *
       * Also returns true when the working directory itself does not exist, so
       * callers should check hasFailed() as well.
       *
       * @param string $id
       * @return bool
       */
      public function isInProgress($id)
      {
          return ! Storage::disk('local')->exists($this->directoryFor($id) . '/document');
      }

      /**
       * Returns a download response for the processed document.
       *
       * @param string $id
       * @return \Symfony\Component\HttpFoundation\StreamedResponse
       * @throws \Exception When the document has failed or is not processed yet.
       */
      public function streamFile($id)
      {
          if ($this->hasFailed($id) || $this->isInProgress($id)) {
              throw new \Exception('Document is not processed.');
          }

          return Storage::disk('local')->download($this->directoryFor($id) . '/document');
      }

      /**
       * Writes the search and displace result back into the Ingest data.
       *
       * Replaces 'text' with the processed content and shifts each element's
       * 'range_start' / 'range_end' according to the matches that lie completely
       * inside that element. Matches that cross an element boundary are left out
       * of the computation, as before.
       *
       * @param array $ingestData Ingest 'contents' array: 'text' plus an optional list of
       *                          'elements', each with 'range_start' and 'range_end'.
       * @param array $sdResult Result with 'content' and 'indexes' (searcher => list of
       *                        matches having 'original_start', 'original_end' and 'end').
       * @return array The updated ingest data.
       */
      protected function applyResultsOnIngestData($ingestData, $sdResult)
      {
          $ingestData['text'] = $sdResult['content'];

          // Flatten the matches of every searcher, keyed by their original start.
          $indexes = [];

          foreach (($sdResult['indexes'] ?? []) as $searcherIndexes) {
              foreach ($searcherIndexes as $index) {
                  $indexes[$index['original_start']] = $index;
              }
          }

          // Keying alone keeps insertion order (searcher by searcher); the early
          // "break" below relies on ascending order, so sort explicitly.
          ksort($indexes);

          // Accumulated length change caused by matches in the preceding elements.
          $offset = 0;

          foreach (($ingestData['elements'] ?? []) as $key => $element) {
              // Compare against the untouched bounds: matches are expressed in
              // original text coordinates.
              $rangeStart = $element['range_start'];
              $rangeEnd = $element['range_end'];
              $ownDifference = 0;

              foreach ($indexes as $i => $index) {
                  if ($index['original_start'] > $rangeEnd) {
                      break;
                  }

                  if ($index['original_end'] < $rangeStart) {
                      continue;
                  }

                  if ($index['original_start'] >= $rangeStart && $index['original_end'] <= $rangeEnd) {
                      // NOTE(review): assumes 'end' - 'original_end' is the length change of
                      // this single match; confirm against the SearchAndDisplace result format.
                      $ownDifference += $index['end'] - $index['original_end'];

                      unset($indexes[$i]);
                  }
              }

              // Write through the key: the loop variable is a copy, so changing it
              // would leave $ingestData untouched.
              $ingestData['elements'][$key]['range_start'] = $rangeStart + $offset;
              $ingestData['elements'][$key]['range_end'] = $rangeEnd + $offset + $ownDifference;

              $offset += $ownDifference;
          }

          return $ingestData;
      }

      /**
       * Creates the working directory and stores the searchers in it as JSON.
       *
       * @param string $id
       * @param mixed $searchers
       * @return void
       */
      protected function storeSearchers($id, $searchers)
      {
          $storage = Storage::disk('local');
          $directory = $this->directoryFor($id);

          $storage->makeDirectory($directory);
          $storage->put("$directory/searchers.json", json_encode($searchers));
      }

      /**
       * Loads the searchers stored for the document.
       *
       * @param string $id
       * @return mixed The decoded "searchers.json" content (associative arrays for JSON objects).
       * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
       * @throws \Exception When the stored file is empty.
       */
      protected function getSearchers($id)
      {
          $searchers = Storage::disk('local')->get($this->directoryFor($id) . '/searchers.json');

          if ( ! $searchers) {
              throw new \Exception('Searchers do not exist.');
          }

          return json_decode($searchers, true);
      }

      /**
       * Sends the uploaded file to Ingest through the SendDocument action.
       *
       * @param string $id
       * @param mixed $document Uploaded file object (see start()).
       * @return void
       * @throws \Exception
       */
      protected function sendDocumentToIngest($id, $document)
      {
          $sendDocument = new SendDocument();

          $sendDocument->execute($id, [
              'path' => $document->getRealPath(),
              'type' => $document->getMimeType(),
              'name' => $document->getClientOriginalName(),
          ], 'original');
      }

      /**
       * Builds the working directory path of a document, relative to the storage disk.
       *
       * @param string $id
       * @return string
       */
      private function directoryFor($id)
      {
          return "contracts/$id";
      }
  }