Repo for the search and displace core module including the interface to select files and search and displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

224 lines
6.5 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. <?php
  2. namespace App\SearchDisplace\Searchers;
  /**
   * Runs a list of searchers against a piece of content and returns, for every
   * searcher key, the matches that do not overlap a previously accepted match.
   *
   * A match is always an array shaped like
   * ['start' => int, 'end' => int, 'content' => string], where 'end' is the
   * offset of the last matched position (inclusive).
   */
  class Searcher
  {
      /** @var array Searchers to run; sorted by array key in the constructor. */
      protected $searchers;

      /** @var mixed Content every top-level searcher is applied to. */
      protected $content;

      /** @var Mapper Used to decide whether a searcher key is handled by Duckling. */
      protected $ducklingMapper;

      /** @var SearchersStorage Storage the custom searcher definitions are loaded from. */
      protected $searchersStorage;

      /**
       * @param array $searchers Searcher descriptors; each one must contain a 'key' entry.
       * @param mixed $content   Content to search in.
       */
      public function __construct($searchers, $content)
      {
          $this->searchers = $searchers;
          $this->content = $content;

          // The array keys define the order in which the searchers are applied.
          ksort($this->searchers);

          $this->ducklingMapper = new Mapper();
          $this->searchersStorage = new SearchersStorage();
      }

      /**
       * Handle searchers.
       *
       * @return mixed Map of searcher key => list of accepted matches.
       * @throws \Exception When a searcher key is unknown or a searcher definition is invalid.
       */
      public function execute()
      {
          $content = $this->content;
          $results = [];

          // Every searcher runs against the same, unmodified content. The order still
          // matters: processResults() keeps the first match that claims a region and
          // drops later matches that overlap it.
          foreach ($this->searchers as $searcher) {
              $key = $searcher['key'];

              if ($this->ducklingMapper->has($key)) {
                  $results[$key] = $this->applyDucklingSearcher($content, $key);
              } else {
                  $results[$key] = $this->applyCustomSearcher($content, $searcher);
              }
          }

          return $this->processResults($results);
      }

      /**
       * Run a single Duckling dimension over the content and normalise its output
       * to the match shape used by this class.
       *
       * @param mixed $content
       * @param mixed $dimension Duckling dimension, passed on as a one-element list.
       * @return array List of matches.
       */
      protected function applyDucklingSearcher($content, $dimension)
      {
          $duckling = new Duckling();
          $items = $duckling->execute($content, [$dimension]);

          return array_map(static function ($item) {
              return [
                  'start' => $item['start'],
                  // NOTE(review): presumably Duckling reports an exclusive end offset,
                  // hence the -1 to make it inclusive - confirm against Duckling's output.
                  'end' => $item['end'] - 1,
                  'content' => $item['body'],
              ];
          }, $items);
      }

      /**
       * Load a custom searcher definition from storage and apply it.
       *
       * @param $content
       * @param $searcher Descriptor containing the 'key' of the stored searcher.
       * @return mixed List of matches.
       * @throws \Exception When the storage has no searcher for the given key.
       */
      protected function applyCustomSearcher($content, $searcher)
      {
          $key = $searcher['key'];

          if (!$this->searchersStorage->has($key)) {
              throw new \Exception('Invalid searcher: ' . $key);
          }

          return $this->handleSearcher($this->searchersStorage->get($key), $content);
      }

      /**
       * Dispatch a searcher definition according to the entry it contains.
       *
       * Checked in this order:
       *  - 'rows':       nested rows, see handleParallelSearchers();
       *  - 'key':        a Duckling dimension (any other key is rejected);
       *  - 'expression': a regular expression, see handleExpression().
       *
       * @param $searcher
       * @param $content
       * @param bool $mustMatchStartAndEnd Whether an expression has to match the whole content.
       * @return mixed List of matches.
       * @throws \Exception When the definition has none of the entries above, or a non-Duckling key.
       */
      protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
      {
          if (array_key_exists('rows', $searcher)) {
              return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
          }

          if (array_key_exists('key', $searcher)) {
              if (!$this->ducklingMapper->has($searcher['key'])) {
                  throw new \Exception('Invalid searcher: ' . $searcher['key']);
              }

              return $this->applyDucklingSearcher($content, $searcher['key']);
          }

          if (array_key_exists('expression', $searcher)) {
              return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
          }

          throw new \Exception('Invalid searcher.');
      }

      /**
       * Apply rows of searchers one after another, each row refining the matches
       * produced by the previous row.
       *
       * @param $searchers Rows; every row is a list of searcher definitions.
       * @param $content
       * @param bool $mustMatchStartAndEnd Applies to the first row only; later rows always
       *                                   have to match a previous match completely.
       * @return array|array[] Matches with offsets relative to $content.
       * @throws \Exception
       */
      protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
      {
          // Seed with one pseudo-match that spans the whole content.
          $currentMatches = [
              [
                  'start' => 0,
                  'end' => strlen($content) - 1,
                  'content' => $content,
              ],
          ];

          foreach ($searchers as $row) {
              $refinedMatches = [];

              foreach ($currentMatches as $parent) {
                  $children = $this->handleSerialSearchers($row, $parent['content'], $mustMatchStartAndEnd);

                  foreach ($children as $child) {
                      // Child offsets are relative to the parent's content; shift them
                      // back so they are relative to the original content.
                      $start = $parent['start'] + $child['start'];

                      $refinedMatches[] = [
                          'start' => $start,
                          'end' => $start + strlen($child['content']) - 1,
                          'content' => $child['content'],
                      ];
                  }
              }

              $currentMatches = $refinedMatches;

              // From the second row on, a searcher has to match the previous match entirely.
              $mustMatchStartAndEnd = true;
          }

          return $currentMatches;
      }

      /**
       * Apply every searcher of one row to the same content and concatenate the matches.
       *
       * @param $serialSearchers List of searcher definitions.
       * @param $content
       * @param $mustMatchStartAndEnd
       * @return mixed List of matches, in searcher order.
       * @throws \Exception
       */
      protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
      {
          $chunks = [];

          foreach ($serialSearchers as $searcher) {
              $chunks[] = $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd);
          }

          // Merge once instead of re-copying the accumulated list on every iteration.
          // The leading [] keeps the call valid when there are no searchers at all.
          return array_merge([], ...$chunks);
      }

      /**
       * Find all occurrences of a regular expression in the content.
       *
       * Offsets are the byte offsets reported by preg_match_all().
       *
       * @param mixed $expression PCRE pattern body, without delimiters.
       * @param mixed $content
       * @param mixed $mustMatchStartAndEnd When truthy, the expression must match from the
       *                                    start to the end of the content.
       * @return array List of matches; empty when nothing matched or the pattern failed.
       */
      protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
      {
          $body = $this->escapeDelimiter($expression);

          // The non-capturing group keeps the anchors bound to the whole expression:
          // without it "a|b" would be read as "^a" OR "b$".
          $pattern = $mustMatchStartAndEnd
              ? '/^(?:' . $body . ')$/'
              : '/' . $body . '/';

          $matchCount = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);

          // 0 (no match) and false (pattern error) both yield an empty result.
          if (!$matchCount) {
              return [];
          }

          return array_map(static function ($match) {
              list($text, $offset) = $match;

              return [
                  'start' => $offset,
                  'end' => $offset + strlen($text) - 1,
                  'content' => $text,
              ];
          }, $matches[0]);
      }

      /**
       * Escape every "/" in a pattern body that is not already escaped, so the body
       * can be wrapped in "/" delimiters. Existing escape sequences are left untouched.
       *
       * @param mixed $expression
       * @return string
       */
      private function escapeDelimiter($expression)
      {
          $expression = (string) $expression;
          $length = strlen($expression);
          $escaped = '';

          for ($i = 0; $i < $length; $i++) {
              $char = $expression[$i];

              // Copy "\x" pairs verbatim so "\/" is not escaped a second time.
              if ($char === '\\' && $i + 1 < $length) {
                  $escaped .= $char . $expression[++$i];
                  continue;
              }

              $escaped .= $char === '/' ? '\\/' : $char;
          }

          return $escaped;
      }

      /**
       * Drop every match that overlaps a match accepted earlier, across all searchers.
       *
       * @param array $results Map of searcher key => list of matches.
       * @return array Same structure, containing only the accepted matches.
       */
      protected function processResults($results)
      {
          // Accepted regions, as start => end (both inclusive).
          $intervals = [];
          $processedResults = [];

          foreach ($results as $searcher => $searcherResults) {
              $processedResults[$searcher] = [];

              foreach ($searcherResults as $item) {
                  if ($this->overlapsAcceptedInterval($item, $intervals)) {
                      continue;
                  }

                  $intervals[$item['start']] = $item['end'];
                  $processedResults[$searcher][] = $item;
              }
          }

          return $processedResults;
      }

      /**
       * Tell whether a match touches any of the already accepted regions.
       *
       * @param array $item      Match with 'start' and 'end'.
       * @param array $intervals Accepted regions as start => end.
       * @return bool
       */
      private function overlapsAcceptedInterval($item, $intervals)
      {
          foreach ($intervals as $start => $end) {
              $startsInside = $item['start'] >= $start && $item['start'] <= $end;
              $endsInside = $item['end'] >= $start && $item['end'] <= $end;
              $coversWhole = $item['start'] <= $start && $item['end'] >= $end;

              if ($startsInside || $endsInside || $coversWhole) {
                  return true;
              }
          }

          return false;
      }
  }