Repo for the search and displace core module including the interface to select files and search and displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

236 lines
6.9 KiB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
  1. <?php
  2. namespace App\SearchDisplace\Searchers;
  3. class Searcher
  4. {
  5. protected $searchers;
  6. protected $content;
  7. protected $ducklingMapper;
  8. protected $searchersStorage;
  9. public function __construct($searchers, $content)
  10. {
  11. $this->searchers = $searchers;
  12. $this->content = $content;
  13. ksort($this->searchers);
  14. $this->ducklingMapper = new Mapper();
  15. $this->searchersStorage = new SearchersStorage();
  16. }
  17. /**
  18. * Handle searchers.
  19. *
  20. * @return mixed
  21. * @throws \Exception
  22. */
  23. public function execute()
  24. {
  25. $results = [];
  26. $content = $this->content;
  27. // Apply searchers in a half serial, half parallel manner, so apply each searcher on the same initial content
  28. // but with the modifications made by the previous searchers, this way the 'order' matters.
  29. foreach ($this->searchers as $searcher) {
  30. $results[$searcher['key']] = $this->ducklingMapper->has($searcher['key'])
  31. ? $this->applyDucklingSearcher($content, $searcher['key'])
  32. : $this->applyCustomSearcher($content, $searcher);
  33. }
  34. return $this->processResults($results);
  35. }
  36. protected function applyDucklingSearcher($content, $dimension)
  37. {
  38. $duckling = new Duckling();
  39. $result = $duckling->execute($content, [$dimension]);
  40. return array_map(function ($item) {
  41. return [
  42. 'start' => $item['start'],
  43. 'end' => $item['end'] - 1,
  44. 'content' => filter_var($item['body'], FILTER_SANITIZE_URL),
  45. ];
  46. }, $result);
  47. }
  48. /**
  49. * @param $content
  50. * @param $searcher
  51. * @return mixed
  52. * @throws \Exception
  53. */
  54. protected function applyCustomSearcher($content, $searcher)
  55. {
  56. if (!$this->searchersStorage->has($searcher['key'])) {
  57. throw new \Exception('Invalid searcher: ' . $searcher['key']);
  58. }
  59. $searcherData = $this->searchersStorage->get($searcher['key']);
  60. return $this->handleSearcher($searcherData, $content);
  61. }
  62. /**
  63. * @param $searcher
  64. * @param $content
  65. * @return mixed
  66. * @throws \Exception
  67. */
  68. protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
  69. {
  70. if (array_key_exists('rows', $searcher)) {
  71. return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
  72. }
  73. if (array_key_exists('key', $searcher)) {
  74. if ($this->ducklingMapper->has($searcher['key'])) {
  75. return $this->applyDucklingSearcher($content, $searcher['key']);
  76. } else {
  77. throw new \Exception('Invalid searcher: ' . $searcher['key']);
  78. }
  79. }
  80. if (array_key_exists('expression', $searcher)) {
  81. return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
  82. }
  83. if (array_key_exists('id', $searcher) && $this->ducklingMapper->has($searcher['id'])) {
  84. return $this->applyDucklingSearcher($content, $searcher['id']);
  85. }
  86. throw new \Exception('Invalid searcher.');
  87. }
  88. /**
  89. * @param $searchers
  90. * @param $content
  91. * @param bool $mustMatchStartAndEnd
  92. * @return array|array[]
  93. * @throws \Exception
  94. */
  95. protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
  96. {
  97. // Parallel searchers. Apply searcher on the previous searcher's result.
  98. $serialSearchersResults = [
  99. [
  100. 'start' => 0,
  101. 'end' => strlen($content) - 1,
  102. 'content' => $content,
  103. ]
  104. ];
  105. foreach ($searchers as $index => $row) {
  106. $newSerialSearchersResults = [];
  107. foreach ($serialSearchersResults as $serialSearcherItem) {
  108. $newSerialSearcherResult = $this->handleSerialSearchers(
  109. $row,
  110. $serialSearcherItem['content'],
  111. $mustMatchStartAndEnd
  112. );
  113. foreach ($newSerialSearcherResult as $newSerialSearcherItem) {
  114. $start = $serialSearcherItem['start'] + $newSerialSearcherItem['start'];
  115. $newSerialSearchersResults[] = [
  116. 'start' => $start,
  117. 'end' => $start + strlen($newSerialSearcherItem['content']) - 1,
  118. 'content' => $newSerialSearcherItem['content'],
  119. ];
  120. }
  121. }
  122. $serialSearchersResults = $newSerialSearchersResults;
  123. $mustMatchStartAndEnd = true;
  124. }
  125. return $serialSearchersResults;
  126. }
  127. /**
  128. * @param $serialSearchers
  129. * @param $content
  130. * @return mixed
  131. * @throws \Exception
  132. */
  133. protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
  134. {
  135. $results = [];
  136. foreach ($serialSearchers as $searcher) {
  137. $searcherResult = $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd);
  138. $results = array_merge($results, $searcherResult);
  139. }
  140. return $results;
  141. }
  142. protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
  143. {
  144. $pattern = $mustMatchStartAndEnd
  145. ? "/^$expression$/"
  146. : "/$expression/";
  147. $hasMatches = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);
  148. if ( ! $hasMatches) {
  149. return [];
  150. }
  151. $results = array_map(function ($item) {
  152. if ( ! $item[0]) {
  153. return [];
  154. }
  155. return [
  156. 'start' => $item[1],
  157. 'end' => $item[1] + strlen($item[0]) - 1,
  158. 'content' => $item[0],
  159. ];
  160. }, $matches[0]);
  161. return array_filter($results, function ($result) {
  162. return count($result) > 0;
  163. });
  164. }
  165. protected function processResults($results)
  166. {
  167. $intervals = [];
  168. $processedResults = [];
  169. foreach ($results as $searcher => $searcherResults) {
  170. $processedResults[$searcher] = [];
  171. foreach ($searcherResults as $item) {
  172. $intervalIsOk = true;
  173. foreach ($intervals as $start => $end) {
  174. if (
  175. $item['start'] >= $start && $item['start'] <= $end ||
  176. $item['end'] >= $start && $item['end'] <= $end ||
  177. $item['start'] <= $start && $item['end'] >= $end
  178. ) {
  179. $intervalIsOk = false;
  180. break;
  181. }
  182. }
  183. if ($intervalIsOk) {
  184. $intervals[$item['start']] = $item['end'];
  185. $processedResults[$searcher][] = $item;
  186. }
  187. }
  188. }
  189. return $processedResults;
  190. }
  191. }