searchers = $searchers; $this->content = $content; ksort($this->searchers); $this->ducklingMapper = new Mapper(); $this->searchersStorage = new SearchersStorage(); }

    /**
     * Run every configured searcher against the content.
     *
     * @return mixed Matches grouped by searcher key, with overlapping matches removed
     *               (see processResults()).
     * @throws \Exception When a searcher is known neither to the Duckling mapper nor to the storage.
     */
    public function execute()
    {
        $results = [];
        $content = $this->content;

        // Every searcher runs against the same, unmodified content. The iteration
        // order of $this->searchers (key-sorted in the constructor) still matters,
        // because processResults() keeps the first match that claims an interval.
        foreach ($this->searchers as $searcher) {
            $results[$searcher['key']] = $this->ducklingMapper->has($searcher['key'])
                ? $this->applyDucklingSearcher($content, $searcher['key'])
                : $this->applyCustomSearcher($content, $searcher);
        }

        return $this->processResults($results);
    }

    /**
     * Run Duckling for a single dimension and normalise its items to
     * ['start' => ..., 'end' => ..., 'content' => ...] entries.
     *
     * @param $content
     * @param $dimension
     * @return array
     */
    protected function applyDucklingSearcher($content, $dimension)
    {
        $duckling = new Duckling();
        $result = $duckling->execute($content, [$dimension]);

        return array_map(function ($item) {
            return [
                'start' => $item['start'],
                // Presumably Duckling reports an exclusive end; the rest of this class
                // uses inclusive ends. TODO confirm against the Duckling client.
                'end' => $item['end'] - 1,
                // NOTE(review): FILTER_SANITIZE_URL strips every character that is not
                // legal in a URL, so 'content' may be shorter than the start/end span.
                'content' => filter_var($item['body'], FILTER_SANITIZE_URL),
            ];
        }, $result);
    }

    /**
     * Look up a custom searcher definition in the storage and run it.
     *
     * @param $content
     * @param $searcher
     * @return mixed
     * @throws \Exception When the storage does not know the searcher key.
     */
    protected function applyCustomSearcher($content, $searcher)
    {
        if (!$this->searchersStorage->has($searcher['key'])) {
            throw new \Exception('Invalid searcher: ' . $searcher['key']);
        }

        $searcherData = $this->searchersStorage->get($searcher['key']);

        return $this->handleSearcher($searcherData, $content);
    }

    /**
     * Dispatch a searcher definition according to the keys it carries.
     *
     * Checked in this order: 'rows' (chained rows of searchers), 'key' (must be
     * a Duckling dimension), 'expression' (regular expression), 'id' (Duckling
     * dimension).
     *
     * @param $searcher
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return mixed
     * @throws \Exception When the definition matches none of the supported forms.
     */
    protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
    {
        if (array_key_exists('rows', $searcher)) {
            return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('key', $searcher)) {
            if ($this->ducklingMapper->has($searcher['key'])) {
                return $this->applyDucklingSearcher($content, $searcher['key']);
            }

            throw new \Exception('Invalid searcher: ' . $searcher['key']);
        }

        if (array_key_exists('expression', $searcher)) {
            return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('id', $searcher) && $this->ducklingMapper->has($searcher['id'])) {
            return $this->applyDucklingSearcher($content, $searcher['id']);
        }

        throw new \Exception('Invalid searcher.');
    }

    /**
     * Apply the rows of a searcher one after another.
     *
     * Each row is run on the content of every match produced by the previous
     * row; the first row is run on the whole content. Offsets reported by a row
     * are relative to the fragment it was given, so they are shifted by the
     * fragment's own start.
     *
     * @param $searchers
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return array|array[]
     * @throws \Exception
     */
    protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
    {
        // Seed the chain with a single "match" covering the whole content.
        $serialSearchersResults = [
            [
                'start' => 0,
                'end' => strlen($content) - 1,
                'content' => $content,
            ]
        ];

        foreach ($searchers as $row) {
            $newSerialSearchersResults = [];

            foreach ($serialSearchersResults as $serialSearcherItem) {
                $newSerialSearcherResult = $this->handleSerialSearchers(
                    $row,
                    $serialSearcherItem['content'],
                    $mustMatchStartAndEnd
                );

                foreach ($newSerialSearcherResult as $newSerialSearcherItem) {
                    $start = $serialSearcherItem['start'] + $newSerialSearcherItem['start'];
                    $newSerialSearchersResults[] = [
                        'start' => $start,
                        'end' => $start + strlen($newSerialSearcherItem['content']) - 1,
                        'content' => $newSerialSearcherItem['content'],
                    ];
                }
            }

            $serialSearchersResults = $newSerialSearchersResults;
            // Every row after the first must match its input fragment as a whole.
            $mustMatchStartAndEnd = true;
        }

        return $serialSearchersResults;
    }

    /**
     * Run every searcher of one row on the same content and concatenate their matches.
     *
     * @param $serialSearchers
     * @param $content
     * @param $mustMatchStartAndEnd
     * @return mixed
     * @throws \Exception
     */
    protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
    {
        $results = [];

        foreach ($serialSearchers as $searcher) {
            $searcherResult = $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd);
            $results = array_merge($results, $searcherResult);
        }

        return $results;
    }

    /**
     * Find every non-empty match of a regular expression in the content.
     *
     * @param $expression Pattern body without delimiters.
     * @param $content
     * @param $mustMatchStartAndEnd When true the expression has to match the whole content.
     * @return array List of ['start' => ..., 'end' => ..., 'content' => ...] entries;
     *               offsets are byte offsets and 'end' is inclusive.
     */
    protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
    {
        // Group the expression before anchoring it: without the group a top-level
        // alternation such as "a|b" would be read as "^a" OR "b$".
        // NOTE(review): "/" is the delimiter, so an unescaped "/" inside the
        // expression still ends the pattern early; that is not handled here.
        $pattern = $mustMatchStartAndEnd ? "/^(?:{$expression})$/" : "/{$expression}/";

        // preg_match_all() returns 0 when nothing matched and false on error;
        // both are reported as "no matches".
        $hasMatches = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);
        if (!$hasMatches) {
            return [];
        }

        $results = [];
        foreach ($matches[0] as $match) {
            // With PREG_OFFSET_CAPTURE every match is [matched text, byte offset].
            // Only empty matches are skipped; a plain truthiness check would also
            // discard the legitimate match "0".
            if ($match[0] === '') {
                continue;
            }

            $results[] = [
                'start' => $match[1],
                'end' => $match[1] + strlen($match[0]) - 1,
                'content' => $match[0],
            ];
        }

        return $results;
    }

    /**
     * Drop matches that overlap an interval already claimed by an earlier match.
     *
     * Searchers, and the matches within each searcher, are visited in the order
     * given; the first match covering a range wins.
     *
     * @param $results Matches grouped by searcher key.
     * @return array Same grouping, containing only the accepted matches.
     */
    protected function processResults($results)
    {
        // Accepted intervals so far, as start => end (both inclusive).
        $intervals = [];
        $processedResults = [];

        foreach ($results as $searcher => $searcherResults) {
            $processedResults[$searcher] = [];

            foreach ($searcherResults as $item) {
                $intervalIsOk = true;

                foreach ($intervals as $start => $end) {
                    // Rejected when the item starts inside, ends inside, or fully
                    // covers an accepted interval.
                    if (
                        $item['start'] >= $start && $item['start'] <= $end ||
                        $item['end'] >= $start && $item['end'] <= $end ||
                        $item['start'] <= $start && $item['end'] >= $end
                    ) {
                        $intervalIsOk = false;
                        break;
                    }
                }

                if ($intervalIsOk) {
                    $intervals[$item['start']] = $item['end'];
                    $processedResults[$searcher][] = $item;
                }
            }
        }

        return $processedResults;
    }
}