|
|
<?php
namespace App\SearchDisplace\Searchers;
/**
 * Runs a set of configured searchers over a piece of text and returns, per
 * searcher key, the matches found, with overlapping matches removed.
 *
 * A searcher is either a Duckling dimension (when the Mapper knows its key)
 * or a custom definition loaded from SearchersStorage. Custom definitions may
 * contain a regular "expression", a nested grid of "rows", or a reference to
 * a Duckling dimension via "key" / "id".
 *
 * Every match is an array of the form
 * ['start' => int, 'end' => int, 'content' => string], where 'end' is used as
 * an inclusive offset throughout this class.
 */
class Searcher
{
    protected $searchers;
    protected $content;
    protected $ducklingMapper;
    protected $searchersStorage;

    /**
     * @param array  $searchers Searcher definitions; each one must expose a 'key' entry.
     * @param string $content   Text the searchers are applied to.
     */
    public function __construct($searchers, $content)
    {
        $this->searchers = $searchers;
        $this->content = $content;

        // The array keys define the order in which searchers are applied,
        // and therefore which searcher wins when matches overlap.
        ksort($this->searchers);

        $this->ducklingMapper = new Mapper();
        $this->searchersStorage = new SearchersStorage();
    }

    /**
     * Handle searchers.
     *
     * @return mixed Matches grouped by searcher key, without overlaps.
     * @throws \Exception When a searcher definition cannot be resolved.
     */
    public function execute()
    {
        $results = [];
        $content = $this->content;

        // Every searcher is applied to the same, unmodified content. The order
        // still matters: processResults() lets earlier searchers claim an
        // interval first, and later overlapping matches are discarded.
        foreach ($this->searchers as $searcher) {
            $key = $searcher['key'];

            $results[$key] = $this->ducklingMapper->has($key)
                ? $this->applyDucklingSearcher($content, $key)
                : $this->applyCustomSearcher($content, $searcher);
        }

        return $this->processResults($results);
    }

    /**
     * Run a single Duckling dimension over the content and normalise the
     * result into the match shape used by this class.
     *
     * @param string $content
     * @param string $dimension
     * @return array
     */
    protected function applyDucklingSearcher($content, $dimension)
    {
        $duckling = new Duckling();

        $result = $duckling->execute($content, [$dimension]);

        return array_map(function ($item) {
            return [
                'start' => $item['start'],
                // Presumably Duckling reports an exclusive end offset; it is
                // turned into an inclusive one here. TODO confirm.
                'end' => $item['end'] - 1,
                // NOTE(review): FILTER_SANITIZE_URL strips characters that are
                // not valid in a URL (e.g. spaces), so 'content' may be shorter
                // than the span described by start/end. Confirm this is intended.
                'content' => filter_var($item['body'], FILTER_SANITIZE_URL),
            ];
        }, $result);
    }

    /**
     * Resolve a custom searcher from storage and apply it to the content.
     *
     * @param $content
     * @param $searcher
     * @return mixed
     * @throws \Exception When the searcher key is not present in storage.
     */
    protected function applyCustomSearcher($content, $searcher)
    {
        if (!$this->searchersStorage->has($searcher['key'])) {
            throw new \Exception('Invalid searcher: ' . $searcher['key']);
        }

        $searcherData = $this->searchersStorage->get($searcher['key']);

        return $this->handleSearcher($searcherData, $content);
    }

    /**
     * Dispatch a searcher definition to the matching handler.
     *
     * The definition is inspected in this order: 'rows', 'key', 'expression',
     * 'id'. A definition carrying a 'key' that is not a Duckling dimension is
     * rejected even if it also carries an 'expression'.
     *
     * @param $searcher
     * @param $content
     * @param bool $mustMatchStartAndEnd Whether an expression has to match the whole content.
     * @return mixed
     * @throws \Exception When the definition has no usable entry.
     */
    protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
    {
        if (array_key_exists('rows', $searcher)) {
            return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('key', $searcher)) {
            if ($this->ducklingMapper->has($searcher['key'])) {
                return $this->applyDucklingSearcher($content, $searcher['key']);
            }

            throw new \Exception('Invalid searcher: ' . $searcher['key']);
        }

        if (array_key_exists('expression', $searcher)) {
            return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('id', $searcher) && $this->ducklingMapper->has($searcher['id'])) {
            return $this->applyDucklingSearcher($content, $searcher['id']);
        }

        throw new \Exception('Invalid searcher.');
    }

    /**
     * Apply a list of rows as a chain: the first row runs on the full content,
     * every following row runs on the content of each match produced by the
     * previous row. Offsets are translated back so that the returned matches
     * are relative to the content passed in.
     *
     * @param $searchers
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return array|array[]
     * @throws \Exception
     */
    protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
    {
        // Seed the chain with one pseudo-match covering the whole content, so
        // that the first row is handled exactly like every other row.
        $serialSearchersResults = [
            [
                'start' => 0,
                'end' => strlen($content) - 1,
                'content' => $content,
            ],
        ];

        foreach ($searchers as $row) {
            $newSerialSearchersResults = [];

            foreach ($serialSearchersResults as $serialSearcherItem) {
                $newSerialSearcherResult = $this->handleSerialSearchers(
                    $row,
                    $serialSearcherItem['content'],
                    $mustMatchStartAndEnd
                );

                foreach ($newSerialSearcherResult as $newSerialSearcherItem) {
                    // Sub-match offsets are relative to the parent match.
                    $start = $serialSearcherItem['start'] + $newSerialSearcherItem['start'];

                    $newSerialSearchersResults[] = [
                        'start' => $start,
                        'end' => $start + strlen($newSerialSearcherItem['content']) - 1,
                        'content' => $newSerialSearcherItem['content'],
                    ];
                }
            }

            $serialSearchersResults = $newSerialSearchersResults;

            // From the second row onwards an expression has to cover the whole
            // fragment produced by the previous row.
            $mustMatchStartAndEnd = true;
        }

        return $serialSearchersResults;
    }

    /**
     * Apply every searcher of one row to the same content and concatenate
     * their matches.
     *
     * @param $serialSearchers
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return mixed
     * @throws \Exception
     */
    protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
    {
        $results = [];

        foreach ($serialSearchers as $searcher) {
            $searcherResult = $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd);

            $results = array_merge($results, $searcherResult);
        }

        return $results;
    }

    /**
     * Find every non-empty match of a regular expression in the content.
     *
     * The expression is placed between '/' delimiters as-is, so a literal
     * slash inside it has to be escaped by whoever defines the expression.
     * Offsets are the byte offsets reported by PREG_OFFSET_CAPTURE.
     *
     * @param string $expression           Pattern body, without delimiters.
     * @param string $content
     * @param bool   $mustMatchStartAndEnd Anchor the expression to the whole content.
     * @return array List of matches; empty when nothing matches or the pattern fails.
     */
    protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
    {
        // The non-capturing group keeps the anchors around the whole
        // expression; without it "a|b" would be read as "(^a)|(b$)".
        $pattern = $mustMatchStartAndEnd
            ? '/^(?:' . $expression . ')$/'
            : '/' . $expression . '/';

        $hasMatches = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);

        // Covers both "no match" (0) and "pattern error" (false).
        if (! $hasMatches) {
            return [];
        }

        $results = [];

        foreach ($matches[0] as $match) {
            // Skip zero-length matches only. A plain truthiness check would
            // also discard a legitimate match of the string "0".
            if ($match[0] === '') {
                continue;
            }

            $results[] = [
                'start' => $match[1],
                'end' => $match[1] + strlen($match[0]) - 1,
                'content' => $match[0],
            ];
        }

        return $results;
    }

    /**
     * Remove overlapping matches across all searchers. Matches are visited in
     * searcher order, then in match order; a match is kept only when it does
     * not overlap any match that was kept before it.
     *
     * @param array $results Matches grouped by searcher key.
     * @return array Same grouping, containing only the accepted matches.
     */
    protected function processResults($results)
    {
        // Accepted intervals, stored as start => end (both inclusive).
        $intervals = [];
        $processedResults = [];

        foreach ($results as $searcher => $searcherResults) {
            $processedResults[$searcher] = [];

            foreach ($searcherResults as $item) {
                $intervalIsOk = true;

                foreach ($intervals as $start => $end) {
                    $startsInside = $item['start'] >= $start && $item['start'] <= $end;
                    $endsInside = $item['end'] >= $start && $item['end'] <= $end;
                    $coversInterval = $item['start'] <= $start && $item['end'] >= $end;

                    if ($startsInside || $endsInside || $coversInterval) {
                        $intervalIsOk = false;

                        break;
                    }
                }

                if ($intervalIsOk) {
                    $intervals[$item['start']] = $item['end'];

                    $processedResults[$searcher][] = $item;
                }
            }
        }

        return $processedResults;
    }
}
|