Repository for the Search and Displace core module, including the interface for selecting files and the search-and-displace operations to run on them. https://searchanddisplace.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

236 lines
6.9 KiB

<?php
namespace App\SearchDisplace\Searchers;
/**
 * Runs a list of searchers against a piece of text and reports where each one matched.
 *
 * A searcher is identified by its 'key'. Keys known to the Mapper are sent to Duckling;
 * every other key must exist in SearchersStorage and is evaluated as a custom searcher
 * (nested 'rows' of expressions and/or Duckling dimensions).
 *
 * Every match is reported as ['start' => int, 'end' => int, 'content' => string], where
 * 'start' and 'end' are inclusive offsets into the searched content.
 */
class Searcher
{
    /** @var array Searchers to run, sorted by array key in the constructor. */
    protected $searchers;

    /** @var string Text every top-level searcher is run against. */
    protected $content;

    /** @var Mapper Tells whether a key/id is a Duckling dimension. */
    protected $ducklingMapper;

    /** @var SearchersStorage Source of custom searcher definitions. */
    protected $searchersStorage;

    /**
     * @param array $searchers List of searchers; each entry must have a 'key'.
     * @param string $content Text to search in.
     */
    public function __construct($searchers, $content)
    {
        $this->searchers = $searchers;
        $this->content = $content;

        // The array keys define the priority used when overlapping matches are resolved.
        ksort($this->searchers);

        $this->ducklingMapper = new Mapper();
        $this->searchersStorage = new SearchersStorage();
    }

    /**
     * Run every configured searcher and return the non-overlapping matches.
     *
     * @return array Map of searcher key => list of matches.
     * @throws \Exception When a searcher is neither a Duckling dimension nor a stored custom searcher.
     */
    public function execute()
    {
        $results = [];
        $content = $this->content;

        // Every searcher runs against the same, unmodified content. The order only matters
        // afterwards: processResults() lets earlier searchers win overlapping spans.
        foreach ($this->searchers as $searcher) {
            $results[$searcher['key']] = $this->ducklingMapper->has($searcher['key'])
                ? $this->applyDucklingSearcher($content, $searcher['key'])
                : $this->applyCustomSearcher($content, $searcher);
        }

        return $this->processResults($results);
    }

    /**
     * Ask Duckling for all occurrences of one dimension and normalise them to match arrays.
     *
     * @param string $content Text to search in.
     * @param string $dimension Duckling dimension to look for.
     * @return array List of matches.
     */
    protected function applyDucklingSearcher($content, $dimension)
    {
        $duckling = new Duckling();
        $result = $duckling->execute($content, [$dimension]);

        return array_map(function ($item) {
            return [
                'start' => $item['start'],
                // Presumably Duckling reports an exclusive end; matches here use an inclusive one.
                'end' => $item['end'] - 1,
                // NOTE(review): FILTER_SANITIZE_URL strips every character that is not legal in a
                // URL (spaces, non-ASCII, ...), so 'content' can be shorter than the start..end
                // span it belongs to. Confirm this is intended.
                'content' => filter_var($item['body'], FILTER_SANITIZE_URL),
            ];
        }, $result);
    }

    /**
     * Load a custom searcher from storage and evaluate it.
     *
     * @param string $content Text to search in.
     * @param array $searcher Searcher entry; only its 'key' is used.
     * @return array List of matches.
     * @throws \Exception When the key is not present in the storage.
     */
    protected function applyCustomSearcher($content, $searcher)
    {
        if (!$this->searchersStorage->has($searcher['key'])) {
            throw new \Exception('Invalid searcher: ' . $searcher['key']);
        }

        $searcherData = $this->searchersStorage->get($searcher['key']);

        return $this->handleSearcher($searcherData, $content);
    }

    /**
     * Evaluate one searcher definition. The first of these keys that is present decides how:
     * 'rows' (nested searchers), 'key' (Duckling dimension), 'expression' (regular
     * expression), 'id' (Duckling dimension).
     *
     * @param array $searcher Searcher definition.
     * @param string $content Text to search in.
     * @param bool $mustMatchStartAndEnd Whether expressions have to cover the whole content.
     * @return array List of matches.
     * @throws \Exception When the definition cannot be evaluated.
     */
    protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
    {
        if (array_key_exists('rows', $searcher)) {
            return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('key', $searcher)) {
            // Inside a definition a 'key' may only refer to a Duckling dimension.
            if (!$this->ducklingMapper->has($searcher['key'])) {
                throw new \Exception('Invalid searcher: ' . $searcher['key']);
            }

            return $this->applyDucklingSearcher($content, $searcher['key']);
        }

        if (array_key_exists('expression', $searcher)) {
            return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('id', $searcher) && $this->ducklingMapper->has($searcher['id'])) {
            return $this->applyDucklingSearcher($content, $searcher['id']);
        }

        throw new \Exception('Invalid searcher.');
    }

    /**
     * Apply the rows of a searcher one after another, each row only on the text matched by
     * the previous row, and translate the resulting offsets back to $content.
     *
     * From the second row on, expressions must match a previous match completely.
     *
     * NOTE(review): with an empty $searchers list the loop never runs and the whole content
     * is returned as a single match. Confirm callers never pass an empty list.
     *
     * @param array $searchers Rows; each row is a list of searcher definitions.
     * @param string $content Text to search in.
     * @param bool $mustMatchStartAndEnd Anchoring mode for the first row.
     * @return array List of matches with offsets relative to $content.
     * @throws \Exception
     */
    protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
    {
        // Seed: the whole content counts as the "previous match" of the first row.
        $serialSearchersResults = [
            [
                'start' => 0,
                'end' => strlen($content) - 1,
                'content' => $content,
            ],
        ];

        foreach ($searchers as $row) {
            $newSerialSearchersResults = [];

            foreach ($serialSearchersResults as $serialSearcherItem) {
                $newSerialSearcherResult = $this->handleSerialSearchers(
                    $row,
                    $serialSearcherItem['content'],
                    $mustMatchStartAndEnd
                );

                foreach ($newSerialSearcherResult as $newSerialSearcherItem) {
                    // Offsets of the new match are relative to the previous match; shift them.
                    $start = $serialSearcherItem['start'] + $newSerialSearcherItem['start'];

                    $newSerialSearchersResults[] = [
                        'start' => $start,
                        'end' => $start + strlen($newSerialSearcherItem['content']) - 1,
                        'content' => $newSerialSearcherItem['content'],
                    ];
                }
            }

            $serialSearchersResults = $newSerialSearchersResults;

            // Later rows refine earlier matches, so they have to cover them entirely.
            $mustMatchStartAndEnd = true;
        }

        return $serialSearchersResults;
    }

    /**
     * Run every searcher of a single row on the same content and concatenate their matches.
     *
     * @param array $serialSearchers Searcher definitions of one row.
     * @param string $content Text to search in.
     * @param bool $mustMatchStartAndEnd Whether expressions have to cover the whole content.
     * @return array List of matches.
     * @throws \Exception
     */
    protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
    {
        $results = [];

        foreach ($serialSearchers as $searcher) {
            $searcherResult = $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd);
            $results = array_merge($results, $searcherResult);
        }

        return $results;
    }

    /**
     * Find every non-empty match of a regular expression (given without delimiters).
     *
     * @param string $expression PCRE expression without delimiters or modifiers.
     * @param string $content Text to search in.
     * @param bool $mustMatchStartAndEnd When true the expression is anchored with ^ and $.
     * @return array List of matches; empty when nothing matches or the pattern is invalid.
     */
    protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
    {
        // The pattern is delimited with '/', so a bare '/' inside the expression would end it
        // early. Escape those, leaving every existing backslash escape (including '\/') alone.
        $escaped = preg_replace_callback(
            '~\\\\.|/~s',
            static function ($match) {
                return $match[0] === '/' ? '\\/' : $match[0];
            },
            $expression
        );

        if ($escaped === null) {
            $escaped = $expression;
        }

        // The group keeps the anchors bound to the whole expression: without it 'a|b' would
        // turn into '^a|b$', which matches anything starting with a or ending with b.
        $pattern = $mustMatchStartAndEnd
            ? '/^(?:' . $escaped . ')$/'
            : '/' . $escaped . '/';

        $matchCount = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);

        // 0 means no match, false means the pattern could not be run; both yield no results.
        if (!$matchCount) {
            return [];
        }

        $results = [];

        foreach ($matches[0] as $match) {
            // Only skip zero-length matches. A truthiness check would also drop the match "0".
            if ($match[0] === '') {
                continue;
            }

            $results[] = [
                'start' => $match[1],
                'end' => $match[1] + strlen($match[0]) - 1,
                'content' => $match[0],
            ];
        }

        return $results;
    }

    /**
     * Drop every match that overlaps a match accepted earlier. Searchers and their matches
     * are visited in the order given, so earlier ones take precedence.
     *
     * @param array $results Map of searcher key => list of matches.
     * @return array Same shape as $results, without overlapping matches.
     */
    protected function processResults($results)
    {
        // Accepted spans, start => end. Accepted spans never overlap, so starts are unique.
        $intervals = [];
        $processedResults = [];

        foreach ($results as $searcher => $searcherResults) {
            $processedResults[$searcher] = [];

            foreach ($searcherResults as $item) {
                $intervalIsOk = true;

                foreach ($intervals as $start => $end) {
                    $startsInside = $item['start'] >= $start && $item['start'] <= $end;
                    $endsInside = $item['end'] >= $start && $item['end'] <= $end;
                    $coversInterval = $item['start'] <= $start && $item['end'] >= $end;

                    if ($startsInside || $endsInside || $coversInterval) {
                        $intervalIsOk = false;
                        break;
                    }
                }

                if ($intervalIsOk) {
                    $intervals[$item['start']] = $item['end'];
                    $processedResults[$searcher][] = $item;
                }
            }
        }

        return $processedResults;
    }
}