Repository for the Search and Displace core module, including the interface for selecting files and the search-and-displace operations to run on them.
https://searchanddisplace.com
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
236 lines
6.9 KiB
236 lines
6.9 KiB
<?php
|
|
|
|
namespace App\SearchDisplace\Searchers;
|
|
|
|
/**
 * Runs a set of searchers against a piece of content and reports, per searcher,
 * the intervals that matched.
 *
 * Every match is an array with the keys 'start', 'end' and 'content'. For
 * expression searchers 'end' is the inclusive offset of the last matched byte.
 */
class Searcher
{
    /** @var array Searchers to apply; sorted by array key in the constructor. */
    protected $searchers;

    /** @var string Content the searchers are applied on. */
    protected $content;

    /** @var Mapper Used to check whether a searcher key is handled by Duckling. */
    protected $ducklingMapper;

    /** @var SearchersStorage Storage in which custom searcher definitions are looked up. */
    protected $searchersStorage;

    /**
     * @param array $searchers List of searchers; each entry must have a 'key'.
     * @param string $content Content to search in.
     */
    public function __construct($searchers, $content)
    {
        $this->searchers = $searchers;
        $this->content = $content;

        // The array keys define the order in which the searchers are applied.
        ksort($this->searchers);

        $this->ducklingMapper = new Mapper();
        $this->searchersStorage = new SearchersStorage();
    }

    /**
     * Handle searchers.
     *
     * Every searcher runs against the same, unmodified content. The order still
     * matters: when matches of different searchers overlap, processResults()
     * keeps the match of the searcher that ran first.
     *
     * @return mixed Matches grouped by searcher key.
     * @throws \Exception When a searcher is neither a Duckling nor a stored custom searcher.
     */
    public function execute()
    {
        $results = [];

        foreach ($this->searchers as $searcher) {
            $key = $searcher['key'];

            $results[$key] = $this->ducklingMapper->has($key)
                ? $this->applyDucklingSearcher($this->content, $key)
                : $this->applyCustomSearcher($this->content, $searcher);
        }

        return $this->processResults($results);
    }

    /**
     * Run a single Duckling dimension on the content and normalise its output
     * to the 'start' / 'end' / 'content' match shape.
     *
     * @param $content
     * @param $dimension
     * @return array
     */
    protected function applyDucklingSearcher($content, $dimension)
    {
        $duckling = new Duckling();

        $items = $duckling->execute($content, [$dimension]);

        return array_map(static function ($item) {
            return [
                'start' => $item['start'],
                // NOTE(review): presumably Duckling reports an exclusive end offset,
                // hence the -1 to make it inclusive - confirm against Duckling.
                'end' => $item['end'] - 1,
                'content' => $item['body'],
            ];
        }, $items);
    }

    /**
     * Load a custom searcher definition from the storage and run it.
     *
     * @param $content
     * @param $searcher
     * @return mixed
     * @throws \Exception When the storage does not know the searcher key.
     */
    protected function applyCustomSearcher($content, $searcher)
    {
        $key = $searcher['key'];

        if (!$this->searchersStorage->has($key)) {
            throw new \Exception('Invalid searcher: ' . $key);
        }

        return $this->handleSearcher($this->searchersStorage->get($key), $content);
    }

    /**
     * Dispatch a searcher definition to the matching handler.
     *
     * The definition is inspected in this order: 'rows' (nested searchers),
     * 'key' (must be a Duckling key), 'expression' (regular expression) and
     * finally 'id' (Duckling key).
     *
     * NOTE(review): $mustMatchStartAndEnd is not forwarded to Duckling searchers,
     * so only expressions are anchored.
     *
     * @param $searcher
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return mixed
     * @throws \Exception When the definition cannot be handled.
     */
    protected function handleSearcher($searcher, $content, $mustMatchStartAndEnd = false)
    {
        if (array_key_exists('rows', $searcher)) {
            return $this->handleParallelSearchers($searcher['rows'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('key', $searcher)) {
            // A definition carrying a 'key' is only valid when it names a Duckling searcher.
            if (!$this->ducklingMapper->has($searcher['key'])) {
                throw new \Exception('Invalid searcher: ' . $searcher['key']);
            }

            return $this->applyDucklingSearcher($content, $searcher['key']);
        }

        if (array_key_exists('expression', $searcher)) {
            return $this->handleExpression($searcher['expression'], $content, $mustMatchStartAndEnd);
        }

        if (array_key_exists('id', $searcher) && $this->ducklingMapper->has($searcher['id'])) {
            return $this->applyDucklingSearcher($content, $searcher['id']);
        }

        throw new \Exception('Invalid searcher.');
    }

    /**
     * Apply the rows of a searcher one after another.
     *
     * The first row runs on the whole content. Every following row runs on the
     * content of each match produced by the previous row and is asked to match
     * it from start to end. Offsets of nested matches are translated back so
     * they are relative to the content passed to this method.
     *
     * @param $searchers
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return array|array[]
     * @throws \Exception
     */
    protected function handleParallelSearchers($searchers, $content, $mustMatchStartAndEnd = false)
    {
        // Seed with a pseudo match spanning the whole content.
        $currentMatches = [
            [
                'start' => 0,
                'end' => strlen($content) - 1,
                'content' => $content,
            ],
        ];

        foreach ($searchers as $row) {
            $nextMatches = [];

            foreach ($currentMatches as $parent) {
                $children = $this->handleSerialSearchers($row, $parent['content'], $mustMatchStartAndEnd);

                foreach ($children as $child) {
                    // The child's offset is relative to the parent's content.
                    $start = $parent['start'] + $child['start'];

                    $nextMatches[] = [
                        'start' => $start,
                        'end' => $start + strlen($child['content']) - 1,
                        'content' => $child['content'],
                    ];
                }
            }

            $currentMatches = $nextMatches;

            // From the second row on, a searcher has to match the previous match entirely.
            $mustMatchStartAndEnd = true;
        }

        return $currentMatches;
    }

    /**
     * Apply every searcher of a row on the same content and concatenate their matches.
     *
     * @param $serialSearchers
     * @param $content
     * @param bool $mustMatchStartAndEnd
     * @return mixed
     * @throws \Exception
     */
    protected function handleSerialSearchers($serialSearchers, $content, $mustMatchStartAndEnd)
    {
        $matches = [];

        foreach ($serialSearchers as $searcher) {
            $matches = array_merge(
                $matches,
                $this->handleSearcher($searcher, $content, $mustMatchStartAndEnd)
            );
        }

        return $matches;
    }

    /**
     * Find all non-empty matches of a regular expression in the content.
     *
     * Offsets are the byte offsets reported by preg_match_all().
     *
     * NOTE(review): the expression is placed between '/' delimiters as is, so an
     * unescaped '/' inside it is interpreted as the end of the pattern.
     *
     * @param string $expression Regular expression without delimiters.
     * @param string $content
     * @param bool $mustMatchStartAndEnd When true the expression must match the whole content.
     * @return array List of matches; empty when nothing matched or the pattern failed.
     */
    protected function handleExpression($expression, $content, $mustMatchStartAndEnd)
    {
        // Group the expression before anchoring it: without the group a top-level
        // alternation such as "a|b" would be read as "^a" OR "b$".
        $pattern = $mustMatchStartAndEnd
            ? '/^(?:' . $expression . ')$/'
            : '/' . $expression . '/';

        $hasMatches = preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE);

        if (!$hasMatches) {
            return [];
        }

        $results = [];

        foreach ($matches[0] as $match) {
            $text = $match[0];
            $offset = $match[1];

            // Skip zero-length matches only; a strict comparison is required
            // because the matched text "0" is falsy in PHP.
            if ($text === '') {
                continue;
            }

            $results[] = [
                'start' => $offset,
                'end' => $offset + strlen($text) - 1,
                'content' => $text,
            ];
        }

        return $results;
    }

    /**
     * Remove overlapping matches.
     *
     * Matches are visited in the order of the given results. A match is kept
     * only when its interval does not overlap any interval accepted before, so
     * earlier searchers take precedence over later ones.
     *
     * @param array $results Matches grouped by searcher key.
     * @return array Same grouping, without the overlapping matches.
     */
    protected function processResults($results)
    {
        // Accepted intervals: start offset => end offset.
        $accepted = [];
        $processedResults = [];

        foreach ($results as $searcher => $searcherResults) {
            $processedResults[$searcher] = [];

            foreach ($searcherResults as $item) {
                $overlaps = false;

                foreach ($accepted as $start => $end) {
                    $startsInside = $item['start'] >= $start && $item['start'] <= $end;
                    $endsInside = $item['end'] >= $start && $item['end'] <= $end;
                    $covers = $item['start'] <= $start && $item['end'] >= $end;

                    if ($startsInside || $endsInside || $covers) {
                        $overlaps = true;

                        break;
                    }
                }

                if ($overlaps) {
                    continue;
                }

                $accepted[$item['start']] = $item['end'];
                $processedResults[$searcher][] = $item;
            }
        }

        return $processedResults;
    }
}
|