Repo for the search and displace ingest module, which takes odf, docx and pdf documents and transforms them into .md files to be used with search and displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

157 lines
3.7 KiB

<?php
namespace App\Jobs;
use App\Ingest\Convertor;
use App\Parser\ParseXml;
use App\Parser\DocxParser\ParseDocx;
use App\Parser\HtmlParser\ParseHtml;
use App\Parser\ParseHtmlArray;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Redis;
use Illuminate\Support\Facades\Storage;
class IngestDocuments implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable;
protected $id;
private $path;
protected $type;
protected $fromRequest;
/**
* @var \Illuminate\Contracts\Filesystem\Filesystem
*/
private $storage;
/**
* @var \App\Parser\DocxParser\ParseDocx
*/
private $parserDocx;
/**
* @var \App\Parser\ParseXml
*/
private $parserXml;
/**
* @var \App\Parser\HtmlParser\ParseHtml
*/
private $parserHtml;
/**
* @var \App\Parser\ParseHtmlArray
*/
private $parseHtmlArray;
/**
* Create a new job instance.
*
* @param $id
* @param string $path
* @param $type
* @param $fromRequest
*/
public function __construct($id, string $path, $type, $fromRequest)
{
$this->id = $id;
$this->path = $path;
$this->type = $type;
$this->fromRequest = $fromRequest;
$this->storage = Storage::disk('local');
$this->parserDocx = new ParseDocx();
$this->parserXml = new ParseXml();
$this->parserHtml = new ParseHtml();
$this->parseHtmlArray = new ParseHtmlArray();
}
/**
* Execute the job.
*
* @return void
*/
public function handle()
{
$convertor = new Convertor($this->path, $this->type);
try {
$convertor->execute();
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info($exception->getMessage());
$this->failed();
return;
}
$directoryPath = pathinfo($this->path, PATHINFO_DIRNAME);
if ($this->fromRequest) {
SendToCore::dispatch($this->id, $directoryPath);
return;
}
$this->storage->deleteDirectory($directoryPath);
$this->updateAnalyzer();
}
public function failed()
{
if ( ! $this->storage) {
$this->storage = Storage::disk('local');
}
Log::error('Ingest documents failed. ' . $this->path);
$directoryPath = pathinfo($this->path, PATHINFO_DIRNAME);
if ($this->fromRequest) {
SendToCore::dispatch($this->id, $directoryPath, true);
return;
}
$this->storage->deleteDirectory($directoryPath);
$this->updateAnalyzer(true);
}
protected function updateAnalyzer($failed = false)
{
$redis = Redis::connection();
if ($failed) {
$redis->set('analyze_performance_error', '1');
}
$remainingFiles = $redis->get('analyze_performance_remaining_files');
$remainingFiles -= 1;
if ($remainingFiles === 0) {
$startedAt = $redis->get('analyze_performance_time');
$endedAt = Carbon::now()->format('U');
$directoryPath = $redis->get('analyze_performance_path');
$data = 'Time elapsed in seconds: ' . ($endedAt - $startedAt) . "\n";
if ($failed) {
$data = $data . 'Something went wrong while processing the files.';
}
file_put_contents($directoryPath . '/ingest_analyze_performance.txt', $data);
return;
}
$redis->set('analyze_performance_remaining_files', $remainingFiles);
}
}