You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
91 lines
2.3 KiB
91 lines
2.3 KiB
<?php
|
|
|
|
namespace App\Ingest;
|
|
|
|
use Illuminate\Support\Facades\Storage;
|
|
use Symfony\Component\Process\Exception\ProcessFailedException;
|
|
use Symfony\Component\Process\Process;
|
|
|
|
/**
 * Entry point for converting an ingested file that lives on the "local" storage disk.
 *
 * The concrete work is delegated to a type-specific convertor chosen from the
 * file type given to the constructor.
 */
class Convertor
{
    /**
     * @var \Illuminate\Contracts\Filesystem\Filesystem
     */
    private $storage;

    /**
     * Path of the file, relative to the root of the storage disk.
     */
    private $path;

    /**
     * File type / extension without the leading dot (compared against 'txt', 'pdf', 'docx').
     */
    protected $type;

    /**
     * @param string $path path of the file relative to the "local" disk
     * @param string $type file type used to select the concrete convertor
     */
    public function __construct($path, $type)
    {
        $this->storage = Storage::disk('local');
        $this->path = $path;
        $this->type = $type;
    }

    /**
     * Pick the convertor matching the file type and run it.
     *
     * Any type other than 'txt', 'pdf' or 'docx' falls back to OtherConvertor.
     * The result of the delegated execute() call is not propagated.
     *
     * @return void
     * @throws \Exception presumably raised by the selected convertor; confirm against those classes
     */
    public function execute()
    {
        if ($this->type === 'txt') {
            $convertor = new TextConvertor($this->storage, $this->path);
        } elseif ($this->type === 'pdf') {
            $convertor = new PDFConvertor($this->storage, $this->path);
        } elseif ($this->type === 'docx') {
            $convertor = new DocxConvertor($this->storage, $this->path);
        } else {
            $convertor = new OtherConvertor($this->storage, $this->path);
        }

        $convertor->execute();

        // The HTML conversion step is currently disabled.
        //$this->convertToHtml();
    }

    /**
     * Convert the file to HTML with LibreOffice (soffice) in headless mode,
     * delete the source file and point $this->path at the ".html" name.
     *
     * The output is written to the "contracts" directory of the storage disk.
     *
     * @throws ProcessFailedException when soffice exits with a non-zero status
     */
    private function convertToHtml()
    {
        // HOME has to be handed to the soffice process itself: "export" is a shell
        // builtin, so running it as a separate process neither works nor would it
        // affect the environment of a later process.
        $home = env('USER_HOME_PATH');
        $environment = null === $home ? null : ['HOME' => $home];

        $process = new Process(
            [
                'soffice',
                '--headless',
                '--convert-to',
                'html:HTML:EmbedImages',
                $this->storage->path($this->path),
                '--outdir',
                $this->storage->path('contracts'),
            ],
            null,
            $environment
        );

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        $this->storage->delete($this->path);

        $this->path = $this->withExtension('.html');
    }

    /**
     * Convert the file to XML with pdftohtml ("-xml", "-i"), delete the source
     * file and point $this->path at the ".xml" name.
     *
     * NOTE(review): the original comment mentioned a follow-up Python script that
     * fixes the paragraphs; no such step is executed here.
     *
     * @throws ProcessFailedException when pdftohtml exits with a non-zero status
     */
    private function convertToXML()
    {
        $process = new Process([
            'pdftohtml',
            '-xml',
            '-i',
            $this->storage->path($this->path),
        ]);

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        $this->storage->delete($this->path);

        $this->path = $this->withExtension('.xml');
    }

    /**
     * Return the current path with its trailing ".<type>" replaced by $extension.
     *
     * Only the suffix is replaced, so a directory or file name that happens to
     * contain ".<type>" elsewhere is left intact. When the path does not end in
     * ".<type>" it is returned unchanged.
     *
     * @param string $extension new extension including the leading dot
     * @return string
     */
    private function withExtension($extension)
    {
        $suffix = '.' . $this->type;
        $length = strlen($suffix);

        if ($length > 1 && substr($this->path, -$length) === $suffix) {
            return substr($this->path, 0, -$length) . $extension;
        }

        return $this->path;
    }
}
|