You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
2.1 KiB
84 lines
2.1 KiB
<?php
|
|
|
|
namespace App\Ingest;
|
|
|
|
use Illuminate\Support\Facades\Storage;
|
|
use Symfony\Component\Process\Exception\ProcessFailedException;
|
|
use Symfony\Component\Process\Process;
|
|
|
|
/**
 * Entry point for converting an ingested file stored on the "local" disk.
 *
 * A Convertor is built from a disk-relative path and a type string.
 * execute() selects the convertor class matching the type and runs it.
 */
class Convertor
{
    /**
     * Disk the file is read from and deleted on (the "local" disk).
     *
     * @var \Illuminate\Contracts\Filesystem\Filesystem
     */
    private $storage;

    /**
     * Path of the file as understood by the storage disk.
     *
     * @var string
     */
    private $path;

    /**
     * File type used to select the convertor ('txt', 'pdf' and 'docx' have
     * dedicated convertors; anything else goes to OtherConvertor).
     *
     * @var string
     */
    protected $type;

    /**
     * @param string $path Path of the file on the "local" storage disk.
     * @param string $type File type, compared strictly against 'txt', 'pdf' and 'docx'.
     */
    public function __construct($path, $type)
    {
        $this->storage = Storage::disk('local');
        $this->path = $path;
        $this->type = $type;
    }

    /**
     * Run the convertor that matches the file type.
     *
     * @throws \Exception Declared by the original contract; presumably raised by
     *                    the delegated convertor (not visible from this class).
     */
    public function execute()
    {
        if ($this->type === 'txt') {
            $convertor = new TextConvertor($this->storage, $this->path);
        } elseif ($this->type === 'pdf') {
            $convertor = new PDFConvertor($this->storage, $this->path);
        } elseif ($this->type === 'docx') {
            $convertor = new DocxConvertor($this->storage, $this->path);
        } else {
            // Every unrecognised type falls back to the generic convertor.
            $convertor = new OtherConvertor($this->storage, $this->path);
        }

        $convertor->execute();

        // HTML conversion is currently disabled:
        //$this->convertToHtml();
    }

    /**
     * Convert the file to HTML through Office, delete the source file and point
     * $this->path at the ".html" counterpart.
     *
     * Not called from within this class at the moment (the call in execute()
     * is commented out).
     *
     * @throws \Exception When Office::run() reports a falsy result.
     */
    private function convertToHtml()
    {
        $office = new Office();

        $success = $office->run(
            'html:HTML:EmbedImages',
            $this->storage->path($this->path),
            $this->storage->path('contracts')
        );

        if (! $success) {
            throw new \Exception('Something went wrong while tried converting to HTML for file: ' . $this->path);
        }

        $this->storage->delete($this->path);

        $this->path = $this->replaceExtension('html');
    }

    /**
     * Convert the file to XML by running "pdftohtml -xml -i" on it, delete the
     * source file and point $this->path at the ".xml" counterpart.
     *
     * Only the pdftohtml step happens here; no further post-processing of the
     * XML is performed by this method. The new path assumes pdftohtml wrote its
     * output next to the input with the extension swapped; that is not verified
     * here.
     *
     * @throws ProcessFailedException When the pdftohtml process is not successful.
     */
    private function convertToXML()
    {
        $process = new Process([
            'pdftohtml',
            '-xml',
            '-i',
            $this->storage->path($this->path)
        ]);

        $process->run();

        if (! $process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        $this->storage->delete($this->path);

        $this->path = $this->replaceExtension('xml');
    }

    /**
     * Build the current path with its trailing ".{type}" suffix swapped for
     * ".{extension}".
     *
     * Only the suffix at the very end of the path is replaced, so a directory or
     * file-name fragment that happens to contain ".{type}" is left alone. When
     * the path does not end in ".{type}" it is returned unchanged.
     *
     * @param string $extension New extension, without the leading dot.
     *
     * @return string
     */
    private function replaceExtension($extension)
    {
        $suffix = '.' . $this->type;
        $suffixLength = strlen($suffix);

        if (substr($this->path, -$suffixLength) !== $suffix) {
            return $this->path;
        }

        return substr($this->path, 0, -$suffixLength) . '.' . $extension;
    }
}
|