|
|
@ -3,8 +3,6 @@ |
|
|
|
namespace App\Ingest; |
|
|
|
|
|
|
|
use Illuminate\Support\Facades\Storage; |
|
|
|
use Symfony\Component\Process\Exception\ProcessFailedException; |
|
|
|
use Symfony\Component\Process\Process; |
|
|
|
|
|
|
|
class Convertor |
|
|
|
{ |
|
|
@ -31,8 +29,8 @@ class Convertor |
|
|
|
$convertor = new TextConvertor($this->storage, $this->path); |
|
|
|
} else if ($this->type === 'pdf') { |
|
|
|
$convertor = new PDFConvertor($this->storage, $this->path); |
|
|
|
} else if ($this->type === 'docx') { |
|
|
|
$convertor = new DocxConvertor($this->storage, $this->path); |
|
|
|
} else if ($this->type === 'docx' || $this->type === 'odt') { |
|
|
|
$convertor = new DocxAndOdtConvertor($this->storage, $this->path, $this->type); |
|
|
|
} else { |
|
|
|
$convertor = new OtherConvertor($this->storage, $this->path); |
|
|
|
} |
|
|
@ -41,44 +39,4 @@ class Convertor |
|
|
|
|
|
|
|
//$this->convertToHtml();
|
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Convert the file at $this->path to HTML and point $this->path at the result.
 *
 * Calls Office::run() with the 'html:HTML:EmbedImages' format string, the
 * resolved path of the source file and the resolved path of 'contracts',
 * deletes the source file from storage, and then swaps the trailing
 * ".{$this->type}" extension of $this->path for ".html".
 *
 * NOTE(review): presumably Office wraps a headless office-suite conversion
 * that writes its output into the 'contracts' directory -- confirm against
 * the Office class, which is not visible from here.
 *
 * @return void
 *
 * @throws \Exception When Office::run() returns a falsy result.
 */
private function convertToHtml()
{
    $office = new Office();

    $success = $office->run(
        'html:HTML:EmbedImages',
        $this->storage->path($this->path),
        $this->storage->path('contracts')
    );

    if (! $success) {
        throw new \Exception('Something went wrong while tried converting to HTML for file: ' . $this->path);
    }

    // The source file is removed only after the conversion reported success.
    $this->storage->delete($this->path);

    // Swap only the trailing extension. A plain str_replace() would also
    // rewrite ".{type}" wherever else it occurs in the path (for example
    // "my.document.doc" with type "doc" would become "my.htmlument.html").
    $extension = '.' . $this->type;
    if (substr($this->path, -strlen($extension)) === $extension) {
        $this->path = substr($this->path, 0, -strlen($extension)) . '.html';
    }
}
|
|
|
|
|
|
|
/**
 * Convert the file at $this->path to XML with the `pdftohtml` command-line
 * tool and point $this->path at the result.
 *
 * Runs `pdftohtml -xml -i <file>`, deletes the source file from storage, and
 * then swaps the trailing ".{$this->type}" extension of $this->path for
 * ".xml".
 *
 * NOTE(review): this relies on pdftohtml writing its output next to the input
 * file under the same base name with an ".xml" extension -- confirm for the
 * installed version. No paragraph post-processing happens in this method.
 *
 * @return void
 *
 * @throws ProcessFailedException When the pdftohtml process does not exit successfully.
 */
private function convertToXML()
{
    // Arguments are passed as an array, so the file path is a single
    // argument and never goes through shell interpolation.
    $process = new Process([
        'pdftohtml',
        '-xml',
        '-i',
        $this->storage->path($this->path)
    ]);

    $process->run();

    if (!$process->isSuccessful()) {
        throw new ProcessFailedException($process);
    }

    // The source file is removed only after the conversion succeeded.
    $this->storage->delete($this->path);

    // Swap only the trailing extension. A plain str_replace() would also
    // rewrite ".{type}" wherever else it occurs in the path (for example
    // "scans.pdf.d/file.pdf" would become "scans.xml.d/file.xml").
    $extension = '.' . $this->type;
    if (substr($this->path, -strlen($extension)) === $extension) {
        $this->path = substr($this->path, 0, -strlen($extension)) . '.xml';
    }
}
|
|
|
} |