From aaaeaf55230303558c937b818959da5e5271c1ac Mon Sep 17 00:00:00 2001 From: Alex Puiu Date: Fri, 25 Feb 2022 13:19:28 +0200 Subject: [PATCH] Skip conversion from odt to docx. --- README.md | 1 - app/Ingest/AbstractConvertor.php | 4 +- app/Ingest/Convertor.php | 46 +------------------ ...xConvertor.php => DocxAndOdtConvertor.php} | 4 +- app/Ingest/OtherConvertor.php | 5 +- app/Jobs/RecreateDocument.php | 4 +- app/Jobs/SendToCore.php | 2 +- composer.json | 5 +- 8 files changed, 13 insertions(+), 58 deletions(-) rename app/Ingest/{DocxConvertor.php => DocxAndOdtConvertor.php} (92%) diff --git a/README.md b/README.md index d549055..b542f47 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,6 @@ ### Ubuntu Packages ```bash # LibreOffice -apt-get install python-software-properties apt-add-repository ppa:libreoffice/ppa apt-get update apt-get install libreoffice diff --git a/app/Ingest/AbstractConvertor.php b/app/Ingest/AbstractConvertor.php index 689dbf6..d5bb9f4 100644 --- a/app/Ingest/AbstractConvertor.php +++ b/app/Ingest/AbstractConvertor.php @@ -7,12 +7,14 @@ abstract class AbstractConvertor protected $storage; protected $path; protected $directoryPath; + protected $type = null; - public function __construct($storage, $path) + public function __construct($storage, $path, $type = null) { $this->storage = $storage; $this->path = $path; $this->directoryPath = pathinfo($path, PATHINFO_DIRNAME); + $this->type = strtoupper($type); } abstract public function execute(); diff --git a/app/Ingest/Convertor.php b/app/Ingest/Convertor.php index 9c5940c..a9b7632 100644 --- a/app/Ingest/Convertor.php +++ b/app/Ingest/Convertor.php @@ -3,8 +3,6 @@ namespace App\Ingest; use Illuminate\Support\Facades\Storage; -use Symfony\Component\Process\Exception\ProcessFailedException; -use Symfony\Component\Process\Process; class Convertor { @@ -31,8 +29,8 @@ class Convertor $convertor = new TextConvertor($this->storage, $this->path); } else if ($this->type === 'pdf') { $convertor = new PDFConvertor($this->storage, $this->path); - } else if ($this->type === 'docx') { - $convertor = new DocxConvertor($this->storage, $this->path); + } else if ($this->type === 'docx' || $this->type === 'odt') { + $convertor = new DocxAndOdtConvertor($this->storage, $this->path, $this->type); } else { $convertor = new OtherConvertor($this->storage, $this->path); } @@ -41,44 +39,4 @@ class Convertor //$this->convertToHtml(); } - - private function convertToHtml() - { - $office = new Office(); - - $success = $office->run( - 'html:HTML:EmbedImages', - $this->storage->path($this->path), - $this->storage->path('contracts') - ); - - if (! $success) { - throw new \Exception('Something went wrong while tried converting to HTML for file: ' . $this->path); - } - - $this->storage->delete($this->path); - - $this->path = str_replace(".$this->type", '.html', $this->path); - } - - private function convertToXML() - { - //Convert the file to xml using pdftohtml to xml and run a python scrypt to fix the paragraphs - $process = new Process([ - 'pdftohtml', - '-xml', - '-i', - $this->storage->path($this->path) - ]); - - $process->run(); - - if (!$process->isSuccessful()) { - throw new ProcessFailedException($process); - } - - $this->storage->delete($this->path); - - $this->path = str_replace(".$this->type", '.xml', $this->path); - } } diff --git a/app/Ingest/DocxConvertor.php b/app/Ingest/DocxAndOdtConvertor.php similarity index 92% rename from app/Ingest/DocxConvertor.php rename to app/Ingest/DocxAndOdtConvertor.php index b1e179b..5b55d86 100644 --- a/app/Ingest/DocxConvertor.php +++ b/app/Ingest/DocxAndOdtConvertor.php @@ -5,7 +5,7 @@ namespace App\Ingest; use Symfony\Component\Process\Exception\ProcessFailedException; use Symfony\Component\Process\Process; -class DocxConvertor extends AbstractConvertor +class DocxAndOdtConvertor extends AbstractConvertor { /** * @@ -107,7 +107,7 @@ class DocxConvertor extends AbstractConvertor ); if (! $success) { - throw new \Exception('Failed when converting from DOCX to ' . strtoupper($format) . ' for file: ' . $this->path); + throw new \Exception('Failed when converting from ' . $this->type . ' to ' . strtoupper($format) . ' for file: ' . $this->path); } $this->deleteOriginalDocument(); diff --git a/app/Ingest/OtherConvertor.php b/app/Ingest/OtherConvertor.php index 3ab91bb..f00b23b 100644 --- a/app/Ingest/OtherConvertor.php +++ b/app/Ingest/OtherConvertor.php @@ -2,16 +2,13 @@ namespace App\Ingest; -use Symfony\Component\Process\Exception\ProcessFailedException; -use Symfony\Component\Process\Process; - class OtherConvertor extends AbstractConvertor { public function execute() { $this->convertToDocx(); - $convertor = new DocxConvertor($this->storage, "$this->directoryPath/document.docx"); + $convertor = new DocxAndOdtConvertor($this->storage, "$this->directoryPath/document.docx"); $convertor->execute(); } diff --git a/app/Jobs/RecreateDocument.php b/app/Jobs/RecreateDocument.php index e7d9a0d..d860415 100644 --- a/app/Jobs/RecreateDocument.php +++ b/app/Jobs/RecreateDocument.php @@ -2,7 +2,7 @@ namespace App\Jobs; -use App\Ingest\DocxConvertor; +use App\Ingest\DocxAndOdtConvertor; use App\Ingest\DocxWriter; use Illuminate\Bus\Queueable; use Illuminate\Contracts\Queue\ShouldQueue; @@ -122,7 +122,7 @@ class RecreateDocument implements ShouldQueue return; } - $convertor = new DocxConvertor($this->storage, $this->path); + $convertor = new DocxAndOdtConvertor($this->storage, $this->path); if ($this->data['document_format'] === 'pdf') { $convertor->convertToPdfWithLibreOffice(); diff --git a/app/Jobs/SendToCore.php b/app/Jobs/SendToCore.php index 434c77b..25a83c7 100644 --- a/app/Jobs/SendToCore.php +++ b/app/Jobs/SendToCore.php @@ -70,7 +70,7 @@ class SendToCore implements ShouldQueue $content = $this->getContent(); } - $sent = $this->sendTheData($content); + $this->sendTheData($content); // if ($this->directoryPath && $sent) { if ($this->directoryPath) { diff --git a/composer.json b/composer.json index 831f919..36e384e 100644 --- a/composer.json +++ b/composer.json @@ -9,16 +9,15 @@ "license": "MIT", "require": { "php": "^7.2", + "ext-json": "*", "cebe/markdown": "^1.2", "fideloper/proxy": "^4.0", "laravel/framework": "^6.2", "laravel/tinker": "^2.0", - "phpoffice/phpword": "^0.17.0", "predis/predis": "^1.1", "spatie/laravel-webhook-server": "^1.13", "spatie/pdf-to-text": "^1.3", - "thiagoalessio/tesseract_ocr": "^2.11", - "ext-json": "*" + "thiagoalessio/tesseract_ocr": "^2.11" }, "require-dev": { "facade/ignition": "^1.4",