Browse Source

Skip conversion from odt to docx.

master
Alex Puiu 2 years ago
parent
commit
aaaeaf5523
  1. 1
      README.md
  2. 4
      app/Ingest/AbstractConvertor.php
  3. 46
      app/Ingest/Convertor.php
  4. 4
      app/Ingest/DocxAndOdtConvertor.php
  5. 5
      app/Ingest/OtherConvertor.php
  6. 4
      app/Jobs/RecreateDocument.php
  7. 2
      app/Jobs/SendToCore.php
  8. 5
      composer.json

1
README.md

@ -15,7 +15,6 @@
### Ubuntu Packages
```bash
# LibreOffice
apt-get install python-software-properties
apt-add-repository ppa:libreoffice/ppa
apt-get update
apt-get install libreoffice

4
app/Ingest/AbstractConvertor.php

@ -7,12 +7,14 @@ abstract class AbstractConvertor
protected $storage;
protected $path;
protected $directoryPath;
protected $type = null;
public function __construct($storage, $path)
public function __construct($storage, $path, $type = null)
{
$this->storage = $storage;
$this->path = $path;
$this->directoryPath = pathinfo($path, PATHINFO_DIRNAME);
$this->type = strtoupper($type);
}
abstract public function execute();

46
app/Ingest/Convertor.php

@ -3,8 +3,6 @@
namespace App\Ingest;
use Illuminate\Support\Facades\Storage;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
class Convertor
{
@ -31,8 +29,8 @@ class Convertor
$convertor = new TextConvertor($this->storage, $this->path);
} else if ($this->type === 'pdf') {
$convertor = new PDFConvertor($this->storage, $this->path);
} else if ($this->type === 'docx') {
$convertor = new DocxConvertor($this->storage, $this->path);
} else if ($this->type === 'docx' || $this->type === 'odt') {
$convertor = new DocxAndOdtConvertor($this->storage, $this->path, $this->type);
} else {
$convertor = new OtherConvertor($this->storage, $this->path);
}
@ -41,44 +39,4 @@ class Convertor
//$this->convertToHtml();
}
/**
 * Convert the current document to HTML and repoint $this->path at the result.
 *
 * Calls Office::run() with the 'html:HTML:EmbedImages' conversion string, the
 * storage path of the current file and the storage path of 'contracts'
 * (presumably the output directory -- TODO confirm against Office::run()).
 * When the call reports success, the source file is deleted from storage and
 * $this->path is rewritten to use the '.html' extension.
 *
 * @throws \Exception when Office::run() returns a falsy value.
 */
private function convertToHtml()
{
$office = new Office();
$success = $office->run(
'html:HTML:EmbedImages',
$this->storage->path($this->path),
$this->storage->path('contracts')
);
if (! $success) {
throw new \Exception('Something went wrong while tried converting to HTML for file: ' . $this->path);
}
// Remove the source file from storage now that the conversion reported success.
$this->storage->delete($this->path);
// Swap ".<type>" for ".html" in the stored path.
// NOTE(review): str_replace() replaces every occurrence of ".<type>" in the
// path, not only the trailing extension.
$this->path = str_replace(".$this->type", '.html', $this->path);
}
/**
 * Convert the current document to XML with the external `pdftohtml` tool and
 * repoint $this->path at the result.
 *
 * Runs `pdftohtml -xml -i <file>` through Symfony Process. When the process
 * succeeds, the source file is deleted from storage and $this->path is
 * rewritten to use the '.xml' extension.
 *
 * @throws ProcessFailedException when the pdftohtml process does not exit successfully.
 */
private function convertToXML()
{
// Convert the file to XML using pdftohtml.
// NOTE(review): the previous comment also mentioned running a Python script
// to fix the paragraphs, but no such step is performed in this method.
$process = new Process([
'pdftohtml',
// Per the pdftohtml man page: -xml selects XML output, -i ignores images.
'-xml',
'-i',
$this->storage->path($this->path)
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
// Remove the source file from storage now that the conversion succeeded.
$this->storage->delete($this->path);
// Swap ".<type>" for ".xml" in the stored path.
// NOTE(review): str_replace() replaces every occurrence of ".<type>" in the
// path, not only the trailing extension; this also presumes pdftohtml wrote
// its output next to the input under the same base name -- TODO confirm.
$this->path = str_replace(".$this->type", '.xml', $this->path);
}
}

4
app/Ingest/DocxConvertor.php → app/Ingest/DocxAndOdtConvertor.php

@ -5,7 +5,7 @@ namespace App\Ingest;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
class DocxConvertor extends AbstractConvertor
class DocxAndOdtConvertor extends AbstractConvertor
{
/**
*
@ -107,7 +107,7 @@ class DocxConvertor extends AbstractConvertor
);
if (! $success) {
throw new \Exception('Failed when converting from DOCX to ' . strtoupper($format) . ' for file: ' . $this->path);
throw new \Exception('Failed when converting from ' . $this->type . ' to ' . strtoupper($format) . ' for file: ' . $this->path);
}
$this->deleteOriginalDocument();

5
app/Ingest/OtherConvertor.php

@ -2,16 +2,13 @@
namespace App\Ingest;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
class OtherConvertor extends AbstractConvertor
{
public function execute()
{
$this->convertToDocx();
$convertor = new DocxConvertor($this->storage, "$this->directoryPath/document.docx");
$convertor = new DocxAndOdtConvertor($this->storage, "$this->directoryPath/document.docx");
$convertor->execute();
}

4
app/Jobs/RecreateDocument.php

@ -2,7 +2,7 @@
namespace App\Jobs;
use App\Ingest\DocxConvertor;
use App\Ingest\DocxAndOdtConvertor;
use App\Ingest\DocxWriter;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
@ -122,7 +122,7 @@ class RecreateDocument implements ShouldQueue
return;
}
$convertor = new DocxConvertor($this->storage, $this->path);
$convertor = new DocxAndOdtConvertor($this->storage, $this->path);
if ($this->data['document_format'] === 'pdf') {
$convertor->convertToPdfWithLibreOffice();

2
app/Jobs/SendToCore.php

@ -70,7 +70,7 @@ class SendToCore implements ShouldQueue
$content = $this->getContent();
}
$sent = $this->sendTheData($content);
$this->sendTheData($content);
// if ($this->directoryPath && $sent) {
if ($this->directoryPath) {

5
composer.json

@ -9,16 +9,15 @@
"license": "MIT",
"require": {
"php": "^7.2",
"ext-json": "*",
"cebe/markdown": "^1.2",
"fideloper/proxy": "^4.0",
"laravel/framework": "^6.2",
"laravel/tinker": "^2.0",
"phpoffice/phpword": "^0.17.0",
"predis/predis": "^1.1",
"spatie/laravel-webhook-server": "^1.13",
"spatie/pdf-to-text": "^1.3",
"thiagoalessio/tesseract_ocr": "^2.11",
"ext-json": "*"
"thiagoalessio/tesseract_ocr": "^2.11"
},
"require-dev": {
"facade/ignition": "^1.4",

Loading…
Cancel
Save