From 3a4e470cc639e5b9f063dc26779bbd5d95fd4f9d Mon Sep 17 00:00:00 2001
From: Orzu Ionut
Date: Wed, 16 Jun 2021 15:33:50 +0300
Subject: [PATCH] Convert DOCX to PDF so we will handle DOCX files the same way

---
 README.md                    |  3 +++
 app/Ingest/DocxConvertor.php | 40 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 55d694f..92553c5 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,9 @@ pip3 install -r requirements.txt
 pip3 install mat2
 apt-get install gir1.2-poppler-0.18
 
+# DOCX to PDF Convertor
+apt-get install unoconv
+
 ```
 
 ## Local Usage
diff --git a/app/Ingest/DocxConvertor.php b/app/Ingest/DocxConvertor.php
index 421a408..9940453 100644
--- a/app/Ingest/DocxConvertor.php
+++ b/app/Ingest/DocxConvertor.php
@@ -9,9 +9,9 @@ class DocxConvertor extends AbstractConvertor
 {
     public function execute()
     {
-        $this->convertToText();
+        $this->convertToPDF();
 
-        $convertor = new TextConvertor($this->storage, "$this->directoryPath/document.txt");
+        $convertor = new PDFConvertor($this->storage, "$this->directoryPath/document.pdf");
 
         $convertor->execute();
     }
@@ -43,4 +43,40 @@ class DocxConvertor extends AbstractConvertor
 
         $this->deleteOriginalDocument();
     }
+
+    /**
+     * Convert the DOCX file to PDF by running unoconv, then delete the original document.
+     *
+     * By default unoconv writes the PDF next to the source file, keeping its base name.
+     *
+     * @return void
+     *
+     * @throws ProcessFailedException when unoconv exits with a non-zero status
+     */
+    protected function convertToPDF()
+    {
+        // HOME must be set in the environment of the unoconv process itself.
+        // `export` is a shell builtin, not an executable, so launching it as its
+        // own Process fails silently and could not affect a later process anyway.
+        $home = env('USER_HOME_PATH');
+
+        $process = new Process(
+            [
+                'unoconv',
+                '-f',
+                'pdf',
+                $this->storage->path($this->path),
+            ],
+            null,
+            $home ? ['HOME' => $home] : null
+        );
+
+        $process->run();
+
+        if (!$process->isSuccessful()) {
+            throw new ProcessFailedException($process);
+        }
+
+        $this->deleteOriginalDocument();
+    }
 }