Browse Source

PDFs with images

hidden_tags_with_bookmarks
Orzu Ionut 3 years ago
parent
commit
fc833a9ab7
  1. 3
      README.md
  2. 24
      app/Ingest/AbstractConvertor.php
  3. 434
      app/Ingest/Convertor.php
  4. 6
      app/Ingest/DocumentHandler.php
  5. 46
      app/Ingest/DocxConvertor.php
  6. 50
      app/Ingest/OtherConvertor.php
  7. 271
      app/Ingest/PDFConvertor.php
  8. 52
      app/Ingest/TextConvertor.php
  9. 103
      app/Jobs/IngestDocuments.php
  10. 74
      app/Jobs/SendToCore.php

3
README.md

@ -70,6 +70,9 @@ cd Bin
# Dewarp
pip3 install opencv-python
cd DEWARP_INSTALLATION_DIRECTORY
pip3 install -r requirements.txt
# MAT2 (Metadata remover) - Not used at the moment
pip3 install mat2
apt-get install gir1.2-poppler-0.18

24
app/Ingest/AbstractConvertor.php

@ -0,0 +1,24 @@
<?php
namespace App\Ingest;
/**
 * Common base for the ingest convertors.
 *
 * Remembers the storage handle, the path of the document to convert and the
 * directory that path lives in, and offers a helper for removing the
 * original document once a convertor is done with it.
 */
abstract class AbstractConvertor
{
    /** @var mixed Storage handle; this class only calls delete() on it. */
    protected $storage;

    /** @var string Path of the document, exactly as given to the constructor. */
    protected $path;

    /** @var string Directory component of $path. */
    protected $directoryPath;

    /**
     * @param mixed  $storage Storage handle used for every file operation.
     * @param string $path    Path of the document to convert.
     */
    public function __construct($storage, $path)
    {
        $this->path = $path;
        $this->storage = $storage;
        $this->directoryPath = dirname($path);
    }

    /**
     * Run the conversion. Each concrete convertor supplies its own steps.
     */
    abstract public function execute();

    /**
     * Remove the document this convertor was created for from storage.
     */
    protected function deleteOriginalDocument()
    {
        $this->storage->delete($this->path);
    }
}

434
app/Ingest/Convertor.php

@ -5,7 +5,6 @@ namespace App\Ingest;
use Illuminate\Support\Facades\Storage;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
use League\HTMLToMarkdown\HtmlConverter;
class Convertor
{
@ -30,191 +29,18 @@ class Convertor
public function execute()
{
if ($this->type === 'txt') {
return $this->path;
$convertor = new TextConvertor($this->storage, $this->path);
} else if ($this->type === 'pdf') {
$convertor = new PDFConvertor($this->storage, $this->path);
} else if ($this->type === 'docx') {
$convertor = new DocxConvertor($this->storage, $this->path);
} else {
$convertor = new OtherConvertor($this->storage, $this->path);
}
if ($this->type === 'pdf') {
// $this->convertPdfToText();
$this->convertPdfToMD();
// $this->getHtmlContentsFromPdfWithImages();
$convertor->execute();
return $this->path;
}
if ($this->type !== 'docx') {
$this->convertToDocx();
}
$this->convertDocumentToText();
//$this->convertToHtml();
return $this->path;
}
/**
* Convert doc,dot,rtf,odt,pdf,docx to docx
*
*
* @return string|void
*/
private function convertToDocx()
{
(new Process(['export HOME=' . env('USER_HOME_PATH')]))->run();
/**
* Convert doc,dot,rtf,odt to docx
*/
$process = new Process([
'soffice',
'--headless',
'--convert-to',
'docx',
$this->storage->path($this->path),
'--outdir',
$this->storage->path('contracts')
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
$this->storage->delete($this->path);
$this->path = str_replace(".$this->type", '.docx', $this->path);
}
/**
* Convert docx file to text
*
* @return void
*/
private function convertDocumentToText()
{
(new Process(['export HOME=' . env('USER_HOME_PATH')]))->run();
$process = new Process([
'soffice',
'--headless',
'--convert-to',
'txt',
$this->storage->path($this->path),
'--outdir',
$this->storage->path('contracts')
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
$this->storage->delete($this->path);
$this->path = str_replace(['.docx', '.bin'], '.txt', $this->path);
}
protected function convertPdfToText()
{
$this->prepareForConvertPDF();
$images = $this->getImagesFromPDF();
$contents = $this->getTextContentsFromPDF();
if (!$contents && count($images) === 0) {
throw new \Exception('Could not read from file.');
}
// Handle images and image contents.
if (count($images) > 0) {
foreach ($images as $image) {
try {
$ocr = new OCR($this->storage->path($image));
$imageContents = $ocr->execute();
$contents = $contents . "\n" . $imageContents;
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info('something wrong: ' . $exception->getMessage());
}
}
$dir = str_replace('.pdf', '', $this->path);
$this->storage->deleteDirectory($dir);
}
$this->storage->delete($this->path);
$this->path = str_replace('.pdf', '.txt', $this->path);
$this->storage->put($this->path, $contents);
}
protected function convertPdfToMD()
{
// $this->prepareForConvertPDF();
$result = $this->getContentsFromPdf();
if ( ! $result['has_images'] && ! $result['has_text']) {
throw new \Exception('Cannot get pdf file contents.');
}
if ($result['has_text']) {
if ($result['has_images']) {
// Both text and images.
throw new \Exception('Not supported for now.');
}
// Delete directory because the contents are in the '$result' variable.
$this->storage->deleteDirectory($this->path);
$mdContents = '';
foreach ($result['htmls'] as $html) {
$converter = new HtmlConverter();
$converter->getConfig()->setOption('strip_tags', true);
$contents = $converter->convert($html);
$mdContents = $mdContents . $contents;
}
$this->path = "$this->path.md";
$this->storage->put($this->path, $mdContents);
return;
}
// Only contains images.
$imagesContent = '';
$files = $this->storage->allFiles($this->path);
foreach ($files as $file) {
// Only get the image files from the directory, it may contain some empty html files too.
if (in_array(pathinfo($file, PATHINFO_EXTENSION), ['jpg', 'png'])) {
$ocr = new OCR($this->storage->path($file));
$imagesContent = $imagesContent . $ocr->execute();
}
}
\Illuminate\Support\Facades\Log::info('============================');
\Illuminate\Support\Facades\Log::info($this->path);
// We are done with the images processing, delete directory.
$this->storage->deleteDirectory($this->path);
$this->path = "$this->path.md";
\Illuminate\Support\Facades\Log::info($this->path);
\Illuminate\Support\Facades\Log::info('++++++++++++++++++++++++++');
$this->storage->put($this->path, $imagesContent);
}
private function convertToHtml()
@ -262,248 +88,4 @@ class Convertor
$this->path = str_replace(".$this->type", '.xml', $this->path);
}
protected function prepareForConvertPDF()
{
(new Process(['export HOME=' . env('USER_HOME_PATH')]))->run();
$process = new Process([
'pip3',
'install',
'pdftotext',
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
}
protected function getImagesFromPDF()
{
$dir = str_replace('.pdf', '', $this->path);
$this->storage->makeDirectory($dir);
$process = new Process([
'pdfimages',
'-p',
$this->storage->path($this->path),
'-tiff',
$this->storage->path("$dir/ocr")
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
return $this->storage->allFiles($dir);
}
protected function getTextContentsFromPDF()
{
$outputPath = $this->storage->path(str_replace('.pdf', '.txt', $this->path));
$process = new Process([
'python3',
storage_path('scripts' . DIRECTORY_SEPARATOR . 'parse-pdf.py'),
'-i',
$this->storage->path($this->path),
'-o',
$outputPath
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
return file_get_contents($outputPath);
}
protected function getHtmlContentsFromPdfWithImages()
{
$dirName = str_replace('.pdf', '', $this->path);
$this->storage->makeDirectory($dirName);
$outputPath = $this->storage->path("$dirName/html");
$process = new Process([
'pdftohtml',
'-noframes',
$this->storage->path($this->path),
$outputPath
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
$this->storage->delete($this->path);
$this->path = $dirName;
$converter = new HtmlConverter();
$converter->getConfig()->setOption('strip_tags', true);
$files = $this->storage->allFiles($this->path);
$htmlFileIndex = null;
foreach ($files as $index => $file) {
// if (pathinfo($file, PATHINFO_BASENAME) === 'html-html.html') {
// if (pathinfo($file, PATHINFO_EXTENSION) === 'html') {
if (pathinfo($file, PATHINFO_BASENAME) === 'html.html') {
$htmlFileIndex = $index;
break;
}
}
$htmlContents = $this->storage->get($files[$htmlFileIndex]);
$contents = $converter->convert($htmlContents);
// $this->storage->deleteDirectory($this->path);
$this->path = "$this->path.md";
$this->storage->put($this->path, $contents);
dd(3);
}
protected function getContentsFromPdf()
{
$dirName = str_replace('.pdf', '', $this->path);
$this->storage->makeDirectory($dirName);
$outputPath = $this->storage->path("$dirName/html");
$process = new Process([
'pdftohtml',
'-xml',
$this->storage->path($this->path),
$outputPath
]);
$process->run();
if (!$process->isSuccessful()) {
throw new ProcessFailedException($process);
}
$this->storage->delete($this->path);
$this->path = $dirName;
$contents = $this->storage->get("$this->path/html.xml");
$xml = simplexml_load_string($contents);
$fonts = [];
foreach ($xml->page as $page) {
foreach ($page as $p) {
if ($p->getName() === 'fontspec') {
$fonts[(int) $p['id']]['family'] = (string) $p['family'];
$fonts[(int) $p['id']]['size'] = (string) $p['size'];
$fonts[(int) $p['id']]['color'] = (string) $p['color'];
}
}
}
$htmls = [];
$hasImages = false;
$hasText = false;
try {
foreach ($xml->page as $page) {
$html = '';
$previousP = null;
foreach ($page as $p) {
if ($p->getName() == 'image') {
$html = $html . '<img style="position: absolute; top: ' . $p['top'] . 'px; left: ' . $p['left'] . 'px;" width="' . $p['width'] . '" height="' . $p['height'] . '" src="' . $p['src'] . '">';
$hasImages = true;
}
if ($p->getName() == 'text') {
$id = (int) $p['font'];
$font_size = $fonts[$id]['size'];
$font_color = $fonts[$id]['color'];
$font_family = $fonts[$id]['family'];
$style = '';
$style = $style . 'position: absolute;';
$style = $style . "color: $font_color;";
$style = $style . "font-family: $font_family;";
$style = $style . "font-weight: 900;";
$style = $style . "width: " . $p['width'] . "px;";
$style = $style . "height: " . $p['height'] . "px;";
$style = $style . "top: " . $p['top'] . "px;";
$style = $style . "left: " . $p['left'] . "px;";
// $style = $style . "font-size: $font_size" . "px;";
if ($p->i) {
$content = '<i>' . $p->i . '</i>';
} else if ($p->b) {
$content = '<b>' . $p->b . '</b>';
} else {
$content = $p;
}
// @TODO Must chain paragraphs if top are almost same.
$tag = $this->getTag($p, $previousP, $font_size);
$html = $html . '<' . $tag . ' style="' . $style . '">' . $content . '</' . $tag . '>';
$hasText = true;
}
$previousP = $p;
}
$htmls[] = '<html><head><title></title></head><body>' . $html . '</body></html>';
}
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info($exception->getTraceAsString());
}
return [
'has_images' => $hasImages,
'has_text' => $hasText,
'htmls' => $htmls,
];
}
protected function getTag($p, $previousP, $size)
{
if ($size > 24) {
return 'h1';
}
if ($size > 18) {
return 'h2';
}
if ($size > 16) {
return 'h3';
}
if ($previousP && $p['top'] - $previousP['top'] <= 5) {
return 'span';
}
return 'p';
}
}

6
app/Ingest/DocumentHandler.php

@ -53,8 +53,10 @@ class DocumentHandler
$type = $this->supportedFiles[$mimeType];
$path = $storage->putFileAs("contracts", $file, "$this->id.$type");
$id = str_replace(' ', '_', $this->id);
IngestDocuments::dispatch($path, $type);
$path = $storage->putFileAs("contracts/$id", $file, "document.$type");
IngestDocuments::dispatch($this->id, $path, $type);
}
}

46
app/Ingest/DocxConvertor.php

@ -0,0 +1,46 @@
<?php
namespace App\Ingest;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Converts a .docx document to markdown by way of plain text.
 *
 * LibreOffice (`soffice`) writes a .txt rendition into the document's
 * directory; TextConvertor then turns that text file into "document.md".
 */
class DocxConvertor extends AbstractConvertor
{
    /**
     * Convert the document to text and hand the text file to TextConvertor.
     *
     * @return void
     *
     * @throws ProcessFailedException When soffice exits unsuccessfully.
     */
    public function execute()
    {
        $this->convertToText();

        // The text file is looked up under the fixed name "document.txt",
        // i.e. the source document is expected to be named "document.docx".
        $convertor = new TextConvertor($this->storage, "$this->directoryPath/document.txt");
        $convertor->execute();
    }

    /**
     * Convert docx file to text
     *
     * The original document is deleted once soffice has succeeded.
     *
     * @return void
     *
     * @throws ProcessFailedException When soffice exits unsuccessfully.
     */
    protected function convertToText()
    {
        $process = new Process(
            [
                'soffice',
                '--headless',
                '--convert-to',
                'txt',
                $this->storage->path($this->path),
                '--outdir',
                $this->storage->path($this->directoryPath),
            ],
            null,
            $this->processEnvironment()
        );

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        $this->deleteOriginalDocument();
    }

    /**
     * Environment overrides for the soffice process.
     *
     * HOME has to be handed to the process itself: running a separate
     * "export HOME=..." process beforehand cannot influence a later process.
     *
     * @return array|null Null (inherit the current environment unchanged)
     *                    when USER_HOME_PATH is not configured.
     */
    private function processEnvironment()
    {
        $home = env('USER_HOME_PATH');

        return $home ? ['HOME' => $home] : null;
    }
}

50
app/Ingest/OtherConvertor.php

@ -0,0 +1,50 @@
<?php
namespace App\Ingest;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Converts any document that is not handled by a dedicated convertor.
 *
 * LibreOffice (`soffice`) first turns the document into a .docx file in the
 * same directory; DocxConvertor then continues from that file.
 */
class OtherConvertor extends AbstractConvertor
{
    /**
     * Convert the document to docx and hand the result to DocxConvertor.
     *
     * @return void
     *
     * @throws ProcessFailedException When soffice exits unsuccessfully.
     */
    public function execute()
    {
        $this->convertToDocx();

        // The docx file is looked up under the fixed name "document.docx",
        // i.e. the source document is expected to be named "document.<ext>".
        $convertor = new DocxConvertor($this->storage, "$this->directoryPath/document.docx");
        $convertor->execute();
    }

    /**
     * Convert the document (e.g. doc, dot, rtf, odt) to docx.
     *
     * The original document is deleted once soffice has succeeded.
     *
     * @return void
     *
     * @throws ProcessFailedException When soffice exits unsuccessfully.
     */
    private function convertToDocx()
    {
        $process = new Process(
            [
                'soffice',
                '--headless',
                '--convert-to',
                'docx',
                $this->storage->path($this->path),
                '--outdir',
                $this->storage->path($this->directoryPath),
            ],
            null,
            $this->processEnvironment()
        );

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        $this->deleteOriginalDocument();
    }

    /**
     * Environment overrides for the soffice process.
     *
     * HOME has to be handed to the process itself: running a separate
     * "export HOME=..." process beforehand cannot influence a later process.
     *
     * @return array|null Null (inherit the current environment unchanged)
     *                    when USER_HOME_PATH is not configured.
     */
    private function processEnvironment()
    {
        $home = env('USER_HOME_PATH');

        return $home ? ['HOME' => $home] : null;
    }
}

271
app/Ingest/PDFConvertor.php

@ -0,0 +1,271 @@
<?php
namespace App\Ingest;
use League\HTMLToMarkdown\HtmlConverter;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Converts a PDF document into "document.md" stored in the PDF's directory.
 *
 * The PDF is run through `pdftohtml -xml`. The resulting XML is rebuilt into
 * one HTML document per page, which is converted to markdown. When the XML
 * holds images but no text elements, the jpg/png files found in the document
 * directory are run through OCR instead and their text becomes the markdown.
 */
class PDFConvertor extends AbstractConvertor
{
    /**
     * Produce "document.md" for the PDF.
     *
     * @return void
     *
     * @throws ProcessFailedException When pdftohtml exits unsuccessfully.
     * @throws \Exception             When the PDF yields neither text nor images,
     *                                or its XML rendition cannot be processed.
     */
    public function execute()
    {
        $result = $this->getFileContents();

        if (! $result['has_images'] && ! $result['has_text']) {
            throw new \Exception('Cannot get pdf file contents.');
        }

        $markdownPath = "$this->directoryPath/document.md";

        if ($result['has_text']) {
            $converter = new HtmlConverter();
            $converter->getConfig()->setOption('strip_tags', true);

            $mdContents = '';

            foreach ($result['htmls'] as $html) {
                $mdContents = $mdContents . "\n\n" . $converter->convert($html);
            }

            $this->storage->put($markdownPath, $mdContents);

            return;
        }

        // Only contains images.
        $imagesContent = '';

        // The extracted images live in the document directory. $this->path is
        // the original PDF file, which getFileContents() has already deleted,
        // so listing it would never return anything.
        $files = $this->storage->allFiles($this->directoryPath);

        foreach ($files as $file) {
            // Only get the image files from the directory, it may contain some empty html files too.
            // @TODO Only OCR images with text and delete them afterwards, the remaining ignore and keep.
            if (in_array(pathinfo($file, PATHINFO_EXTENSION), ['jpg', 'png'], true)) {
                $ocr = new OCR($this->storage->path($file));

                $imagesContent = $imagesContent . $ocr->execute();

                $this->storage->delete($file);
            }
        }

        $this->storage->put($markdownPath, $imagesContent);
    }

    /**
     * Run pdftohtml on the PDF, delete the PDF and parse the generated XML.
     *
     * pdftohtml is given "<directory>/html" as output name; the XML is later
     * read back from "<directory>/html.xml".
     *
     * @return array See getDataFromXML().
     *
     * @throws ProcessFailedException When pdftohtml exits unsuccessfully.
     */
    protected function getFileContents()
    {
        $outputPath = $this->storage->path("$this->directoryPath/html");

        $process = new Process([
            'pdftohtml',
            '-xml',
            $this->storage->path($this->path),
            $outputPath,
        ]);

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }

        // Remove original document.
        $this->storage->delete($this->path);

        return $this->getDataFromXML();
    }

    /**
     * Build one HTML document per page from "<directory>/html.xml".
     *
     * Elements carrying a "top" attribute are grouped per page by that value
     * and emitted in ascending order; all text elements sharing a "top" end
     * up in the same paragraph. Images are numbered across the whole
     * document ("Fig. N"); a caption paragraph is emitted where the image
     * occurs and the image itself is appended at the end of its page.
     *
     * Side effects: deletes the XML file when anything usable was found and
     * the whole document directory when nothing was found or processing
     * failed.
     *
     * @return array{has_images: bool, has_text: bool, htmls: string[]}
     *
     * @throws \Exception When the XML cannot be parsed or processed.
     */
    protected function getDataFromXML()
    {
        $xmlFilePath = "$this->directoryPath/html.xml";

        $contents = $this->storage->get($xmlFilePath);

        $xml = simplexml_load_string($contents);

        if ($xml === false) {
            // Nothing can be recovered without the XML; clean up like the
            // other failure path below does.
            $this->storage->deleteDirectory($this->directoryPath);

            throw new \Exception('Cannot parse pdf file contents.');
        }

        $orderedList = [];
        $fonts = [];

        foreach ($xml->page as $page) {
            $pageNumber = (int) $page['number'];

            $orderedList[$pageNumber] = [];

            foreach ($page as $p) {
                if ($p->getName() === 'fontspec') {
                    $fonts[(int) $p['id']] = [
                        'family' => (string) $p['family'],
                        'size' => (string) $p['size'],
                        'color' => (string) $p['color'],
                    ];
                }

                if (isset($p['top'])) {
                    $top = (int) $p['top'];

                    if (! array_key_exists($top, $orderedList[$pageNumber])) {
                        $orderedList[$pageNumber][$top] = [];
                    }

                    $orderedList[$pageNumber][$top][] = $p;
                }
            }

            ksort($orderedList[$pageNumber]);
        }

        $htmls = [];
        $hasImages = false;
        $hasText = false;
        $imagesCount = 0;
        $imagesInFooter = true;

        try {
            foreach ($orderedList as $page) {
                $html = '';
                $footerImages = [];

                foreach ($page as $items) {
                    $continuousP = '';

                    foreach ($items as $p) {
                        $elementName = $p->getName();

                        if ($elementName === 'image') {
                            $hasImages = true;
                            $imagesCount += 1;

                            $caption = "Fig. $imagesCount";
                            $imageHTML = $this->handleImage($p, $caption);

                            if (! $imagesInFooter) {
                                $html = $html . $imageHTML;
                            } else {
                                $html = $html . "<p> $caption </p>";

                                $footerImages[] = $imageHTML;
                            }
                        } elseif ($elementName === 'text') {
                            $continuousP = $continuousP . $this->handleText($p, $fonts);

                            $hasText = true;
                        }
                    }

                    $html = $html . '<p>' . $continuousP . '</p>';
                }

                if ($imagesInFooter) {
                    foreach ($footerImages as $footerImage) {
                        $html = $html . '<p>' . $footerImage . '</p>';
                    }
                }

                $htmls[] = '<html><head><title></title></head><body>' . $html . '</body></html>';
            }
        } catch (\Exception $exception) {
            $this->storage->deleteDirectory($this->directoryPath);

            \Illuminate\Support\Facades\Log::info(
                $exception->getMessage() . "\n" . $exception->getTraceAsString()
            );

            // Keep the original failure reachable through getPrevious().
            throw new \Exception('Something went wrong.', 0, $exception);
        }

        if (! $hasText && ! $hasImages) {
            // Remove directory because we do not have any use for it anymore.
            $this->storage->deleteDirectory($this->directoryPath);
        } else {
            // Remove the unnecessary 'xml' file.
            $this->storage->delete($xmlFilePath);
        }

        return [
            'has_images' => $hasImages,
            'has_text' => $hasText,
            'htmls' => $htmls,
        ];
    }

    /**
     * Render an <image> element of the XML as an <img> tag.
     *
     * The source is reduced to "./<basename>", so the image is referenced
     * relative to the generated document.
     *
     * @param \SimpleXMLElement $p       The <image> element.
     * @param string            $caption Used as both alt and title text.
     *
     * @return string
     */
    protected function handleImage($p, $caption)
    {
        $src = './' . pathinfo((string) $p['src'], PATHINFO_BASENAME);

        $src = htmlspecialchars($src, ENT_QUOTES, 'UTF-8');
        $caption = htmlspecialchars($caption, ENT_QUOTES, 'UTF-8');

        $html = '';

        $html = $html . '<br>';
        $html = $html . '<img style="position: absolute; top: ' . $p['top'] . 'px; left: ' . $p['left'] . 'px;" width="' . $p['width'] . '" height="' . $p['height'] . '" src="' . $src . '" alt="' . $caption . '" title="' . $caption . '">';
        $html = $html . '<br>';
        $html = $html . '<br>';

        return $html;
    }

    /**
     * Render a <text> element of the XML as a styled h1/h2/h3/span tag.
     *
     * @param \SimpleXMLElement $p     The <text> element.
     * @param array             $fonts Font specs keyed by font id, each with
     *                                 'size', 'color' and 'family'.
     *
     * @return string
     */
    protected function handleText($p, $fonts)
    {
        $id = (int) $p['font'];

        // A text element referring to an unknown font must not abort the
        // whole conversion; fall back to an empty spec (renders as <span>).
        $font = $fonts[$id] ?? ['size' => '', 'color' => '', 'family' => ''];

        $font_size = $font['size'];
        $font_color = $font['color'];
        $font_family = $font['family'];

        $style = '';

        $style = $style . 'position: absolute;';
        $style = $style . "color: $font_color;";
        $style = $style . "font-family: $font_family;";
        $style = $style . "font-weight: 900;";
        $style = $style . "width: " . $p['width'] . "px;";
        $style = $style . "height: " . $p['height'] . "px;";
        $style = $style . "top: " . $p['top'] . "px;";
        $style = $style . "left: " . $p['left'] . "px;";
        $style = $style . "font-size: $font_size" . "px;";

        $content = $this->innerMarkup($p);

        $tag = $this->getTag($font_size);

        return '<' . $tag . ' style="' . $style . '">' . $content . '</' . $tag . '>';
    }

    /**
     * Inner content of a <text> element with its <i>/<b> markup preserved.
     *
     * Casting a SimpleXMLElement to string only yields the text located
     * directly inside it, which loses text in nested or mixed markup
     * (e.g. "<i><b>x</b></i>" or "foo <b>bar</b>"). Serialising the element
     * and stripping every tag except <i> and <b> keeps all text, and leaves
     * XML entities escaped so they stay valid inside the generated HTML.
     *
     * @param \SimpleXMLElement $p
     *
     * @return string
     */
    private function innerMarkup($p)
    {
        return strip_tags((string) $p->asXML(), '<i><b>');
    }

    /**
     * Pick the HTML tag for a font size: >24 h1, >18 h2, >16 h3, else span.
     *
     * @param string|int|float $size
     *
     * @return string
     */
    protected function getTag($size)
    {
        if ($size > 24) {
            return 'h1';
        }

        if ($size > 18) {
            return 'h2';
        }

        if ($size > 16) {
            return 'h3';
        }

        return 'span';
    }

    /**
     * Install the "pdftotext" Python package through pip3.
     *
     * Not called by execute().
     *
     * @return void
     *
     * @throws ProcessFailedException When pip3 exits unsuccessfully.
     */
    protected function prepareForConvertPDF()
    {
        // HOME has to be handed to the process itself: running a separate
        // "export HOME=..." process beforehand cannot influence pip3.
        $home = env('USER_HOME_PATH');

        $process = new Process(
            [
                'pip3',
                'install',
                'pdftotext',
            ],
            null,
            $home ? ['HOME' => $home] : null
        );

        $process->run();

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }
    }
}

52
app/Ingest/TextConvertor.php

@ -0,0 +1,52 @@
<?php
namespace App\Ingest;
use App\Parser\ParseTextArray;
/**
 * Converts a text document into "document.md" stored in the same directory.
 */
class TextConvertor extends AbstractConvertor
{
    /**
     * Parse the text file, normalise its encoding and store it as markdown.
     *
     * @return void
     *
     * @throws \Exception When the parser returns nothing usable.
     */
    public function execute()
    {
        $parsed = (new ParseTextArray())->fromFile($this->storage->path($this->path));

        if (! $parsed) {
            throw new \Exception('Could not read content.');
        }

        $this->storeContent($this->convertToUTF8($parsed));
    }

    /**
     * Pass every leaf of the (possibly nested) array through
     * mb_convert_encoding() with UTF-8 as the target encoding.
     *
     * No source encoding is supplied, so mbstring falls back to its
     * configured internal encoding for the input.
     *
     * @param array $content
     *
     * @return array The converted copy.
     */
    protected function convertToUTF8($content)
    {
        $toUtf8 = function (&$value) {
            $value = mb_convert_encoding($value, 'UTF-8');
        };

        array_walk_recursive($content, $toUtf8);

        return $content;
    }

    /**
     * Write the markdown file first, then drop the original document.
     *
     * @param array $content
     *
     * @return void
     */
    protected function storeContent($content)
    {
        $this->storeMD($content);
        $this->deleteOriginalDocument();
    }

    /**
     * Render the content with MDConvertor and save it as "document.md".
     *
     * @param array $content
     *
     * @return void
     */
    protected function storeMD($content)
    {
        $markdown = (new MDConvertor($content))->execute();

        $this->storage->put("$this->directoryPath/document.md", $markdown);
    }
}

103
app/Jobs/IngestDocuments.php

@ -3,12 +3,10 @@
namespace App\Jobs;
use App\Ingest\Convertor;
use App\Ingest\MDConvertor;
use App\Parser\ParseXml;
use App\Parser\DocxParser\ParseDocx;
use App\Parser\HtmlParser\ParseHtml;
use App\Parser\ParseHtmlArray;
use App\Parser\ParseTextArray;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
@ -20,6 +18,7 @@ class IngestDocuments implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable;
protected $id;
private $path;
protected $type;
@ -51,11 +50,13 @@ class IngestDocuments implements ShouldQueue
/**
* Create a new job instance.
*
* @param $id
* @param string $path
* @param $type
*/
public function __construct(string $path, $type)
public function __construct($id, string $path, $type)
{
$this->id = $id;
$this->path = $path;
$this->type = $type;
@ -76,7 +77,7 @@ class IngestDocuments implements ShouldQueue
$convertor = new Convertor($this->path, $this->type);
try {
$this->path = $convertor->execute();
$convertor->execute();
} catch (\Exception $exception) {
\Illuminate\Support\Facades\Log::info($exception->getMessage());
@ -85,30 +86,7 @@ class IngestDocuments implements ShouldQueue
return;
}
// @TODO Replace later, the convertor will create the .md file.
if ($this->type !== 'pdf') {
$content = $this->getContent();
if ( ! $content) {
$this->failed();
return;
}
$content = $this->convertToUTF8($content);
try {
$filePath = $this->storeContent($content);
} catch (\Exception $e) {
Log::error('Error writing in to the file: ' . $e->getMessage());
// report($e);
}
} else {
$filePath = $this->path;
}
SendToCore::dispatch($filePath);
SendToCore::dispatch($this->id, pathinfo($this->path, PATHINFO_DIRNAME));
}
public function failed()
@ -124,73 +102,6 @@ class IngestDocuments implements ShouldQueue
// $this->storage->delete($this->path);
// }
SendToCore::dispatch($this->path, true);
}
protected function getContent()
{
if ($this->type === 'pdf') {
// Wait while it finishes.
while (!$this->storage->exists($this->path)) {
sleep(1);
}
$textParser = new ParseTextArray(true);
return $textParser->fromFile($this->storage->path($this->path));
}
$textParser = new ParseTextArray();
return $textParser->fromFile($this->storage->path($this->path));
}
protected function convertToUTF8($content)
{
array_walk_recursive(
$content,
function (&$entry) {
$entry = mb_convert_encoding(
$entry,
'UTF-8'
);
}
);
return $content;
}
protected function storeContent($content)
{
$result = explode('.', $this->path);
$name = $result[0];
// Or json?
$filePath = $this->storeMD($name, $content);
// Delete converted file. We now have the .md file.
$this->storage->delete($this->path);
return $filePath;
}
protected function storeMD($name, $content)
{
$fileName = "$name.md";
$convertor = new MDConvertor($content);
$this->storage->put($fileName, $convertor->execute());
return $fileName;
}
protected function storeJson($name, $content)
{
$fileName = "$name.json";
$this->storage->put($fileName, $content);
return $fileName;
SendToCore::dispatch($this->id, pathinfo($this->path, PATHINFO_DIRNAME), true);
}
}

74
app/Jobs/SendToCore.php

@ -18,7 +18,7 @@ class SendToCore implements ShouldQueue
private $secret;
private $filePath;
private $directoryPath;
private $id;
@ -32,19 +32,18 @@ class SendToCore implements ShouldQueue
/**
* Create a new job instance.
*
* @param null $filePath
* @param $id
* @param null $directoryPath
* @param bool $hasFailed
*/
public function __construct($filePath = null, $hasFailed = false)
public function __construct($id, $directoryPath = null, $hasFailed = false)
{
$this->url = env('WEBHOOK_CORE_URL') . '/webhooks';
$this->secret = env('WEBHOOK_CORE_SECRET');
$this->filePath = $filePath;
$this->hasFailed = $hasFailed;
$string = str_replace('contracts/', '', $this->filePath);
$result = explode('.', $string);
$this->id = $result[0];
$this->id = $id;
$this->directoryPath = $directoryPath;
$this->hasFailed = $hasFailed;
}
/**
@ -55,70 +54,99 @@ class SendToCore implements ShouldQueue
*/
public function handle()
{
$content = '';
$content = [];
// File exists, send content.
if ($this->filePath && ! $this->hasFailed) {
// Directory exists, send content.
if ($this->directoryPath && ! $this->hasFailed) {
$this->storage = Storage::disk('local');
// @TODO Check if the file exists multiple times?
if ( ! $this->storage->exists($this->filePath)) {
if ( ! $this->storage->exists($this->directoryPath)) {
throw new \Exception('File does not exist yet.');
}
$content = $this->storage->get($this->filePath);
$content = $this->getContent();
}
$sent = $this->sendTheData($content);
// if ($this->filePath && $sent) {
if ($this->filePath) {
// if ($this->directoryPath && $sent) {
if ($this->directoryPath) {
if ( ! $this->storage) {
$this->storage = Storage::disk('local');
}
$this->storage->delete($this->filePath);
$this->storage->deleteDirectory($this->directoryPath);
}
}
public function failed()
{
if ($this->filePath) {
if ($this->directoryPath) {
if ( ! $this->storage) {
$this->storage = Storage::disk('local');
}
$this->storage->delete($this->filePath);
$this->storage->delete($this->directoryPath);
}
}
/**
* Send the data to the core trough webhooks
* Send the data to the core through webhooks
*
* @param $content
* @return bool
*/
protected function sendTheData($content)
protected function sendTheData(array $content)
{
try {
WebhookCall::create()
->url($this->url)
->payload(['data' => [
'id' => $this->id,
'content' => $this->encodeContent($content),
'status' => $content ? 'success' : 'fail',
'content' => $content,
'status' => count($content) > 0 ? 'success' : 'fail',
]])
->useSecret($this->secret)
->dispatch();
return true;
} catch (\Exception $exception) {
Log::error('SendToCore@sendTheData' . $exception->getMessage());
Log::error('SendToCore@sendTheData: ' . $exception->getMessage());
return false;
}
}
/**
 * Collect the payload for the webhook: the markdown document plus every
 * jpg/png file found in the document directory.
 *
 * @return array{document: mixed, images: array<int, array{name: string, type: string, contents: string}>}
 *               'document' is whatever encodeContent() returns for "document.md";
 *               each image is delivered as a base64 data URI.
 */
protected function getContent()
{
    $markdown = $this->encodeContent(
        $this->storage->get("$this->directoryPath/document.md")
    );

    $images = [];

    foreach ($this->storage->allFiles($this->directoryPath) as $file) {
        $extension = pathinfo($file, PATHINFO_EXTENSION);

        // @TODO We are using this check in the 'PDFConvertor' file, refactor and improve.
        if ($extension !== 'jpg' && $extension !== 'png') {
            continue;
        }

        // NOTE(review): for jpg files this labels the data URI "image/jpg";
        // the registered MIME type is "image/jpeg". Confirm what the
        // receiving side expects before changing it.
        $images[] = [
            'name' => pathinfo($file, PATHINFO_FILENAME),
            'type' => $extension,
            'contents' => 'data:image/' . $extension . ';base64,' . base64_encode($this->storage->get($file)),
        ];
    }

    return [
        'document' => $markdown,
        'images' => $images,
    ];
}
protected function encodeContent($content)
{
$encoding = mb_detect_encoding($content, 'UTF-8, ISO-8859-1, WINDOWS-1252, WINDOWS-1251', true);

Loading…
Cancel
Save