Browse Source

Keep image positions in document

dev
Alex Puiu 2 years ago
parent
commit
fbbf623408
  1. 9
      app/Ingest/PDFConvertor.php

9
app/Ingest/PDFConvertor.php

@@ -84,7 +84,6 @@ class PDFConvertor extends AbstractConvertor
$hasText = false;
$imagesCount = 0;
$imagesInFooter = true;
$mdContents = '';
$htmlContents = '';
@@ -99,12 +98,14 @@ class PDFConvertor extends AbstractConvertor
foreach ($items as $key => $p) {
if ($p->getName() == 'image') {
$imageInFooter = false;
$basePath = $this->storage->path('');
$imageFilePath = str_replace($basePath, '', $p['src']);
$textContents = $this->applyOCR($imageFilePath);
if ($textContents) {
$imageInFooter = true;
if ($html) {
$mdContents = $mdContents . $this->convertHtmlToMD($html) . "\n\n";
$htmlContents = $htmlContents . $html;
@@ -126,7 +127,7 @@ class PDFConvertor extends AbstractConvertor
$imageHTML = $this->handleImage($p, $caption);
if ( ! $imagesInFooter) {
if (!$imageInFooter) {
$html = $html . $imageHTML;
} else {
$html = $html . "<p> $caption </p>";
@@ -155,8 +156,8 @@ class PDFConvertor extends AbstractConvertor
$html = $html . '<p>' . $continuousP . '</p>';
}
if ($imagesInFooter) {
foreach ($footerImages as $index => $footerImage) {
if (!empty($footerImages) > 0) {
foreach ($footerImages as $footerImage) {
$html = $html . '<p>' . $footerImage . '</p>';
}
}

Loading…
Cancel
Save