diff --git a/app/Ingest/PDFConvertor.php b/app/Ingest/PDFConvertor.php
index 800b585..36af542 100644
--- a/app/Ingest/PDFConvertor.php
+++ b/app/Ingest/PDFConvertor.php
@@ -2,7 +2,6 @@
namespace App\Ingest;
-use League\HTMLToMarkdown\HtmlConverter;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
@@ -98,7 +97,7 @@ class PDFConvertor extends AbstractConvertor
foreach ($page as $items) {
$continuousP = '';
- foreach ($items as $p) {
+ foreach ($items as $key => $p) {
if ($p->getName() == 'image') {
$basePath = $this->storage->path('');
$imageFilePath = str_replace($basePath, '', $p['src']);
@@ -138,7 +137,16 @@ class PDFConvertor extends AbstractConvertor
}
if ($p->getName() == 'text') {
- $continuousP = $continuousP . $this->handleText($p, $fonts);
+ if($p == '·') {
+ continue;
+ }
+
+ $addition = null;
+ if(isset($items[$key-1]) && $items[$key-1] == '·') {
+ $addition = '· ';
+ }
+
+ $continuousP = $continuousP . $this->handleText($p, $fonts, $addition);
$hasText = true;
}
@@ -179,17 +187,17 @@ class PDFConvertor extends AbstractConvertor
{
$html = '';
- $src = './' . pathinfo($p['src'], PATHINFO_BASENAME);
+ $src = './contracts-images/' . pathinfo($this->directoryPath, PATHINFO_BASENAME) . '/' . pathinfo($p['src'], PATHINFO_BASENAME);
$html = $html . '
';
- $html = $html . '';
+ $html = $html . '';
$html = $html . '
';
$html = $html . '
';
return $html;
}
- protected function handleText($p, $fonts)
+ protected function handleText($p, $fonts, $addition = null)
{
$id = (int) $p['font'];
$font_size = $fonts[$id]['size'];
@@ -197,7 +205,7 @@ class PDFConvertor extends AbstractConvertor
$font_family = $fonts[$id]['family'];
$style = '';
- $style = $style . 'position: absolute;';
+ $style = $style . 'position: relative;';
$style = $style . "color: $font_color;";
$style = $style . "font-family: $font_family;";
$style = $style . "font-weight: 900;";
@@ -211,10 +219,16 @@ class PDFConvertor extends AbstractConvertor
$content = '' . $p->i . '';
} else if ($p->b) {
$content = '' . $p->b . '';
+ } else if ($p->a) {
+ $content = $p . '' . $p->a . '';
} else {
$content = $p;
}
+ if($addition) {
+ $content = $addition . $content;
+ }
+
$tag = $this->getTag($font_size);
return '<' . $tag . ' style="' . $style . '">' . $content . '' . $tag . '>';
@@ -236,7 +250,7 @@ class PDFConvertor extends AbstractConvertor
return 'h3';
}
- return 'span';
+ return 'p';
}
protected function applyOCR($path)
@@ -249,11 +263,27 @@ class PDFConvertor extends AbstractConvertor
protected function convertHtmlToMD($contents)
{
$html = '