Repository for the search-and-displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into Markdown (.md) for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

147 lines
3.6 KiB

<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
/**
 * Turns a document text element into an array holding its leading numbering
 * (if any) and its content wrapped in an inline-styled HTML <span>.
 *
 * NOTE(review): the text is embedded into HTML without escaping; if documents
 * can contain "<" or "&" the resulting markup may be malformed. Left as-is
 * because escaping would change the output downstream consumers receive.
 */
class Text
{
    use Helper;

    /**
     * Build the data array for a text element and tag it with type "text".
     *
     * @param object $textElement Element exposing getText() and getFontStyle()
     *                            (presumably a PhpWord text element; verify against caller).
     *
     * @return array 'content' (string), optional 'numbering' (string), 'type' => 'text'.
     */
    public function handle($textElement)
    {
        $data = $this->getElementData($textElement);
        $data['type'] = 'text';

        return $data;
    }

    /**
     * Extract numbering and content from the element and style non-empty content.
     *
     * @param object $textElement
     *
     * @return array 'content' (string) and, when a prefix was detected, 'numbering' (string).
     */
    private function getElementData($textElement)
    {
        // Cast so that a null text never reaches the string helpers below.
        $text = (string) $textElement->getText();
        $textData = $this->getNumberingFromText($text);

        // Empty content is returned untouched so no empty <span> is produced.
        if ($textData['content'] !== '') {
            $textData['content'] = $this->styleTheText($textData['content'], $textElement);
        }

        return $textData;
    }

    /**
     * Split a leading numbering prefix (e.g. "1.", "2.3.1") from the text.
     *
     * @param string $text
     *
     * @return array 'content' always; 'numbering' only when a prefix was found.
     */
    private function getNumberingFromText($text)
    {
        $trimmed = trim($text);

        // The prefix must start with a digit or dot. Inside the second character
        // class the parentheses are literal, so the prefix ends at the first
        // letter, space, "(" or ")".
        $found = preg_match('/^([0-9.])([^(A-Z)(a-z) ]*)/', $trimmed, $match);

        // A lone "." is treated as ordinary text, not as numbering.
        if ($found === 1 && $match[0] !== '.') {
            return [
                // Cut the prefix off the front only; replacing it throughout the
                // string would also delete later occurrences ("1 of 1 items").
                'content'   => trim(substr($trimmed, strlen($match[0]))),
                'numbering' => $match[0],
            ];
        }

        // No numbering: drop tab characters and surrounding whitespace.
        return ['content' => trim(preg_replace('/\t+/', '', $text))];
    }

    /**
     * Wrap the text in a <span> carrying inline CSS, with semantic tags
     * (strong, i, u, ...) applied inside it.
     *
     * @param string $textString Content to wrap.
     * @param object $textObject Element whose getFontStyle() supplies the formatting.
     *
     * @return string HTML fragment.
     */
    private function styleTheText($textString, $textObject)
    {
        $font = $textObject->getFontStyle();

        // The font style may not be a style object (e.g. a style name or null);
        // in that case no formatting can be derived, so emit a plain span
        // instead of failing on a method call against a non-object.
        if (!is_object($font) || !method_exists($font, 'getStyleValues')) {
            return '<span>' . $textString . '</span>';
        }

        $fontStyle = $font->getStyleValues();
        $flags = isset($fontStyle['style']) ? $fontStyle['style'] : [];
        $basic = isset($fontStyle['basic']) ? $fontStyle['basic'] : [];

        $inlineStyle = $this->getInlineStyles(array_merge($flags, $basic));
        $styleAttribute = $inlineStyle ? ' style="' . $inlineStyle . '"' : '';

        return '<span' . $styleAttribute . '>' . $this->getStyledText($textString, $flags) . '</span>';
    }

    /**
     * Translate style values into a string of inline CSS declarations.
     *
     * Declarations are emitted in the iteration order of $styles. Values that
     * are falsy or exactly 'none'/'auto' produce nothing.
     *
     * @param array $styles Map of style name => value.
     *
     * @return string Concatenated CSS declarations (may be empty).
     */
    private function getInlineStyles($styles)
    {
        // Declarations that do not depend on the style's value.
        $fixed = [
            'dStrike'   => 'text-decoration: line-through;text-decoration-style: double;',
            'smallCaps' => 'font-variant: small-caps;',
            'allCaps'   => 'text-transform: uppercase;',
            'hidden'    => 'display:none;',
        ];
        // Declarations that embed the value: [prefix, suffix].
        $valued = [
            'fgColor' => ['background-color:', ';'],
            'size'    => ['font-size:', 'pt;'],
            'color'   => ['color:#', ';'],
        ];

        $styleString = '';
        foreach ($styles as $style => $value) {
            // Strict check: with loose comparison boolean true equals 'none',
            // which would suppress every boolean flag.
            if (!$value || in_array($value, ['none', 'auto'], true)) {
                continue;
            }

            if (isset($fixed[$style])) {
                $styleString .= $fixed[$style];
            } elseif (isset($valued[$style])) {
                $styleString .= $valued[$style][0] . $value . $valued[$style][1];
            }
        }

        return $styleString;
    }

    /**
     * Wrap the text in one HTML tag per active style flag.
     *
     * @param string $text
     * @param array  $styles Map of style name => flag/value; falsy and 'none' are inactive.
     *
     * @return string
     */
    private function getStyledText($text, $styles)
    {
        $mappedStyle = [
            'bold'      => 'strong',
            'italic'    => 'i',
            'underline' => 'u',
            'strike'    => 'strike',
            'super'     => 'sup',
            'sub'       => 'sub',
        ];

        foreach ($styles as $style => $active) {
            if (isset($mappedStyle[$style]) && $active && $active !== 'none') {
                // Each active style wraps the result so far, so later styles end up outermost.
                $text = $this->appendHtmlStyle($text, $mappedStyle[$style]);
            }
        }

        return $text;
    }

    /**
     * Surround the text with an opening and closing tag of the given name.
     *
     * @param string $text
     * @param string $styleType Tag name without angle brackets.
     *
     * @return string
     */
    private function appendHtmlStyle($text, $styleType)
    {
        return "<$styleType>$text</$styleType>";
    }
}