Repo for the search and displace ingest module, which takes ODF, DOCX and PDF files and transforms them into Markdown (.md) for use with search and displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

291 lines
8.3 KiB

<?php
namespace App\Ingest;
use PhpOffice\PhpWord\Element\TextRun;
use PhpOffice\PhpWord\IOFactory;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Style;
use PhpOffice\PhpWord\Style\Font;
/**
 * Builds a Word2007 (.docx) document from a nested array description and
 * saves it through the injected storage object.
 *
 * The input is a tree: sections contain headers, footers and elements; each
 * element carries an `element_type` key that selects the `handle<Type>()`
 * method used to add it to its parent container.
 */
class DocxWriter
{
    /** @var mixed Object exposing a path($relativePath) method (presumably a filesystem disk; verify against caller). */
    protected $storage;

    /** @var string Path, relative to the storage, where the document is saved. */
    protected $saveAtPath;

    /** @var PhpWord Document being assembled. */
    protected $handler;

    /**
     * @param mixed  $storage    Object whose path() method resolves $saveAtPath to a writable location.
     * @param string $saveAtPath Destination path handed to $storage->path().
     */
    public function __construct($storage, $saveAtPath)
    {
        $this->storage = $storage;
        $this->saveAtPath = $saveAtPath;
        $this->handler = new PhpWord();
    }

    /**
     * Build the document described by $data and write it to storage.
     *
     * Reads the keys `default_font_name`, `default_font_size`, `styles`
     * and `elements` (a list of section descriptions).
     *
     * @param array $data Document description.
     */
    public function execute(array $data)
    {
        $this->handler->setDefaultFontName($data['default_font_name']);
        $this->handler->setDefaultFontSize($data['default_font_size']);

        // Styles are registered first so elements can refer to them by name.
        $this->setStyles($data['styles']);

        foreach ($data['elements'] as $section) {
            $this->handleSection($section);
        }

        $objWriter = IOFactory::createWriter($this->handler, 'Word2007');
        $objWriter->save($this->storage->path($this->saveAtPath));
    }

    /**
     * Add one section, with its headers, footers and body elements.
     *
     * @param array $element Section description with keys `style`, `headers`, `footers`, `elements`.
     */
    protected function handleSection($element)
    {
        $section = $this->handler->addSection($element['style']);

        foreach ($element['headers'] as $header) {
            $this->handleHeader($header, $section);
        }

        foreach ($element['footers'] as $footer) {
            $this->handleFooter($footer, $section);
        }

        $this->addElementsToElement($section, $element['elements']);
    }

    /**
     * Add a header of the given type to the section and fill it.
     *
     * @param array $header  Header description with keys `type` and `elements`.
     * @param mixed $section Section the header is attached to.
     *
     * @return mixed The created header container.
     */
    protected function handleHeader($header, $section)
    {
        $headerElement = $section->addHeader($header['type']);
        $this->addElementsToElement($headerElement, $header['elements']);

        return $headerElement;
    }

    /**
     * Add a footer of the given type to the section and fill it.
     *
     * @param array $footer  Footer description with keys `type` and `elements`.
     * @param mixed $section Section the footer is attached to.
     *
     * @return mixed The created footer container.
     */
    protected function handleFooter($footer, $section)
    {
        // Must be addFooter(): calling addHeader() here emitted every footer
        // as an additional header.
        $footerElement = $section->addFooter($footer['type']);
        $this->addElementsToElement($footerElement, $footer['elements']);

        return $footerElement;
    }

    /**
     * Dispatch each child description to its `handle<element_type>()` method.
     *
     * An `element_type` with no matching handler results in a call to an
     * undefined method.
     *
     * @param mixed $parentElement Container receiving the children.
     * @param array $elements      List of element descriptions, each with an `element_type` key.
     */
    protected function addElementsToElement($parentElement, $elements)
    {
        foreach ($elements as $element) {
            $method = 'handle' . $element['element_type'];
            $this->$method($parentElement, $element);
        }
    }

    /**
     * Image handler. Currently a no-op: the method returns before the
     * addImage() call, so images are not written to the document.
     *
     * @param mixed $parentElement Container that would receive the image.
     * @param array $element       Image description (`source`, `style`, `is_watermark`, `name`).
     */
    protected function handleImage($parentElement, array $element)
    {
        // NOTE(review): early return looks deliberate (image output disabled); confirm before removing.
        return;

        $parentElement->addImage(
            $element['source'],
            $element['style'],
            $element['is_watermark'],
            $element['name']
        );
    }

    /**
     * Add a bookmark named `$element['name']`.
     *
     * @param mixed $parentElement Container receiving the bookmark.
     * @param array $element       Bookmark description.
     */
    protected function handleBookmark($parentElement, array $element)
    {
        $parentElement->addBookmark($element['name']);
    }

    /**
     * Add a line using `$element['style']`.
     *
     * @param mixed $parentElement Container receiving the line.
     * @param array $element       Line description.
     */
    protected function handleLine($parentElement, array $element)
    {
        $parentElement->addLine($element['style']);
    }

    /**
     * Link handler. Intentionally empty: link elements are accepted by the
     * dispatcher but nothing is written for them.
     *
     * @param mixed $parentElement Container that would receive the link.
     * @param array $element       Link description.
     */
    protected function handleLink($parentElement, array $element)
    {
    }

    /**
     * Add a list item.
     *
     * Text and font/paragraph styles are read from `$element['text_object']`
     * when that key exists, otherwise from `$element` itself; `depth` and
     * `style` always come from `$element`.
     *
     * @param mixed $parentElement Container receiving the list item.
     * @param array $element       List item description.
     */
    protected function handleListItem($parentElement, array $element)
    {
        $data = array_key_exists('text_object', $element) ? $element['text_object'] : $element;

        $parentElement->addListItem(
            $data['text'],
            $element['depth'],
            $data['font_style'],
            $element['style'],
            $data['paragraph_style']
        );
    }

    /**
     * Add a list item run and fill it with its child elements.
     *
     * @param mixed $parentElement Container receiving the list item run.
     * @param array $element       Description with keys `depth`, `style`, `paragraph_style`, `elements`.
     */
    protected function handleListItemRun($parentElement, array $element)
    {
        $createdElement = $parentElement->addListItemRun(
            $element['depth'],
            $element['style'],
            $element['paragraph_style']
        );

        // Children are only added when the parent actually created the run.
        if ($createdElement) {
            $this->addElementsToElement($createdElement, $element['elements']);
        }
    }

    /**
     * Add a page break.
     *
     * @param mixed $parentElement Container receiving the page break.
     * @param array $element       Unused; present for the common handler signature.
     */
    protected function handlePageBreak($parentElement, array $element)
    {
        $parentElement->addPageBreak();
    }

    /**
     * Add a preserve-text element.
     *
     * @param mixed $parentElement Container receiving the element.
     * @param array $element       Description with keys `text`, `font_style`, `paragraph_style`.
     */
    protected function handlePreserveText($parentElement, array $element)
    {
        $parentElement->addPreserveText(
            $element['text'],
            $element['font_style'],
            $element['paragraph_style']
        );
    }

    /**
     * Add a text element.
     *
     * @param mixed $parentElement Container receiving the text.
     * @param array $element       Description with keys `text`, `font_style`, `paragraph_style`.
     */
    protected function handleText($parentElement, array $element)
    {
        // @TODO Improve bold, italic, list items styles, other styles..
        $parentElement->addText(
            $element['text'],
            $element['font_style'],
            $element['paragraph_style']
        );
    }

    /**
     * Add a single text break, rebuilding its font style from the array in
     * `$element['font_style']` when one is given.
     *
     * @param mixed $parentElement Container receiving the text break.
     * @param array $element       Description with keys `font_style` and `paragraph_style`.
     */
    protected function handleTextBreak($parentElement, array $element)
    {
        $fontStyle = null;

        if ($fs = $element['font_style']) {
            $paragraphStyle = array_key_exists('Paragraph', $fs) ? $fs['Paragraph'] : null;
            $fontStyle = new Font($fs['StyleName'], $paragraphStyle);

            // Basic
            $fontStyle->setName($fs['Name']);
            $fontStyle->setSize($fs['Size']);
            $fontStyle->setColor($fs['Color']);
            $fontStyle->setHint($fs['Hint']);

            // Style
            $fontStyle->setBold($fs['Bold']);
            $fontStyle->setItalic($fs['Italic']);
            $fontStyle->setUnderline($fs['Underline']);
            $fontStyle->setStrikethrough($fs['Strikethrough']);
            $fontStyle->setDoubleStrikethrough($fs['DoubleStrikethrough']);
            $fontStyle->setSuperScript($fs['SuperScript']);
            $fontStyle->setSubScript($fs['SubScript']);
            $fontStyle->setSmallCaps($fs['SmallCaps']);
            $fontStyle->setAllCaps($fs['AllCaps']);
            $fontStyle->setFgColor($fs['FgColor']);
            $fontStyle->setHidden($fs['Hidden']);

            // Spacing
            $fontStyle->setScale($fs['Scale']);
            $fontStyle->setSpacing($fs['Spacing']);
            $fontStyle->setKerning($fs['Kerning']);
            $fontStyle->setPosition($fs['Position']);
        }

        $parentElement->addTextBreak(
            1,
            $fontStyle,
            $element['paragraph_style']
        );
    }

    /**
     * Add a text run and fill it with its child elements.
     *
     * The run is created without a paragraph style; `$element['paragraph_style']`
     * is currently not applied.
     *
     * @param mixed $parentElement Container receiving the text run.
     * @param array $element       Description with key `elements`.
     */
    protected function handleTextRun($parentElement, array $element)
    {
        // $createdElement = $parentElement->addTextRun($element['paragraph_style']);
        $createdElement = $parentElement->addTextRun();

        if ($createdElement) {
            $this->addElementsToElement($createdElement, $element['elements']);
        }
    }

    /**
     * Add a table with its rows and cells, recursing into each non-empty cell.
     *
     * @param mixed $parentElement Container receiving the table.
     * @param array $element       Description with keys `style`, `width`, `rows`
     *                             (each row: `height`, `style`, `cells`;
     *                             each cell: `width`, `style`, `elements`).
     */
    protected function handleTable($parentElement, array $element)
    {
        $table = $parentElement->addTable($element['style']);
        $table->setWidth($element['width']);

        foreach ($element['rows'] as $row) {
            $addedRow = $table->addRow($row['height'], $row['style']);

            foreach ($row['cells'] as $cell) {
                $addedCell = $addedRow->addCell($cell['width'], $cell['style']);

                if (count($cell['elements']) > 0) {
                    $this->addElementsToElement($addedCell, $cell['elements']);
                }
            }
        }
    }

    /**
     * Add a title at `$element['depth']`.
     *
     * When `$element['text']` is an array, a TextRun is built from its
     * `paragraph_style` and `elements` and used as the title content;
     * otherwise the value is passed through as-is.
     *
     * @param mixed $parentElement Container receiving the title.
     * @param array $element       Description with keys `text` and `depth`.
     *
     * @return mixed Whatever the parent's addTitle() returns.
     */
    protected function handleTitle($parentElement, array $element)
    {
        if (is_array($element['text'])) {
            $textRun = new TextRun($element['text']['paragraph_style']);
            $this->addElementsToElement($textRun, $element['text']['elements']);
            $text = $textRun;
        } else {
            $text = $element['text'];
        }

        return $parentElement->addTitle($text, $element['depth']);
    }

    /**
     * Register the named styles with PhpWord.
     *
     * - `Title` is registered as the title style with a null depth.
     * - Names containing `Heading_` are registered as title styles whose depth
     *   is the integer left after removing `Heading_` from the name.
     * - Everything else is registered according to `$style['style']`; only
     *   `font`, `paragraph` and `numbering` are registered, other kinds are
     *   ignored.
     *
     * @param array $styles Map of style name => style definition array.
     */
    protected function setStyles($styles)
    {
        foreach ($styles as $name => $style) {
            // The paragraph sub-style is optional for every kind of style.
            $paragraphStyle = isset($style['Paragraph']) ? $style['Paragraph'] : null;

            // Strict comparison: with `==` an integer key 0 equals 'Title'
            // on PHP < 8, which would hijack the first entry of a list.
            if ($name === 'Title') {
                Style::addTitleStyle(null, $style, $paragraphStyle);
                continue;
            }

            if (strpos((string) $name, 'Heading_') !== false) {
                $depth = (int) str_replace('Heading_', '', $name);
                Style::addTitleStyle($depth, $style, $paragraphStyle);
                continue;
            }

            $kind = isset($style['style']) ? $style['style'] : null;

            if ($kind === 'font') {
                Style::addFontStyle($name, $style, $paragraphStyle);
            } elseif ($kind === 'paragraph') {
                Style::addParagraphStyle($name, $style);
            } elseif ($kind === 'numbering') {
                // $style['Levels'][0]['Format'] = 'decimal';
                Style::addNumberingStyle($name, $style);
            }
            // 'link', 'title' and 'table' styles are not registered yet.
        }
    }
}