textContents = [
            'text' => '',
            'elements' => [],
        ];
        $this->textLength = 0;
        // $this->readersMapper = [
        //     'docx' => 'Word2007',
        //     'odt' => 'ODText',
        //     'rtf' => 'RTF',
        // ];
    }

    /**
     * Loads the document found at $this->path (resolved through $this->storage)
     * and converts it into a nested array.
     *
     * @return array with the keys default_font_name, default_font_size, styles,
     *               elements (one entry per section) and contents (the text
     *               index built up by addText() while the elements are walked)
     */
    public function execute()
    {
        // Converting to HTML and then back to DOCX loses some content and styles (lost when converted to HTML).
        $data = [];
        $handler = IOFactory::load($this->storage->path($this->path));

        /*
         * @ISSUE
         * At the time of writing (08 Sept 2021) phpword does not support reading bookmarks from a
         * DOCX file. To add that support we can, for example, add the following lines of code to
         * AbstractPart.php, before the Text and TextRun handling case:
         *
         * $els = $xmlReader->getElements('w:bookmarkStart', $domNode);
         * if ($els && $els->count() > 0) {
         *     $parent->addBookmark($els[0]->getAttribute('w:name'));
         * }
         */
        $data['default_font_name'] = $handler->getDefaultFontName();
        $data['default_font_size'] = $handler->getDefaultFontSize();
        $data['styles'] = $this->handleStyles(Style::getStyles());

        $elements = [];
        foreach ($handler->getSections() as $section) {
            $elements[] = $this->sectionToJson($section);
        }
        $data['elements'] = $elements;

        // Must be read last: the text index is filled as a side effect of serialising the sections.
        $data['contents'] = $this->textContents;

        return $data;
    }

    /**
     * Serialises a section together with its child elements, headers, footers and style.
     */
    protected function sectionToJson(Section $section)
    {
        return array_merge($this->elementToJson($section), [
            'element_type' => 'Section',
            'elements' => $this->elementsToJson($section->getElements()),
            'footers' => $this->handleFooters($section->getFooters()),
            // 'footnote_properties' => $this->handleFooters($section->getFootnotePropoperties()),
            'headers' => $this->handleHeaders($section->getHeaders()),
            'style' => $this->sectionStyleToJson($section->getStyle()),
        ]);
    }

    /**
     * Serialises each style object through the "<shortClassName>StyleToJson" method of this class.
     *
     * Array keys of $styles are preserved. A style class without a matching
     * serializer yields a minimal record flagged as unsupported instead of
     * aborting with an undefined-method error.
     *
     * @param array $styles style objects
     * @return array
     */
    protected function handleStyles($styles)
    {
        return array_map(function ($style) {
            $className = $this->shortClassName($style);
            $handler = lcfirst($className) . 'StyleToJson';

            if (! method_exists($this, $handler)) {
                return [
                    'style' => lcfirst($className),
                    'unsupported' => true,
                ];
            }

            return $this->$handler($style);
        }, $styles);
    }

    /**
     * Serialises a list of headers; the result is re-indexed from 0.
     */
    protected function handleHeaders(array $headers)
    {
        $list = [];
        foreach ($headers as $header) {
            $list[] = $this->headerToJson($header);
        }

        return $list;
    }

    /**
     * Serialises a list of footers; the result is re-indexed from 0.
     */
    protected function handleFooters(array $footers)
    {
        $list = [];
        foreach ($footers as $footer) {
            $list[] = $this->footerToJson($footer);
        }

        return $list;
    }

    /**
     * Serialises one element through the "<shortClassName>ToJson" method of this class,
     * merged over the common fields produced by elementToJson().
     *
     * An element class without a matching serializer yields the common fields
     * plus its type name and an "unsupported" flag instead of aborting the
     * whole export with an undefined-method error.
     */
    protected function handleElement($element)
    {
        $className = $this->shortClassName($element);
        $handler = lcfirst($className) . 'ToJson';
        $common = $this->elementToJson($element);

        if (! method_exists($this, $handler)) {
            return array_merge($common, [
                'element_type' => $className,
                'unsupported' => true,
            ]);
        }

        return array_merge($common, $this->$handler($element));
    }

    /**
     * Returns the class name of $object without its namespace.
     */
    protected function shortClassName($object)
    {
        $class = get_class($object);
        $separator = strrpos($class, '\\');

        return $separator === false ? $class : substr($class, $separator + 1);
    }

    /**
     * Returns the identification fields shared by every element.
     */
    protected function elementToJson(AbstractElement $element)
    {
        return [
            // 'comment_range_end' => $element->getCommentRangeEnd(),
            // 'comment_range_start' => $element->getCommentRangeStart(),
            'doc_part' => $element->getDocPart(),
            'doc_part_id' => $element->getDocPartId(),
            'element_id' => $element->getElementId(),
            'element_index' => $element->getElementIndex(),
            // 'parent' => $element->getParent(),
            'nested_level' => $element->getNestedLevel(),
            'relation_id' => $element->getRelationId(),
            'section_id' => $element->getSectionId(),
        ];
    }

    /**
     * Serialises a list of elements; the result is re-indexed from 0.
     */
    protected function elementsToJson(array $elements)
    {
        $list = [];
        foreach ($elements as $element) {
            $list[] = $this->handleElement($element);
        }

        return $list;
    }

    /**
     * Serialises a header: the footer representation with the type marker replaced.
     */
    protected function headerToJson(Header $header)
    {
        return array_merge(
            $this->footerToJson($header),
            [
                'element_type' => 'Header',
            ]
        );
    }

    /**
     * Serialises a footer (also used for headers) with its child elements and type.
     */
    protected function footerToJson($footer)
    {
        return array_merge($this->elementToJson($footer), [
            'element_type' => 'Footer',
            'elements' => $this->elementsToJson($footer->getElements()),
            'type' => $footer->getType(),
        ]);
    }

    /**
     * Serialises a bookmark.
     */
    protected function bookmarkToJson(Bookmark $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'Bookmark',
            'name' => $element->getName(),
        ]);
    }

    /**
     * Serialises an image reference (source and style, not the image data itself).
     */
    protected function imageToJson(Image $image)
    {
        return array_merge($this->elementToJson($image), [
            'element_type' => 'Image',
            'name' => $image->getName(),
            'style' => $this->imageStyleToJson($image->getStyle()),
            'source' => $image->getSource(),
            'source_type' => $image->getSourceType(),
            'is_watermark' => $image->isWatermark(),
        ]);
    }

    /**
     * Serialises a line shape.
     */
    protected function lineToJson(Line $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'Line',
            'style' => $this->lineStyleToJson($element->getStyle()),
        ]);
    }

    /**
     * Serialises a hyperlink, including its target, visible text and styles.
     *
     * The link text is not registered in the text index (addText() is not called),
     * so existing text offsets are unaffected.
     */
    protected function linkToJson(Link $element)
    {
        return [
            'element_type' => 'Link',
            'source' => $element->getSource(),
            'text' => $element->getText(),
            'internal' => $element->isInternal(),
            'font_style' => $this->optionalFontStyleToJson($element->getFontStyle()),
            'paragraph_style' => $this->optionalParagraphStyleToJson($element->getParagraphStyle()),
        ];
    }

    /**
     * Serialises a list item; its text object is serialised through textToJson(),
     * which also registers the text in the text index.
     */
    protected function listItemToJson(ListItem $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'ListItem',
            'depth' => $element->getDepth(),
            'style' => $this->listItemStyleToJson($element->getStyle()),
            'text' => $element->getText(),
            'text_object' => $this->textToJson($element->getTextObject()),
        ]);
    }

    /**
     * Serialises a list item run: the text-run representation plus depth and list style.
     */
    protected function listItemRunToJson(ListItemRun $element)
    {
        return array_merge($this->textRunToJson($element), [
            'element_type' => 'ListItemRun',
            'depth' => $element->getDepth(),
            'style' => $this->listItemStyleToJson($element->getStyle()),
        ]);
    }

    /**
     * Serialises a preserve-text element and registers its text in the text index.
     *
     * When getText() returns an array of fragments they are concatenated, so no
     * fragment is dropped from the output.
     */
    protected function preserveTextToJson(PreserveText $element)
    {
        $text = $element->getText();
        if (is_array($text)) {
            $text = implode('', $text);
        }
        $hash = $this->addText($text);

        return array_merge($this->elementToJson($element), [
            'element_type' => 'PreserveText',
            'font_style' => $this->optionalFontStyleToJson($element->getFontStyle()),
            'paragraph_style' => $this->optionalParagraphStyleToJson($element->getParagraphStyle()),
            'text' => $text,
            'hash' => $hash,
        ]);
    }

    /**
     * Serialises a page break.
     */
    protected function pageBreakToJson(PageBreak $element)
    {
        return [
            'element_type' => 'PageBreak',
        ];
    }

    /**
     * Serialises a text element and registers its text in the text index.
     */
    protected function textToJson(Text $element)
    {
        $text = $element->getText();
        $hash = $this->addText($text);

        return array_merge($this->elementToJson($element), [
            'element_type' => 'Text',
            'font_style' => $this->optionalFontStyleToJson($element->getFontStyle()),
            'paragraph_style' => $this->optionalParagraphStyleToJson($element->getParagraphStyle()),
            'text' => $text,
            'hash' => $hash,
        ]);
    }

    /**
     * Serialises a text break.
     */
    protected function textBreakToJson(TextBreak $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'TextBreak',
            'font_style' => $this->optionalFontStyleToJson($element->getFontStyle()),
            'paragraph_style' => $this->optionalParagraphStyleToJson($element->getParagraphStyle()),
        ]);
    }

    /**
     * Serialises a text run and its child elements.
     */
    protected function textRunToJson(TextRun $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'TextRun',
            'paragraph_style' => $this->optionalParagraphStyleToJson($element->getParagraphStyle()),
            'elements' => $this->elementsToJson($element->getElements()),
        ]);
    }

    /**
     * Serialises a font style value that may be empty, a style name or a style object.
     *
     * @param mixed $style
     * @return array|string|null null when empty, the name when a string, otherwise the serialised object
     */
    protected function optionalFontStyleToJson($style)
    {
        if (! $style) {
            return null;
        }

        // Named styles are passed through unchanged, as tableStyleToJson() does.
        if (is_string($style)) {
            return $style;
        }

        return $this->fontStyleToJson($style);
    }

    /**
     * Serialises a paragraph style value that may be empty, a style name or a style object.
     *
     * @param mixed $style
     * @return array|string|null null when empty, the name when a string, otherwise the serialised object
     */
    protected function optionalParagraphStyleToJson($style)
    {
        if (! $style) {
            return null;
        }

        // Named styles are passed through unchanged, as tableStyleToJson() does.
        if (is_string($style)) {
            return $style;
        }

        return $this->paragraphStyleToJson($style);
    }

    /**
     * Serialises a table with its rows.
     */
    protected function tableToJson(Table $element)
    {
        return array_merge($this->elementToJson($element), [
            'element_type' => 'Table',
            'style' => $this->tableStyleToJson($element->getStyle()),
            'rows' => array_map([$this, 'rowToJson'], $element->getRows()),
            'width' => $element->getWidth(),
        ]);
    }

    /**
     * Serialises a title.
     *
     * When the title text is an object it is serialised as a text run under
     * "elements" and "text" is left empty; otherwise a non-empty text is
     * registered in the text index and its hash is added to the result.
     */
    protected function titleToJson(Title $element)
    {
        $elements = [];
        $text = $element->getText();

        if (is_object($text)) {
            $elements = [$this->textRunToJson($text)];
            $text = '';
        }

        $result = array_merge($this->elementToJson($element), [
            'element_type' => 'Title',
            'depth' => $element->getDepth(),
            'style' => $element->getStyle(),
            'text' => $text,
            'elements' => $elements,
        ]);

        if ($text) {
            $result['hash'] = $this->addText($text);
        }

        return $result;
    }

    /**
     * Serialises a table row with its cells.
     */
    protected function rowToJson($row)
    {
        return [
            'height' => $row->getHeight(),
            'style' => $this->rowStyleToJson($row->getStyle()),
            'cells' => array_map([$this, 'cellToJson'], $row->getCells()),
        ];
    }

    /**
     * Serialises a table cell with its child elements.
     */
    protected function cellToJson($cell)
    {
        return [
            'style' => $this->cellStyleToJson($cell->getStyle()),
            'width' => $cell->getWidth(),
            'elements' => $this->elementsToJson($cell->getElements()),
        ];
    }

    // Styles

    /**
     * Serialises the four-sided border settings shared by cell and table styles.
     */
    protected function borderStyleToJson($style)
    {
        return [
            'style' => 'border',
            'BorderTopSize' => $style->getBorderTopSize(),
            'BorderTopColor' => $style->getBorderTopColor(),
            'BorderTopStyle' => $style->getBorderTopStyle(),
            'BorderLeftSize' => $style->getBorderLeftSize(),
            'BorderLeftColor' => $style->getBorderLeftColor(),
            'BorderLeftStyle' => $style->getBorderLeftStyle(),
            'BorderRightSize' => $style->getBorderRightSize(),
            'BorderRightColor' => $style->getBorderRightColor(),
            'BorderRightStyle' => $style->getBorderRightStyle(),
            'BorderBottomSize' => $style->getBorderBottomSize(),
            'BorderBottomColor' => $style->getBorderBottomColor(),
            'BorderBottomStyle' => $style->getBorderBottomStyle(),
        ];
    }

    /**
     * Serialises a cell style; "VAlign" is only present when it is set.
     */
    protected function cellStyleToJson(Style\Cell $style)
    {
        $styles = array_merge($this->borderStyleToJson($style), [
            'style' => 'cell',
            'TextDirection' => $style->getTextDirection(),
            'BgColor' => $style->getBgColor(),
            'GridSpan' => $style->getGridSpan(),
            'VMerge' => $style->getVMerge(),
            'Shading' => $this->shadingStyleToJson($style->getShading()),
            'Width' => $style->getWidth(),
            'Unit' => $style->getUnit(),
        ]);

        $vAlign = $style->getVAlign();
        if ($vAlign) {
            $styles['VAlign'] = $vAlign;
        }

        return $styles;
    }

    /**
     * Serialises a section (page layout) style; "VAlign" is only present when it is set.
     */
    protected function sectionStyleToJson(Style\Section $style)
    {
        $styles = [
            'style' => 'section',
            'BreakType' => $style->getBreakType(),
            'ColsNum' => $style->getColsNum(),
            'ColsSpace' => $style->getColsSpace(),
            'FooterHeight' => $style->getFooterHeight(),
            'Gutter' => $style->getGutter(),
            'HeaderHeight' => $style->getHeaderHeight(),
            'LineNumbering' => $style->getLineNumbering(),
            'MarginBottom' => $style->getMarginBottom(),
            'MarginLeft' => $style->getMarginLeft(),
            'MarginRight' => $style->getMarginRight(),
            'MarginTop' => $style->getMarginTop(),
            'Orientation' => $style->getOrientation(),
            'PageNumberingStart' => $style->getPageNumberingStart(),
            'PageSizeH' => $style->getPageSizeH(),
            'PageSizeW' => $style->getPageSizeW(),
            'PaperSize' => $style->getPaperSize(),
        ];

        $vAlign = $style->getVAlign();
        if ($vAlign) {
            $styles['VAlign'] = $vAlign;
        }

        return $styles;
    }

    /**
     * Serialises a shading style, or returns null when none is given.
     */
    protected function shadingStyleToJson($style)
    {
        if (! $style) {
            return null;
        }

        return [
            'style' => 'shading',
            'pattern' => $style->getPattern(),
            'color' => $style->getColor(),
            'fill' => $style->getFill(),
        ];
    }

    /**
     * Serialises a line style on top of the image/frame style fields.
     */
    protected function lineStyleToJson(Style\Line $style)
    {
        return array_merge($this->imageStyleToJson($style), [
            'style' => 'line',
            'BeginArrow' => $style->getBeginArrow(),
            'Color' => $style->getColor(),
            'ConnectorType' => $style->getConnectorType(),
            'Dash' => $style->getDash(),
            'EndArrow' => $style->getEndArrow(),
            'Flip' => $style->isFlip(),
            'Weight' => $style->getWeight(),
        ]);
    }

    /**
     * Serialises a list item style.
     *
     * NOTE(review): the marker is 'line_item' although this is a list item style;
     * kept as is because consumers may match on the existing value.
     */
    protected function listItemStyleToJson(Style\ListItem $style)
    {
        return [
            'style' => 'line_item',
            'ListType' => $style->getListType(),
            'NumStyle' => $style->getNumStyle(),
        ];
    }

    /**
     * Serialises a font style; "Paragraph" is only present when the font carries a paragraph style.
     */
    protected function fontStyleToJson(Style\Font $style)
    {
        $styles = [
            'style' => 'font',
            'StyleName' => $style->getStyleName(),
            'Name' => $style->getName(),
            'Size' => $style->getSize(),
            'Color' => $style->getColor(),
            'Hint' => $style->getHint(),
            'Bold' => $style->isBold(),
            'Italic' => $style->isItalic(),
            'Underline' => $style->getUnderline(),
            'Strikethrough' => $style->isStrikethrough(),
            'DoubleStrikethrough' => $style->isDoubleStrikethrough(),
            'SuperScript' => $style->isSuperScript(),
            'SubScript' => $style->isSubScript(),
            'SmallCaps' => $style->isSmallCaps(),
            'AllCaps' => $style->isAllCaps(),
            'FgColor' => $style->getFgColor(),
            'Hidden' => $style->isHidden(),
            'Type' => $style->getStyleType(),
            'Scale' => $style->getScale(),
            'Spacing' => $style->getSpacing(),
            'Kerning' => $style->getKerning(),
            'Position' => $style->getPosition(),
        ];

        $paragraph = $style->getParagraph();
        if ($paragraph) {
            $styles['Paragraph'] = $this->paragraphStyleToJson($paragraph);
        }

        return $styles;
    }

    /**
     * Serialises a frame style (size, position and wrapping).
     */
    protected function frameStyleToJson(Style\Frame $style)
    {
        return [
            'style' => 'frame',
            'Alignment' => $style->getAlignment(),
            'Height' => $style->getHeight(),
            'Left' => $style->getLeft(),
            'HPos' => $style->getHPos(),
            'HPosRelTo' => $style->getHPosRelTo(),
            'Pos' => $style->getPos(),
            'VPos' => $style->getVPos(),
            'VPosRelTo' => $style->getVPosRelTo(),
            'Position' => $style->getPosition(),
            'Top' => $style->getTop(),
            'Unit' => $style->getUnit(),
            'Width' => $style->getWidth(),
            'Wrap' => $style->getWrap(),
            'WrapDistanceBottom' => $style->getWrapDistanceBottom(),
            'WrapDistanceLeft' => $style->getWrapDistanceLeft(),
            'WrapDistanceRight' => $style->getWrapDistanceRight(),
            'WrapDistanceTop' => $style->getWrapDistanceTop(),
        ];
    }

    /**
     * Serialises an image style on top of the frame style fields.
     */
    protected function imageStyleToJson(Style\Image $style)
    {
        return array_merge($this->frameStyleToJson($style), [
            'style' => 'image',
            'MarginLeft' => $style->getMarginLeft(),
            'MarginTop' => $style->getMarginTop(),
            'WrappingStyle' => $style->getWrappingStyle(),
            'Positioning' => $style->getPositioning(),
            'PosHorizontal' => $style->getPosHorizontal(),
            'PosHorizontalRel' => $style->getPosHorizontalRel(),
            'PosVertical' => $style->getPosVertical(),
            'PosVerticalRel' => $style->getPosVerticalRel(),
        ]);
    }

    /**
     * Serialises an indentation style, or returns null when none is given.
     */
    protected function indentationStyleToJson($style)
    {
        if (! $style) {
            return null;
        }

        return [
            'style' => 'indentation',
            'Left' => $style->getLeft(),
            'Right' => $style->getRight(),
            'FirstLine' => $style->getFirstLine(),
            'Hanging' => $style->getHanging(),
        ];
    }

    /**
     * Serialises a spacing style.
     */
    protected function spacingStyleToJson(Style\Spacing $style)
    {
        return [
            'style' => 'spacing',
            'Before' => $style->getBefore(),
            'After' => $style->getAfter(),
            'Line' => $style->getLine(),
            'LineRule' => $style->getLineRule(),
        ];
    }

    /**
     * Serialises a numbering style together with all of its levels.
     */
    protected function numberingStyleToJson(Style\Numbering $style)
    {
        return [
            'style' => 'numbering',
            'NumId' => $style->getNumId(),
            'Type' => $style->getType(),
            'StyleName' => $style->getStyleName(),
            'Index' => $style->getIndex(),
            'Levels' => array_map([$this, 'numberingLevelStyleToJson'], $style->getLevels()),
        ];
    }

    /**
     * Serialises a single numbering level.
     *
     * The marker is emitted under "style" like every other style record; the
     * original "type" key is kept so existing consumers keep working.
     */
    protected function numberingLevelStyleToJson(Style\NumberingLevel $style)
    {
        return [
            'type' => 'numbering_level',
            'style' => 'numbering_level',
            'Level' => $style->getLevel(),
            'Start' => $style->getStart(),
            'Format' => $style->getFormat(),
            'Restart' => $style->getRestart(),
            'PStyle' => $style->getPStyle(),
            'Suffix' => $style->getSuffix(),
            'Text' => $style->getText(),
            'Alignment' => $style->getAlignment(),
            'Left' => $style->getLeft(),
            'Hanging' => $style->getHanging(),
            'TabPos' => $style->getTabPos(),
            'Font' => $style->getFont(),
            'Hint' => $style->getHint(),
        ];
    }

    /**
     * Serialises a paragraph style.
     *
     * Empty "Alignment" and "TextAlignment" values are replaced by 'baseline'.
     * Indentation, spacing and shading are serialised into arrays when present.
     *
     * NOTE(review): "Tabs" is still emitted as returned by getTabs(); if those are
     * objects they will not survive JSON encoding - confirm what consumers expect
     * before changing it.
     */
    protected function paragraphStyleToJson(Style\Paragraph $style)
    {
        $indentation = $style->getIndentation();
        $spacing = $style->getSpacing();

        return [
            'Name' => $style->getStyleName(),
            'BasedOn' => $style->getBasedOn(),
            'Next' => $style->getNext(),
            'Alignment' => $style->getAlignment() ?: 'baseline',
            'Indentation' => $indentation ? $this->indentationStyleToJson($indentation) : $indentation,
            'Spacing' => $spacing ? $this->spacingStyleToJson($spacing) : $spacing,
            'WidowControl' => $style->hasWidowControl(),
            'KeepNext' => $style->isKeepNext(),
            'KeepLines' => $style->isKeepLines(),
            'PageBreakBefore' => $style->hasPageBreakBefore(),
            'NumStyle' => $style->getNumStyle(),
            'NumLevel' => $style->getNumLevel(),
            'Tabs' => $style->getTabs(),
            // Serialised the same way as in cellStyleToJson() instead of leaking the raw object.
            'Shading' => $this->shadingStyleToJson($style->getShading()),
            'ContextualSpacing' => $style->hasContextualSpacing(),
            'Bidi' => $style->isBidi(),
            'TextAlignment' => $style->getTextAlignment() ?: 'baseline',
            'SuppressAutoHyphens' => $style->hasSuppressAutoHyphens(),
            'style' => 'paragraph',
        ];
    }

    /**
     * Serialises a table style.
     *
     * @param mixed $style empty value, style name or table style object
     * @return array|string an empty array when no style is given, the name
     *                      unchanged when it is a string, otherwise the
     *                      serialised style (first-row style handled recursively)
     */
    protected function tableStyleToJson($style)
    {
        if (! $style) {
            return [];
        }

        if (is_string($style)) {
            return $style;
        }

        return array_merge(
            $this->borderStyleToJson($style),
            [
                'style' => 'table',
                'BgColor' => $style->getBgColor(),
                'CellSpacing' => $style->getCellSpacing(),
                // Serialised the same way as in cellStyleToJson() instead of leaking the raw object.
                'Shading' => $this->shadingStyleToJson($style->getShading()),
                'Alignment' => $style->getAlignment(),
                'Width' => $style->getWidth(),
                'Unit' => $style->getUnit(),
                'Layout' => $style->getLayout(),
                'ColumnWidths' => $style->getColumnWidths(),
                'BidiVisual' => $style->isBidiVisual(),
                'position' => $this->tablePositionStyleToJson($style->getPosition()),
                'first_row' => $this->tableStyleToJson($style->getFirstRow()),
                'BorderInsideHSize' => $style->getBorderInsideHSize(),
                'BorderInsideHColor' => $style->getBorderInsideHColor(),
                'BorderInsideVSize' => $style->getBorderInsideVSize(),
                'BorderInsideVColor' => $style->getBorderInsideVColor(),
                'CellMarginTop' => $style->getCellMarginTop(),
                'CellMarginRight' => $style->getCellMarginRight(),
                'CellMarginLeft' => $style->getCellMarginLeft(),
                'CellMarginBottom' => $style->getCellMarginBottom(),
            ]
        );
    }

    /**
     * Serialises a table position style, or returns an empty array when none is given.
     */
    protected function tablePositionStyleToJson($style)
    {
        if (! $style) {
            return [];
        }

        return [
            'style' => 'table_position',
            'LeftFromText' => $style->getLeftFromText(),
            'RightFromText' => $style->getRightFromText(),
            'TopFromText' => $style->getTopFromText(),
            'BottomFromText' => $style->getBottomFromText(),
            'VertAnchor' => $style->getVertAnchor(),
            'HorzAnchor' => $style->getHorzAnchor(),
            'TblpXSpec' => $style->getTblpXSpec(),
            'TblpX' => $style->getTblpX(),
            'TblpYSpec' => $style->getTblpYSpec(),
            'TblpY' => $style->getTblpY(),
        ];
    }

    /**
     * Serialises a table row style.
     */
    protected function rowStyleToJson($style)
    {
        return [
            'style' => 'row',
            'TblHeader' => $style->isTblHeader(),
            'CantSplit' => $style->isCantSplit(),
            'ExactHeight' => $style->isExactHeight(),
        ];
    }

    /**
     * Appends $text to the collected document text and records the range it occupies.
     *
     * Offsets are counted in bytes (strlen), not characters. An empty text
     * still advances the running offset by one although nothing is appended to
     * the collected text.
     *
     * NOTE(review): both behaviours are kept exactly as they were; confirm with
     * the consumer of "contents" whether byte offsets and the empty-text advance
     * are intended.
     *
     * @param string|null $text
     * @return string the hash that identifies the recorded range
     */
    protected function addText($text)
    {
        // Cast once so a null text does not reach strlen() (deprecated since PHP 8.1).
        $text = (string) $text;
        $length = strlen($text);
        $hash = $this->generateHash();

        $this->textContents['text'] .= $text;
        $this->textContents['elements'][] = [
            'hash' => $hash,
            'range_start' => $this->textLength,
            'range_end' => $this->textLength + ($length > 0 ? $length - 1 : 0),
        ];
        $this->textLength += ($length > 0 ? $length : 1);

        return $hash;
    }

    /**
     * Generates the identifier attached to a recorded text range.
     */
    protected function generateHash()
    {
        return uniqid();
    }
}