Lucian Pricop
2 years ago
50 changed files with 71 additions and 9950 deletions
-
6.env.example
-
531README.md
-
9app/Console/Commands/DeployWorker.php
-
57app/Console/Commands/TestMachine.php
-
5app/Jobs/IngestDocuments.php
-
17app/Parser/DocxParser/Footer.php
-
41app/Parser/DocxParser/Footnote.php
-
11app/Parser/DocxParser/Header.php
-
26app/Parser/DocxParser/Link.php
-
77app/Parser/DocxParser/ListItemRun.php
-
11app/Parser/DocxParser/PageBreak.php
-
269app/Parser/DocxParser/ParseDocx.php
-
32app/Parser/DocxParser/PreserveText.php
-
41app/Parser/DocxParser/Section.php
-
35app/Parser/DocxParser/Table.php
-
41app/Parser/DocxParser/Table/Cell.php
-
41app/Parser/DocxParser/Table/Row.php
-
147app/Parser/DocxParser/Text.php
-
17app/Parser/DocxParser/TextBreak.php
-
74app/Parser/DocxParser/TextRun.php
-
72app/Parser/DocxParser/Title.php
-
117app/Parser/DocxParser/Traits/Helper.php
-
527app/Parser/HtmlParser/ParseHtml.php
-
670app/Parser/ParseHtmlArray.php
-
406app/Parser/ParseXml.php
-
4composer.json
-
35database/migrations/2019_08_19_000000_create_failed_jobs_table.php
-
16database/seeds/DatabaseSeeder.php
-
21package.json
-
1resources/js/app.js
-
28resources/js/bootstrap.js
-
19resources/lang/en/auth.php
-
19resources/lang/en/pagination.php
-
22resources/lang/en/passwords.php
-
151resources/lang/en/validation.php
-
1resources/sass/app.scss
-
5resources/views/errors/401.blade.php
-
5resources/views/errors/403.blade.php
-
4resources/views/errors/404.blade.php
-
4resources/views/errors/405.blade.php
-
5resources/views/errors/419.blade.php
-
6resources/views/errors/429.blade.php
-
5resources/views/errors/500.blade.php
-
5resources/views/errors/503.blade.php
-
126resources/views/errors/minimal.blade.php
-
18routes/api.php
-
15routes/channels.php
-
38tests/Feature/ProcessDocxDocumentTest.php
-
15webpack.mix.js
-
6173yarn.lock
57
app/Console/Commands/TestMachine.php
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -1,17 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
/**
 * Handler for DOCX footer elements.
 *
 * Footer content is not converted into parsed output.
 */
class Footer
{
    /**
     * @param mixed $element Footer element; it is not inspected.
     *
     * @return null Always null.
     */
    public function handle($element)
    {
        // A leftover dd() debug dump used to terminate the whole request here.
        return null;
    }
}
@ -1,41 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Exception; |
|||
|
|||
/**
 * Handler for footnotes: every child element is delegated to its own handler.
 */
class Footnote
{
    use Helper;

    /**
     * Convert the child elements of a footnote.
     *
     * @param mixed $section Container whose children are read through getElements().
     *
     * @return array|null Non-empty handler results in document order, or null when there are none.
     *
     * @throws Exception When no handler can be resolved for a child element.
     */
    public function handle($section)
    {
        $result = [];

        foreach ($this->getElements($section) as $element) {
            try {
                $handler = $this->getHandler($element);
            } catch (Exception $e) {
                // Keep the original exception as "previous" so its type and trace are not lost.
                throw new Exception($e->getMessage(), 0, $e);
            }

            // The handler used to be called from a finally block, which also ran when
            // getHandler() had thrown and $handler was undefined; it also ran twice per element.
            $data = $handler->handle($element);
            if ($data) {
                $result[] = $data;
            }
        }

        return count($result) > 0 ? $result : null;
    }
}
@ -1,11 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
/**
 * Handler for DOCX header elements; produces no output.
 */
class Header
{
    /**
     * @param mixed $element Header element; it is not inspected.
     *
     * @return null Always null.
     */
    public function handle($element)
    {
        return null;
    }
}
@ -1,26 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
/**
 * Handler that renders a link element as an HTML anchor.
 */
class Link
{
    /**
     * @param mixed $element Element exposing getText() and getLinkSrc().
     *
     * @return array ['content' => anchor HTML, 'type' => 'link'].
     */
    public function handle($element)
    {
        return [
            'content' => $this->buildHtmlLink($element, $element->getText()),
            'type'    => 'link',
        ];
    }

    /**
     * Build the anchor markup with the target and label escaped for HTML.
     *
     * @param mixed $element Element exposing getLinkSrc().
     * @param mixed $text    Visible link label.
     *
     * @return string
     */
    private function buildHtmlLink($element, $text)
    {
        // The attribute is single-quoted, so quotes must be encoded as well (ENT_QUOTES);
        // otherwise a quote inside the document's link target breaks out of the attribute.
        $href = htmlspecialchars((string) $element->getLinkSrc(), ENT_QUOTES, 'UTF-8');
        // The label is element content: only &, < and > need encoding.
        $label = htmlspecialchars((string) $text, ENT_NOQUOTES, 'UTF-8');

        return "<a href='".$href."' target='_blank'>".$label."</a>";
    }
}
@ -1,77 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Exception; |
|||
use PhpOffice\PhpWord\Reader\Word2007\Numbering; |
|||
use PhpOffice\PhpWord\Style; |
|||
|
|||
/**
 * Handler for list-item runs: merges the run's child elements into one list entry.
 */
class ListItemRun
{
    use Helper;

    /**
     * Convert a list-item run.
     *
     * The first child with visible text opens the entry; later children have their
     * content appended to it, separated by a space. A single resulting entry is
     * returned unwrapped, with its text enclosed in <p>…</p>.
     *
     * @param mixed $list List-item run exposing getElements(), getParagraphStyle(),
     *                    getDepth() and getElementIndex().
     *
     * @return array The entry, or an empty array when no child produced visible text.
     *
     * @throws Exception When a child cannot be handled; the cause is kept as "previous".
     */
    public function handle($list)
    {
        $result = [];
        $listElements = $this->getElements($list);

        if (! count($listElements)) {
            return $result;
        }

        foreach ($listElements as $index => $element) {
            try {
                $handler = $this->getHandler($element);
                $data = $handler->handle($element);

                // Children without visible text (after stripping tags) are ignored.
                if (! $data || ! isset($data[ 'content' ]) || ! strlen(trim(strip_tags($data[ 'content' ])))) {
                    continue;
                }

                $lastKey = count($result) - 1;
                if ($index !== 0 && isset($result[ $lastKey ])) {
                    $result[ $lastKey ][ 'content' ][ 'content' ] .= ' '.$data[ 'content' ];
                    continue;
                }

                $styleName = $list->getParagraphStyle()->getStyleName();
                $result[] = [
                    'content'    => $data,
                    'type'       => 'listItemRun',
                    'depth'      => (int) $list->getDepth(),
                    'styleDepth' => $this->getStyleListDepth($styleName),
                    'styleName'  => $styleName,
                    'index'      => $list->getElementIndex(),
                    'children'   => [],
                ];
            } catch (Exception $e) {
                // Preserve the original exception instead of discarding its type and trace.
                throw new Exception($e->getMessage(), 0, $e);
            }
        }

        if (count($result) === 1) {
            $result = reset($result);
            $result[ 'content' ][ 'content' ] = '<p>'.$result[ 'content' ][ 'content' ].'</p>';
        }

        return $result;
    }
}
@ -1,11 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
/**
 * Handler for page breaks; produces no output.
 */
class PageBreak
{
    /**
     * @param mixed $element Page-break element; it is not inspected.
     *
     * @return null Always null.
     */
    public function handle($element)
    {
        return null;
    }
}
@ -1,269 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Illuminate\Support\Facades\Log; |
|||
use PhpOffice\PhpWord\IOFactory; |
|||
use function GuzzleHttp\Psr7\str; |
|||
|
|||
/**
 * Loads a DOCX file and turns its sections into a nested array of paragraphs.
 */
class ParseDocx
{
    use Helper;

    /** Counter advanced by setTheNumbering() for each top-level paragraph it numbers. */
    protected $currentNumberingIndex = 1;

    /**
     * Load a DOCX file and parse it.
     *
     * @param mixed $file Value passed straight to IOFactory::load().
     *
     * @return array Parsed paragraph tree.
     *
     * @throws \Exception On any load or parse failure; the cause is kept as "previous".
     */
    public function fromUploadedFile($file)
    {
        try {
            $docxFileLoader = IOFactory::load($file);
            Log::info('Parse docx');

            return $this->parseLoadedDocx($docxFileLoader);
        } catch (\Exception $exception) {
            // A dd() call used to dump and terminate here, making this throw unreachable.
            throw new \Exception($exception->getMessage(), 0, $exception);
        }
    }

    /**
     * Collect the paragraphs of every section and build the nested structure.
     *
     * @param mixed $docx Loaded document exposing getSections().
     *
     * @return array
     */
    private function parseLoadedDocx($docx)
    {
        // Initialised up front: it used to be undefined when no paragraph was kept.
        $result = [];
        $styles = 0;

        foreach ($docx->getSections() as $page) {
            $paragraphs = $this->getHandler($page)->handle($page);
            if (! $paragraphs) {
                continue;
            }

            foreach ($paragraphs as $paragraph) {
                if (! $paragraph) {
                    continue;
                }

                $isTable = $paragraph[ 'type' ] == 'table';
                $isContent = $paragraph[ 'type' ] !== 'textBreak'
                    && isset($paragraph[ 'content' ][ 'type' ])
                    && $paragraph[ 'content' ][ 'type' ] !== 'textBreak';

                if ($isContent || $isTable) {
                    $result[] = $paragraph;
                    if (isset($paragraph[ 'styleName' ])) {
                        $styles++;
                    }
                }
            }
        }

        // Use style-derived depths when at least half of the paragraphs carry a style name.
        $depthTypeType = count($result) / 2 <= $styles ? 'styleDepth' : 'depth';

        return $this->setTheNumbering($result, null, $depthTypeType);
    }

    /**
     * Nest the paragraphs and propagate numbering to children.
     *
     * @param array       $paragraphs
     * @param string|null $parentNumbering Numbering prefix of the parent, if any.
     * @param string      $depthType       'depth' or 'styleDepth'.
     *
     * @return array
     */
    private function setTheNumbering($paragraphs, $parentNumbering = null, $depthType = 'depth')
    {
        $result = [];
        $paragraphs = $this->buildTheChildrens($paragraphs, $depthType);

        for ($index = 0; $index < count($paragraphs); $index++) {
            $paragraph = $paragraphs[ $index ];

            $styleName = (string) ($paragraph[ 'styleName' ] ?? '');

            if ($paragraph[ 'type' ] !== 'table' && ($paragraph[ $depthType ] === 0 || $parentNumbering) && strpos($styleName, 'BodyText') === false) {
                // NOTE(review): the numbering computed in this branch is written to the local
                // copy $paragraph, while the value appended to $result below is
                // $paragraphs[$index]. The only lasting effect is the counter increment.
                // This looks unintended; kept as-is because changing it alters the parser
                // output. Confirm the intent before writing $paragraph back.
                $paragraph[ 'content' ][ 'numbering' ] = ($parentNumbering) ? $parentNumbering.((int) $index + 1).'.' : $this->currentNumberingIndex.'.';
                $paragraph[ 'content' ][ 'numbering_row' ] = ($parentNumbering) ? ((int) $index + 1) : $this->currentNumberingIndex;

                if ($paragraph[ 'children' ] && count($paragraph[ 'children' ])) {
                    $paragraph[ 'children' ] = $this->setTheNumbering(
                        $paragraph[ 'children' ],
                        $paragraph[ 'content' ][ 'numbering' ],
                        $depthType
                    );
                }

                if (! $parentNumbering) {
                    $this->currentNumberingIndex++;
                }
            } elseif (isset($paragraph[ 'content' ][ 'numbering' ]) && isset($paragraph[ 'children' ]) && count($paragraph[ 'children' ])) {
                $paragraphs[ $index ] = $this->setChildrenNumbering($paragraphs[ $index ]);
            }

            // Errors are no longer swallowed by a dd() debug dump; they propagate to the caller.
            $result[] = $paragraphs[ $index ];
        }

        return $result;
    }

    /**
     * Number the children of an already numbered parent ("1." -> "1.1", "1.2", ...).
     *
     * @param array $parent Paragraph with 'content' => ['numbering' => ...] and 'children'.
     *
     * @return array The parent with numbered children.
     */
    private function setChildrenNumbering($parent)
    {
        $numbering = 1;

        for ($j = 0; $j < count($parent[ 'children' ]); $j++) {
            $children = $parent[ 'children' ][ $j ];

            if ($children[ 'type' ] == 'listItemRun' || isset($children[ 'content' ][ 'numbering' ])) {
                $parentNumber = $parent[ 'content' ][ 'numbering' ];
                // Insert a separating dot only when the parent numbering does not already end with one.
                $endsWithDot = substr(trim($parentNumber), -1) === '.';
                $parent[ 'children' ][ $j ][ 'content' ][ 'numbering' ] = $endsWithDot
                    ? $parentNumber.$numbering
                    : $parentNumber.'.'.$numbering;

                if (! empty($parent[ 'children' ][ $j ][ 'children' ])) {
                    $parent[ 'children' ][ $j ] = $this->setChildrenNumbering($parent[ 'children' ][ $j ]);
                }

                $numbering++;
            }
        }

        return $parent;
    }

    /**
     * Fold a flat paragraph list into parents with children.
     *
     * For each unconsumed paragraph $i the following paragraphs $j are examined in
     * order and attached through handlePossibleChild() while one of four rules
     * matches; the first non-matching paragraph ends the scan for $i.
     *
     * @param array  $paragraphs
     * @param string $depthType  'depth' or 'styleDepth'.
     *
     * @return array
     */
    private function buildTheChildrens($paragraphs, $depthType)
    {
        // Set of consumed indexes, keyed by index for O(1) membership checks.
        $handled = [];
        $result = [];
        $total = count($paragraphs);

        for ($i = 0; $i < $total; $i++) {
            if (isset($handled[ $i ])) {
                continue;
            }

            for ($j = $i + 1; $j < $total; $j++) {
                if (isset($handled[ $j ])) {
                    continue;
                }

                // Skip an empty paragraph and look at the one after it.
                if (isset($paragraphs[ $j ][ 'content' ][ 'content' ]) && $paragraphs[ $j ][ 'content' ][ 'content' ] === '<p></p>') {
                    $handled[ $j ] = true;
                    $j++;
                    // The skip can step past the last paragraph; stop instead of reading an undefined offset.
                    if ($j >= $total) {
                        break;
                    }
                }

                $next = $j + 1;

                if (isset($paragraphs[ $i ][ $depthType ], $paragraphs[ $j ][ $depthType ]) && $paragraphs[ $i ][ $depthType ] < $paragraphs[ $j ][ $depthType ]) {
                    // Rule 1: $j is deeper than $i.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $j ][ 'styleName' ]) && $paragraphs[ $j ][ 'styleName' ] === 'ListParagraph' && ! isset($paragraphs[ $i ][ $depthType ]) && substr(strip_tags($paragraphs[ $i ][ 'content' ][ 'content' ]), -1) === ':') {
                    // Rule 2: a "ListParagraph" following a depth-less paragraph that ends with ':'.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $next ][ 'content' ][ 'content' ], $paragraphs[ $j ][ 'content' ][ 'content' ]) && substr(strip_tags($paragraphs[ $j ][ 'content' ][ 'content' ]), -1) === ':' && $this->looksLikeEnumerationItem($paragraphs[ $next ][ 'content' ][ 'content' ])) {
                    // Rule 3: $j ends with ':' and introduces items that end with ';'.
                    $k = $next;
                    $handled[ $k ] = true;
                    while (isset($paragraphs[ $k ]) && $this->endsWithSemicolon($paragraphs[ $k ])) {
                        $paragraphs[ $j ][ 'children' ][] = $paragraphs[ $k ];
                        $handled[ $k ] = true;
                        $k++;
                    }

                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $i ][ 'styleName' ]) && ($paragraphs[ $i ][ $depthType ] ?? null) !== ($paragraphs[ $j ][ $depthType ] ?? null) && strpos($paragraphs[ $i ][ 'styleName' ], 'Heading2') !== false && (isset($paragraphs[ $j ][ 'depth' ]) || (($paragraphs[ $j ][ 'type' ] == 'textRun' && isset($paragraphs[ $j ][ 'content' ][ 'numbering' ])) && ! isset($paragraphs[ $j ][ 'styleName' ])))) {
                    // Rule 4: a "Heading2"-styled paragraph followed by a paragraph at a different depth.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } else {
                    break;
                }

                $handled[ $j ] = true;
            }

            $result[] = $paragraphs[ $i ];
            $handled[ $i ] = true;
        }

        return $result;
    }

    /**
     * Whether the HTML text starts with a lowercase letter or ends with ';'.
     *
     * @param string $html
     *
     * @return bool
     */
    private function looksLikeEnumerationItem($html)
    {
        $text = trim(strip_tags($html));

        return ctype_lower((string) substr($text, 0, 1)) || substr($text, -1) === ';';
    }

    /**
     * Whether a paragraph's text ends with ';' once every "and" is removed.
     *
     * @param array $paragraph
     *
     * @return bool False as well when the paragraph has no nested text content.
     */
    private function endsWithSemicolon($paragraph)
    {
        if (! isset($paragraph[ 'content' ][ 'content' ])) {
            return false;
        }

        $text = str_replace('and', '', trim(strip_tags(str_replace('and', '', $paragraph[ 'content' ][ 'content' ]))));

        return substr($text, -1) === ';';
    }

    /**
     * Attach $child to $parent, or to the parent's last descendant when it is deeper.
     *
     * @param array  $parent
     * @param array  $child
     * @param int    $i         Index of the parent in the flat list (only passed along).
     * @param string $depthType 'depth' or 'styleDepth'.
     *
     * @return array The possibly updated parent.
     */
    private function handlePossibleChild($parent, $child, $i, $depthType)
    {
        // Parent without children yet: the child is attached directly or not at all.
        if (isset($parent[ 'children' ]) && count($parent[ 'children' ]) === 0) {
            if ($parent[ $depthType ] < $child[ $depthType ] || $parent[ $depthType ] === null) {
                $parent[ 'children' ][] = $child;
            } elseif (strpos((string) ($parent[ 'styleName' ] ?? ''), 'Heading') !== false && isset($child[ 'content' ][ 'numbering' ]) && substr_count($child[ 'content' ][ 'numbering' ], '.') == 1) {
                $parent[ 'children' ][] = $child;
            }

            return $parent;
        }

        $lastParentChild = last($parent[ 'children' ]);

        if ($lastParentChild[ $depthType ] && $child[ $depthType ] > $lastParentChild[ $depthType ]) {
            // Deeper than the last child: it belongs somewhere below that child.
            $lastParentChild = $this->handlePossibleChild($lastParentChild, $child, $i, $depthType);
        } else {
            $sameDepth = $child[ $depthType ] === $lastParentChild[ $depthType ];

            if ($sameDepth || (isset($lastParentChild[ 'styleDepth' ]) && $lastParentChild[ 'styleDepth' ] === $child[ 'depth' ] && $lastParentChild[ 'index' ] !== $child[ 'index' ])) {
                // Sibling of the last child.
                $parent[ 'children' ][] = $child;

                return $parent;
            }
        }

        $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;

        return $parent;
    }
}
@ -1,32 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
|
|||
/**
 * Handler for preserve-text elements (text containing {…} field placeholders).
 */
class PreserveText
{
    use Helper;

    /**
     * Convert a preserve-text element, replacing its first {…} field with {REF_NUMBER}.
     *
     * @param mixed $element Element exposing getText() (string or array of parts) and getElementIndex().
     *
     * @return array Paragraph entry with fixed style name, style depth and depth.
     */
    public function handle($element)
    {
        $text = $element->getText();
        if (is_array($text)) {
            $text = implode(' ', $text);
        }

        return [
            'content'    => [
                // The class used to be [^)] — a typo for [^}] — so the greedy match ran from
                // the first "{" to the last "}" and swallowed every field in between.
                'content' => preg_replace("/\{[^}]+\}/", '{REF_NUMBER}', $text, 1),
                'type'    => 'text',
            ],
            'type'       => 'preserveText',
            'index'      => $element->getElementIndex(),
            'children'   => [],
            'styleName'  => 'Level2Number',
            'styleDepth' => 1,
            'depth'      => 0,
        ];
    }
}
@ -1,41 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Exception; |
|||
use PhpOffice\PhpWord\Element\Section as WordSection; |
|||
|
|||
/**
 * Handler for document sections: delegates every child element to its own handler.
 */
class Section
{
    use Helper;

    /**
     * Convert the child elements of a section.
     *
     * @param mixed $section Expected to be a PhpWord section; anything else yields null.
     *
     * @return array|null Non-empty handler results in document order, or null when there are none.
     *
     * @throws Exception When no handler can be resolved for a child element.
     */
    public function handle($section)
    {
        if (! $section instanceof WordSection) {
            return null;
        }

        $result = [];
        foreach ($this->getElements($section) as $element) {
            try {
                $handler = $this->getHandler($element);
            } catch (Exception $e) {
                // Keep the original exception as "previous" so its type and trace are not lost.
                throw new Exception($e->getMessage(), 0, $e);
            }

            // Each element is converted once; the handler used to be invoked a second
            // time just to store the value it had already returned.
            $data = $handler->handle($element);
            if ($data) {
                $result[] = $data;
            }
        }

        return count($result) > 0 ? $result : null;
    }
}
@ -1,35 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
|
|||
/**
 * Handler for tables: converts every row through the matching handler in the Table namespace.
 */
class Table
{
    use Helper;

    /**
     * Convert a table.
     *
     * @param mixed $table Table element exposing getRows() and getNestedLevel().
     *
     * @return array Entry of type 'table' whose children are the converted rows.
     */
    public function handle($table)
    {
        $result = [];

        foreach ($table->getRows() as $row) {
            // Resolves to <this class>\<short class name of the row>, e.g. ...\Table\Row.
            $handlerName = __CLASS__.'\\'.$this->getReflectionClass($row);
            $handler = new $handlerName;

            // Each row is converted once; the handler used to be invoked twice per row.
            $data = $handler->handle($row);
            if ($data) {
                $result[] = $data;
            }
        }

        $level = $table->getNestedLevel() + 1;

        return [
            'content'    => '',
            'children'   => $result,
            'styleDepth' => $level,
            'depth'      => $level,
            'type'       => 'table',
        ];
    }
}
@ -1,41 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser\Table; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Exception; |
|||
use Illuminate\Support\Arr; |
|||
use PhpOffice\PhpWord\Element\TextBreak; |
|||
|
|||
/**
 * Handler for table cells: converts the cell's child elements, skipping text breaks.
 */
class Cell
{
    use Helper;

    /**
     * Convert a table cell.
     *
     * @param mixed $cell Cell exposing getElements() and getWidth().
     *
     * @return array Entry of type 'cell'; each child carries the cell width under 'width'.
     *
     * @throws Exception When no handler can be resolved for a child element.
     */
    public function handle($cell)
    {
        $result = [];
        $width = $cell->getWidth();

        // The element list is read once; an unused local and a second getElements() call were dropped.
        foreach ($this->getElements($cell) as $element) {
            if ($element instanceof TextBreak) {
                continue;
            }

            try {
                $handler = $this->getHandler($element);
            } catch (Exception $e) {
                // Keep the original exception as "previous" so its type and trace are not lost.
                throw new Exception($e->getMessage(), 0, $e);
            }

            $data = $handler->handle($element);
            if (! is_array($data)) {
                // Handlers may return nothing; as before, such children still yield a width-only entry.
                $data = [];
            }
            $data[ 'width' ] = $width;
            $result[] = $data;
        }

        return [
            'content'  => '',
            'children' => $result,
            'depth'    => null,
            'type'     => 'cell',
        ];
    }
}
@ -1,41 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser\Table; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Illuminate\Support\Arr; |
|||
|
|||
/**
 * Handler for table rows: converts each cell with the Cell handler.
 */
class Row
{
    use Helper;

    /**
     * Convert a table row.
     *
     * @param mixed $row Row exposing getCells(), getHeight(), getStyle() and getElementIndex().
     *
     * @return array Entry of type 'row' whose children are the converted cells.
     */
    public function handle($row)
    {
        $convertCell = function ($cell) {
            return (new Cell())->handle($cell);
        };
        $children = array_values(array_map($convertCell, $row->getCells()));

        return [
            'content'     => '',
            'children'    => $children,
            'depth'       => null,
            'height'      => $row->getHeight(),
            'isTblHeader' => $row->getStyle()->isTblHeader(),
            'index'       => $row->getElementIndex(),
            'type'        => 'row',
        ];
    }
}
@ -1,147 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
|
|||
/**
 * Handler for plain text elements: extracts a leading numbering and renders styled HTML.
 */
class Text
{
    use Helper;

    /**
     * Convert a text element.
     *
     * @param mixed $textElement Element exposing getText() and getFontStyle().
     *
     * @return array ['content' => HTML, 'type' => 'text'] plus 'numbering' when a prefix was found.
     */
    public function handle($textElement)
    {
        $data = $this->getElementData($textElement);
        $data[ 'type' ] = 'text';

        return $data;
    }

    /**
     * Split off the numbering and wrap non-empty text in styled markup.
     *
     * @param mixed $textElement
     *
     * @return array
     */
    private function getElementData($textElement)
    {
        $textData = $this->getNumberingFromText($textElement->getText());

        if (strlen($textData[ 'content' ])) {
            $textData[ 'content' ] = $this->styleTheText($textData[ 'content' ], $textElement);
        }

        return $textData;
    }

    /**
     * Separate a leading numbering such as "1." or "2.3" from the text.
     *
     * @param string|null $text
     *
     * @return array ['content' => remaining text] plus 'numbering' when a prefix matched.
     */
    private function getNumberingFromText($text)
    {
        $data = [];
        $trimmed = trim((string) $text);
        preg_match('/^([0-9.])([^(A-Z)(a-z) ]*)/', $trimmed, $match);

        if ($match && isset($match[ 0 ]) && $match[ 0 ] !== '.') {
            // Cut off the prefix only. str_replace() used to delete every later
            // occurrence of the same characters as well ("1. see clause 1." lost both).
            $data[ 'content' ] = trim(substr($trimmed, strlen($match[ 0 ])));
            $data[ 'numbering' ] = $match[ 0 ];
        } else {
            $data[ 'content' ] = trim(preg_replace('/\t+/', '', (string) $text));
        }

        return $data;
    }

    /**
     * Wrap text in a <span> carrying inline CSS and nested formatting tags.
     *
     * @param string $textString
     * @param mixed  $textObject Element exposing getFontStyle().
     *
     * @return string
     */
    private function styleTheText($textString, $textObject)
    {
        $font = $textObject->getFontStyle();

        // NOTE(review): PhpWord documents getFontStyle() as returning either a style
        // object or a style name; without an object there are no values to render.
        if (! is_object($font) || ! method_exists($font, 'getStyleValues')) {
            return '<span>'.$textString.'</span>';
        }

        $fontStyle = $font->getStyleValues();
        $toggles = isset($fontStyle[ 'style' ]) && is_array($fontStyle[ 'style' ]) ? $fontStyle[ 'style' ] : [];
        $basic = isset($fontStyle[ 'basic' ]) && is_array($fontStyle[ 'basic' ]) ? $fontStyle[ 'basic' ] : [];

        $inlineStyle = $this->getInlineStyles(array_merge($toggles, $basic));

        return '<span'.(($inlineStyle) ? ' style="'.$inlineStyle.'"' : '').'>'.$this->getStyledText($textString, $toggles).'</span>';
    }

    /**
     * Translate style values into an inline CSS string.
     *
     * Values that are falsy, 'none' or 'auto' are skipped.
     *
     * @param array $styles Style name => value.
     *
     * @return string CSS declarations in the order the styles were given.
     */
    private function getInlineStyles($styles)
    {
        $styleString = '';

        foreach ($styles as $style => $value) {
            // Strict comparison: with a loose in_array() boolean true equals 'none',
            // so the boolean flags (allCaps, hidden, ...) were never emitted.
            if (! $value || in_array($value, ['none', 'auto'], true)) {
                continue;
            }

            // Declarations are built per style, so 'fgColor', 'size' and 'color'
            // are only read when they are actually present.
            switch ((string) $style) {
                case 'dStrike':
                    $styleString .= 'text-decoration: line-through;text-decoration-style: double;';
                    break;
                case 'smallCaps':
                    $styleString .= 'font-variant: small-caps;';
                    break;
                case 'allCaps':
                    $styleString .= 'text-transform: uppercase;';
                    break;
                case 'fgColor':
                    $styleString .= 'background-color:'.$value.';';
                    break;
                case 'hidden':
                    $styleString .= 'display:none;';
                    break;
                case 'size':
                    $styleString .= 'font-size:'.$value.'pt;';
                    break;
                case 'color':
                    $styleString .= 'color:#'.$value.';';
                    break;
            }
        }

        return $styleString;
    }

    /**
     * Wrap the text in one HTML tag per active formatting style.
     *
     * @param string $text
     * @param array  $styles Style name => value; falsy and 'none' values are inactive.
     *
     * @return string
     */
    private function getStyledText($text, $styles)
    {
        $mappedStyle = [
            'bold'      => 'strong',
            'italic'    => 'i',
            'underline' => 'u',
            'strike'    => 'strike',
            'super'     => 'sup',
            'sub'       => 'sub',
        ];

        foreach ($styles as $style => $active) {
            if (array_key_exists($style, $mappedStyle) && $active && $active !== 'none') {
                $text = $this->appendHtmlStyle($text, $mappedStyle[ $style ]);
            }
        }

        return $text;
    }

    /**
     * @param string $text
     * @param string $styleType Tag name.
     *
     * @return string The text enclosed in <tag>…</tag>.
     */
    private function appendHtmlStyle($text, $styleType)
    {
        return "<$styleType>$text</$styleType>";
    }
}
@ -1,17 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
/**
 * Handler for text breaks; produces no output.
 */
class TextBreak
{
    /**
     * @param mixed $element Text-break element; it is not inspected.
     *
     * @return null Always null.
     */
    public function handle($element)
    {
        // The '<br>' entry that followed the bare return was unreachable and has been removed.
        return null;
    }
}
@ -1,74 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use Exception; |
|||
|
|||
/**
 * Handler for text runs: merges the run's child elements into one paragraph entry.
 */
class TextRun
{
    use Helper;

    /**
     * Convert a text run.
     *
     * The first child that yields data opens the entry; later children have their
     * content appended to it, separated by a space. A single resulting entry is
     * returned unwrapped, with its text enclosed in <p>…</p>.
     *
     * @param mixed $textRun Run exposing getElements(), getParagraphStyle(), getDepth()
     *                       and getElementIndex().
     *
     * @return array The entry, or an empty array when no child produced data.
     *
     * @throws Exception When a child cannot be handled; the cause is kept as "previous".
     */
    public function handle($textRun)
    {
        $result = [];
        $textRunElements = $this->getElements($textRun);

        if (! count($textRunElements)) {
            return $result;
        }

        foreach ($textRunElements as $index => $element) {
            try {
                $handler = $this->getHandler($element);
                $data = $handler->handle($element);
                if (! $data) {
                    continue;
                }

                $lastKey = count($result) - 1;
                if ($index !== 0 && isset($result[ $lastKey ])) {
                    $result[ $lastKey ][ 'content' ][ 'content' ] .= ' '.$data[ 'content' ];
                    continue;
                }

                $styleName = $textRun->getParagraphStyle()->getStyleName();
                $result[] = [
                    // Reuses $data; the first element used to be converted a second time here.
                    'content'    => $data,
                    'type'       => 'textRun',
                    // As before, the depth stays uncast when the entry is opened by the
                    // element at index 0 and is cast to int otherwise.
                    'depth'      => $index === 0 ? $textRun->getDepth() : (int) $textRun->getDepth(),
                    'styleDepth' => $this->getStyleListDepth($styleName),
                    'styleName'  => $styleName,
                    'index'      => $textRun->getElementIndex(),
                    'children'   => [],
                ];
            } catch (Exception $e) {
                // A dd() call used to dump and terminate here, making this throw unreachable.
                throw new Exception($e->getMessage(), 0, $e);
            }
        }

        if (count($result) === 1) {
            $result = reset($result);
            $result[ 'content' ][ 'content' ] = '<p>'.$result[ 'content' ][ 'content' ].'</p>';
        }

        return $result;
    }
}
@ -1,72 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser; |
|||
|
|||
use App\Parser\DocxParser\Traits\Helper; |
|||
use PhpOffice\PhpWord\Style; |
|||
use PhpOffice\PhpWord\Element\Title as WordTitle; |
|||
|
|||
/**
 * Handler for title/heading elements: renders them as <h1>…<h5> entries.
 */
class Title
{
    use Helper;

    /**
     * Convert a title element.
     *
     * @param mixed $element Expected to be a PhpWord title; anything else yields null.
     *
     * @return array|null Title entry, or the result of the nested handler when the
     *                    title text is itself an element.
     */
    public function handle($element)
    {
        if (! $element instanceof WordTitle) {
            return null;
        }

        $title = $element->getText();
        if (! is_string($title)) {
            // The text is not a string: delegate to the handler matching its class.
            return $this->getHandler($title)->handle($title);
        }

        $headings = [
            'Title'    => 'h1',
            'Subtitle' => 'h2',
            'Heading1' => 'h1',
            'Heading2' => 'h2',
            'Heading3' => 'h3',
            'Heading4' => 'h4',
            'Heading5' => 'h5',
        ];

        $style = $this->getTitleStyle($element);
        $inlineStyle = $this->resolveInlineStyle($style[ 'font' ]);
        // Unknown style names fall back to the smallest heading.
        $heading = array_key_exists($style[ 'heading' ], $headings) ? $headings[ $style[ 'heading' ] ] : 'h5';

        return [
            'content'    => [
                'content' => '<'.$heading.(($inlineStyle) ? ' style="'.$inlineStyle.'"' : '').'>'.$title.'</'.$heading.'>',
                'type'    => 'title',
            ],
            'type'       => 'title',
            'depth'      => null,
            'styleDepth' => $this->getStyleListDepth($element->getStyle()),
            'styleName'  => $element->getStyle(),
            'index'      => $element->getElementIndex(),
            'children'   => [],
        ];
    }

    /**
     * Look up the registered style for the title.
     *
     * "HeadingN" style names are looked up as "Heading_N".
     *
     * @param mixed $element
     *
     * @return array ['font' => style object or null, 'heading' => style name].
     */
    private function getTitleStyle($element)
    {
        $styleName = $element->getStyle();
        $lookupName = strpos((string) $styleName, 'Heading') !== false
            ? str_replace('Heading', 'Heading_', $styleName)
            : $styleName;

        return [
            'font'    => Style::getStyle($lookupName),
            'heading' => $styleName,
        ];
    }

    /**
     * Inline CSS for a looked-up style, or '' when the style is unusable.
     *
     * The lookup may return null for a style name that is not registered (or a style
     * without font values); calling getStyleValues() unguarded was a fatal error then.
     *
     * @param mixed $font
     *
     * @return string
     */
    private function resolveInlineStyle($font)
    {
        if (! is_object($font) || ! method_exists($font, 'getStyleValues')) {
            return '';
        }

        $values = $font->getStyleValues();
        if (! isset($values[ 'style' ], $values[ 'basic' ]) || ! is_array($values[ 'style' ]) || ! is_array($values[ 'basic' ])) {
            return '';
        }

        return $this->getInlineStyles(array_merge($values[ 'style' ], $values[ 'basic' ]));
    }
}
@ -1,117 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\DocxParser\Traits; |
|||
|
|||
use ReflectionClass; |
|||
|
|||
/**
 * Shared helpers for the DOCX element handlers.
 */
trait Helper
{
    /**
     * Instantiate the handler whose class name matches the element's short class name.
     *
     * @param mixed $element
     *
     * @return object Instance of App\Parser\DocxParser\<ShortClassName>.
     *
     * @throws \Exception When the element cannot be reflected or no handler class exists.
     */
    public function getHandler($element)
    {
        try {
            $reflectClass = $this->getReflectionClass($element);
        } catch (\Exception $exception) {
            // Keep the original exception as "previous" so its type and trace are not lost.
            throw new \Exception($exception->getMessage(), 0, $exception);
        }

        $handleClass = 'App\Parser\DocxParser\\'.$reflectClass;
        if (! class_exists($handleClass)) {
            throw new \Exception("Handler class $handleClass dose not exists!");
        }

        return new $handleClass;
    }

    /**
     * Short (namespace-less) class name of an element.
     *
     * @param object|string $element Object or class name.
     *
     * @return string
     *
     * @throws \Exception When the class cannot be reflected.
     */
    public function getReflectionClass($element)
    {
        try {
            $reflectClass = new ReflectionClass($element);
        } catch (\ReflectionException $e) {
            // The catch block used to call throwException(), a function that does not
            // exist, which turned every reflection failure into a fatal error.
            throw new \Exception($e->getMessage(), 0, $e);
        }

        return $reflectClass->getShortName();
    }

    /**
     * Get the child elements of an element.
     *
     * @param mixed $element Object exposing getElements().
     *
     * @return mixed
     */
    public function getElements($element)
    {
        return $element->getElements();
    }

    /**
     * Check whether an element has child elements.
     *
     * @param mixed $element
     *
     * @return bool
     */
    public function hasElements($element)
    {
        return (bool) count($this->getElements($element));
    }

    /**
     * Translate style values into an inline CSS string.
     *
     * Values that are falsy, 'none' or 'auto' are skipped.
     *
     * @param array $styles Style name => value.
     *
     * @return string CSS declarations in the order the styles were given.
     */
    private function getInlineStyles($styles)
    {
        $styleString = '';

        foreach ($styles as $style => $value) {
            // Strict comparison: with a loose in_array() boolean true equals 'none',
            // so the boolean flags (allCaps, hidden, ...) were never emitted.
            if (! $value || in_array($value, ['none', 'auto'], true)) {
                continue;
            }

            // Declarations are built per style, so 'fgColor', 'size' and 'color'
            // are only read when they are actually present.
            switch ((string) $style) {
                case 'dStrike':
                    $styleString .= 'text-decoration: line-through;text-decoration-style: double;';
                    break;
                case 'smallCaps':
                    $styleString .= 'font-variant: small-caps;';
                    break;
                case 'allCaps':
                    $styleString .= 'text-transform: uppercase;';
                    break;
                case 'fgColor':
                    $styleString .= 'background-color:'.$value.';';
                    break;
                case 'hidden':
                    $styleString .= 'display:none;';
                    break;
                case 'size':
                    $styleString .= 'font-size:'.$value.'pt;';
                    break;
                case 'color':
                    $styleString .= 'color:#'.$value.';';
                    break;
            }
        }

        return $styleString;
    }

    /**
     * Derive a zero-based list depth from the number embedded in a style name.
     *
     * @param string|null $styleName e.g. "Heading2" gives 1.
     *
     * @return int|null Null when the name holds no usable number or contains "definition".
     */
    public function getStyleListDepth($styleName)
    {
        // Keeps only digits, signs and the decimal point of the style name.
        $number = filter_var($styleName, FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);

        if (is_numeric($number) && strpos(strtolower($styleName), 'definition') === false) {
            return (int) $number - 1;
        }

        return null;
    }
}
@ -1,527 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser\HtmlParser; |
|||
|
|||
use DOMDocument; |
|||
use Illuminate\Support\Facades\Log; |
|||
|
|||
class ParseHtml |
|||
{ |
|||
|
|||
/**
 * Load an HTML file and parse it.
 *
 * @param string $file Path passed to file_get_contents().
 *
 * @return mixed Result of parseLoadedHtml().
 *
 * @throws \Exception When the file cannot be read or parsing fails; the cause is kept as "previous".
 */
public function fromUploadedFile($file)
{
    try {
        $htmlDom = new DomDocument();
        Log::info('Parse html from file:'.$file);

        $htmlString = file_get_contents($file);
        if ($htmlString === false || $htmlString === '') {
            throw new \Exception('Unable to read html from file: '.$file);
        }

        // Collect markup errors internally instead of emitting warnings, then drop
        // them so the libxml error buffer does not keep growing.
        libxml_use_internal_errors(true);
        $htmlDom->loadHTML($htmlString);
        libxml_clear_errors();
        $htmlDom->preserveWhiteSpace = false;

        return $this->parseLoadedHtml($htmlDom);
    } catch (\Exception $exception) {
        // A dd() call used to dump and terminate here; the failure is now reported to the caller.
        throw new \Exception($exception->getMessage(), 0, $exception);
    }
}
|||
|
|||
|
|||
/**
 * Convert the <body> of a loaded document into the parsed response.
 *
 * Every top-level node that has a '_type' other than 'table' is passed through
 * handleChildrens(); entries with 'content' get their tags closed and a
 * tag-free, whitespace-normalised 'clean_content' copy.
 *
 * @param mixed $htmlDom Loaded DOM document.
 *
 * @return mixed Result of fixChildrenStructure().
 */
private function parseLoadedHtml($htmlDom)
{
    $body = $htmlDom->getElementsByTagName("body")->item(0);
    $nodes = $this->buildTheParsedResponse($this->domToArray($body));

    $parsed = [];
    foreach ($nodes as $node) {
        if (! isset($node[ '_type' ]) || $node[ '_type' ] === 'table') {
            continue;
        }

        $entry = $this->handleChildrens($node);
        if (! isset($entry[ 'content' ])) {
            continue;
        }

        $entry[ 'content' ] = $this->closetags($entry[ 'content' ]);
        $entry[ 'clean_content' ] = preg_replace("/(\r\n|\t|\r|\n)+/", " ", strip_tags($entry[ 'content' ]));
        $parsed[] = $entry;
    }

    return $this->fixChildrenStructure($parsed);
}
|||
|
|||
|
|||
/**
 * Recursively convert a DOM node into a nested array.
 *
 * Element nodes yield '_type', '_numberOfChildren', optional '_children' and
 * '_attributes' (plus '_startFrom' for <ol>). Text and CDATA nodes yield
 * '_type' => '_text' and '_content'. Nodes that produce nothing yield [].
 *
 * @param mixed $root DOM node.
 *
 * @return array
 */
private function domToArray($root)
{
    $result = [];

    if ($root->nodeType == XML_ELEMENT_NODE) {
        $result[ '_type' ] = $root->nodeName;

        if ($root->nodeName === 'ol') {
            // Ordered lists start at 1 unless a "start" attribute says otherwise.
            $result[ '_startFrom' ] = $root->hasAttribute('start') ? $root->getAttribute('start') : 1;
        }

        $result[ '_numberOfChildren' ] = $root->childNodes->length;

        if ($root->hasChildNodes()) {
            $children = $root->childNodes;
            for ($i = 0; $i < $children->length; $i++) {
                $child = $this->domToArray($children->item($i));

                // Whitespace-only text nodes come back as [] and are not kept.
                if (! empty($child)) {
                    $result[ '_children' ][] = $child;
                }
            }
        }
    } elseif ($root->nodeType == XML_TEXT_NODE || $root->nodeType == XML_CDATA_SECTION_NODE) {
        $value = (string) $root->nodeValue;

        // Compared against '' explicitly: empty() treats the text "0" as empty and dropped it.
        if ($value !== '') {
            $cleanText = (string) preg_replace("/(\r\n|\t|\r|\n)+/", " ", $value);
            if (str_replace(' ', '', $cleanText) !== '') {
                $result[ '_type' ] = '_text';
                $result[ '_content' ] = ltrim($cleanText);
            }
        }
    }

    if ($root->hasAttributes()) {
        foreach ($root->attributes as $attribute) {
            $result[ '_attributes' ][ $attribute->name ] = $attribute->value;
        }
    }

    return $result;
}
|||
|
|||
|
|||
/**
 * Convert the children of an array-form DOM element into parsed entries.
 *
 * Text children become ['_type' => '_text', 'content' => parseParagraph(...)].
 * Element children that have children of their own are processed
 * recursively: for 'ul'/'ol' the result is stored under 'children' (plus
 * 'start' when '_startFrom' is set), for every other element under
 * 'content'. Elements that produce nothing are omitted. '_attributes' are
 * copied onto every emitted entry.
 *
 * @param array $htmElementsAsArray Array-form element as produced by domToArray().
 * @return array List of parsed entries; empty when the element has no usable children.
 */
private function buildTheParsedResponse(array $htmElementsAsArray): array
{
    $parsedResponse = [];

    // domToArray() only sets '_children' when at least one child survived, so a
    // childless element must be treated as an empty list rather than read unguarded.
    foreach ($htmElementsAsArray[ '_children' ] ?? [] as $elementArray) {
        $data = [];

        if ($elementArray[ '_type' ] === '_text') {
            $data[ '_type' ] = $elementArray[ '_type' ];
            $data[ 'content' ] = $this->parseParagraph($elementArray);
        } elseif (isset($elementArray[ '_children' ])) {
            $parsedResponseData = $this->buildTheParsedResponse($elementArray);

            if (! empty($parsedResponseData)) {
                $data[ '_type' ] = $elementArray[ '_type' ];

                if (in_array($elementArray[ '_type' ], ['ul', 'ol'], true)) {
                    if (isset($elementArray[ '_startFrom' ])) {
                        $data[ 'start' ] = $elementArray[ '_startFrom' ];
                    }
                    $data[ 'children' ] = $parsedResponseData;
                } else {
                    $data[ 'content' ] = $parsedResponseData;
                }
            }
        }

        if (! empty($data)) {
            if (isset($elementArray[ '_attributes' ])) {
                $data[ '_attributes' ] = $elementArray[ '_attributes' ];
            }
            $parsedResponse[] = $data;
        }
    }

    return $parsedResponse;
}
|||
|
|||
|
|||
/**
 * Strip HTML elements that contain nothing but whitespace or other empty elements.
 *
 * @param mixed       $str   Markup to clean; non-strings and blank strings are returned untouched.
 * @param string|null $repto Replacement for each removed element; anything that is not a string means "remove".
 * @return mixed The cleaned markup, or $str unchanged when it is not a non-blank string.
 */
private function remove_empty_tags_recursive($str, $repto = null)
{
    $nothingToClean = ! is_string($str) || trim($str) == '';
    if ($nothingToClean) {
        return $str;
    }

    $replacement = is_string($repto) ? $repto : '';

    // Pattern written by Junaid Atari. (?R) recurses into the whole pattern so
    // that an element wrapping only empty elements is matched as empty too.
    $emptyElementPattern = '/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU';

    return preg_replace($emptyElementPattern, $replacement, $str);
}
|||
|
|||
|
|||
/**
 * Balance the HTML tags in a fragment and drop elements left empty.
 *
 * Unclosed tags are closed, stray closing tags are removed, known void
 * elements are normalised to the `<br />` form, and the result is passed
 * through remove_empty_tags_recursive().
 *
 * NOTE(review): the "WP bug fix" comments suggest this was adapted from
 * WordPress' force_balance_tags(). The three escaping steps were no-ops
 * (search and replacement were identical), so they have been restored to
 * the placeholder / `&lt;` forms that the comments describe.
 *
 * @param string $text HTML fragment to balance.
 * @return mixed Balanced fragment as returned by remove_empty_tags_recursive().
 */
private function closetags($text)
{
    $tagstack = [];
    $stacksize = 0;
    $tagqueue = '';
    $newtext = '';

    // Known single-entity/self-closing tags.
    $single_tags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'link',
        'meta',
        'param',
        'source',
    ];

    // Tags that can be immediately nested within themselves.
    $nestable_tags = ['blockquote', 'div', 'object', 'q', 'span'];

    // WP bug fix for comments - in case you REALLY meant to type '< !--'.
    // The literal is parked under a placeholder and restored at the very end.
    $text = str_replace('< !--', '<    !--', $text);

    // WP bug fix for LOVE <3 (and other situations with '<' before a number):
    // escape the bracket so it is never mistaken for the start of a tag.
    $text = preg_replace('#<([0-9]{1})#', '&lt;$1', $text);

    /**
     * Matches supported tags.
     *
     * @see https://html.spec.whatwg.org/#elements-2
     * @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name
     */
    $tag_pattern = (
        '#<'.       // Start with an opening bracket.
        '(/?)'.     // Group 1 - If it's a closing tag it'll have a leading slash.
        '('.        // Group 2 - Tag name.
        // Custom element tags have more lenient rules than HTML tag names.
        '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)'.
        '|'.
        // Traditional tag rules approximate HTML tag names.
        '(?:[\w:]+)'.
        ')'.
        '(?:'.
        // We either immediately close the tag with its '>' and have nothing here.
        '\s*'.
        '(/?)'.     // Group 3 - "attributes" for empty tag.
        '|'.
        // Or we must start with space characters to separate the tag name from the attributes (or whitespace).
        '(\s+)'.    // Group 4 - Pre-attribute whitespace.
        '([^>]*)'.  // Group 5 - Attributes.
        ')'.
        '>#'        // End with a closing bracket.
    );

    while (preg_match($tag_pattern, $text, $regex)) {
        $full_match = $regex[ 0 ];
        $has_leading_slash = ! empty($regex[ 1 ]);
        $tag_name = $regex[ 2 ];
        $tag = strtolower($tag_name);
        $is_single_tag = in_array($tag, $single_tags, true);
        $pre_attribute_ws = $regex[ 4 ] ?? '';
        $attributes = trim($regex[ 5 ] ?? $regex[ 3 ]);
        $has_self_closer = '/' === substr($attributes, -1);

        $newtext .= $tagqueue;

        $i = strpos($text, $full_match);
        $l = strlen($full_match);

        // Clear the shifter.
        $tagqueue = '';

        if ($has_leading_slash) { // End tag.
            if ($stacksize <= 0) {
                // Too many closing tags: drop this one.
                $tag = '';
            } elseif ($tagstack[ $stacksize - 1 ] === $tag) {
                // Stack top matches the closing tag: emit it and pop.
                $tag = '</'.$tag.'>';
                array_pop($tagstack);
                $stacksize--;
            } else {
                // Closing tag not at top, search for it further down the stack
                // and queue closers for everything opened after it.
                for ($j = $stacksize - 1; $j >= 0; $j--) {
                    if ($tagstack[ $j ] === $tag) {
                        for ($k = $stacksize - 1; $k >= $j; $k--) {
                            $tagqueue .= '</'.array_pop($tagstack).'>';
                            $stacksize--;
                        }
                        break;
                    }
                }
                $tag = '';
            }
        } else { // Begin tag.
            if ($has_self_closer) {
                // It presents itself as self-closing but is not a known
                // single-entity tag: close it immediately with a closing tag
                // (the tag will encapsulate no text as a result).
                if (! $is_single_tag) {
                    $attributes = trim(substr($attributes, 0, -1))."></$tag";
                }
            } elseif ($is_single_tag) {
                // Known single-entity tag that doesn't close itself: do so.
                $pre_attribute_ws = ' ';
                $attributes .= '/';
            } else {
                // Not a single-entity tag. If the top of the stack is the same
                // non-nestable tag we are about to push, close the previous one.
                if ($stacksize > 0
                    && ! in_array($tag, $nestable_tags, true)
                    && $tagstack[ $stacksize - 1 ] === $tag
                ) {
                    $tagqueue = '</'.array_pop($tagstack).'>';
                    $stacksize--;
                }
                $stacksize = array_push($tagstack, $tag);
            }

            // Attributes.
            if ($has_self_closer && $is_single_tag) {
                // We need some space - avoid <br/> and prefer <br />.
                $pre_attribute_ws = ' ';
            }

            $tag = '<'.$tag.$pre_attribute_ws.$attributes.'>';

            // If already queuing a close tag, then put this tag on too.
            if (! empty($tagqueue)) {
                $tagqueue .= $tag;
                $tag = '';
            }
        }

        $newtext .= substr($text, 0, $i).$tag;
        $text = substr($text, $i + $l);
    }

    // Clear tag queue.
    $newtext .= $tagqueue;

    // Add remaining text.
    $newtext .= $text;

    // Add remaining tags to close.
    while ($x = array_pop($tagstack)) {
        $newtext .= '</'.$x.'>';
    }

    // WP fix for the bug with HTML comments: normalise real comment openers,
    // then restore the literals that were parked under the placeholder.
    $newtext = str_replace('< !--', '<!--', $newtext);
    $newtext = str_replace('<    !--', '< !--', $newtext);

    return $this->remove_empty_tags_recursive($newtext);
}
|||
|
|||
|
|||
/**
 * Wrap the text of an array-form text node in a ['_content' => ...] array.
 *
 * When $type is truthy its items are concatenated in front of the text and
 * the combined markup is balanced with closetags(); otherwise the text is
 * returned as is.
 *
 * @param array      $elementArray Text node array holding '_content'.
 * @param array|null $type         Optional markup pieces to prepend.
 * @param mixed      $number       Not used by this method.
 * @return array Single-key array with '_content'.
 */
private function parseParagraph($elementArray, $type = null, $number = null)
{
    if ($type) {
        $content = $this->closetags(implode('', $type).$elementArray[ '_content' ]);
    } else {
        $content = $elementArray[ '_content' ];
    }

    return ['_content' => $content];
}
|||
|
|||
|
|||
/**
 * Render one parsed entry (as built by buildTheParsedResponse()) into an
 * array holding an HTML 'content' string and a 'children' list.
 *
 * - 'ol'/'ul': every list item is rendered recursively and collected under
 *   'children'; when a 'start' value is known, items with non-blank text get
 *   a running 'numbering_row'.
 * - 'div': every child is rendered recursively and collected under 'children'.
 * - anything else: child tags and text are concatenated into 'content', which
 *   is balanced with closetags() once the last child has been appended.
 *
 * NOTE(review): for a 'table' entry the body is skipped entirely and the
 * function falls off the end, i.e. it returns null rather than $parsed.
 *
 * @param array $data   Parsed entry with '_type' and 'content' or 'children'.
 * @param array $parsed Accumulator; 'content' is overwritten with the opening tag of $data.
 * @return array|null Rendered entry, or null for tables.
 */
private function handleChildrens($data, $parsed = []) |
|||
{ |
|||
if ($data[ '_type' ] !== 'table') { |
|||
|
|||
$parsed[ 'content' ] = '<'.$data[ '_type' ].'>'; |
|||
if (in_array($data[ '_type' ], ['ol', 'ul'])) { |
|||
$parsed[ 'children' ] = []; |
|||
if (isset($data[ 'start' ])) { |
|||
$startFrom = $data[ 'start' ]; |
|||
} |
|||
|
|||
foreach ($data[ 'children' ] as $child) { |
|||
if (isset($child[ 'start' ])) { |
|||
$startFrom = $child[ 'start' ]; |
|||
} |
|||
if (isset($child[ 'content' ])) { |
|||
foreach ($child[ 'content' ] as $li) { |
|||
// NOTE(review): $data (the parameter) is reassigned here; the enclosing foreach keeps iterating the original 'children' list.
$data = $this->handleChildrens($li); |
|||
if (isset($data[ 'content' ])) { |
|||
$data[ 'clean_content' ] = preg_replace("/(\r\n|\t|\r|\n)+/", " ", |
|||
strip_tags($data[ 'content' ])); |
|||
// Only items with visible text consume a number.
if (isset($startFrom) && strlen(trim($data[ 'clean_content' ])) > 0) { |
|||
$data[ 'numbering_row' ] = $startFrom; |
|||
$startFrom++; |
|||
} |
|||
|
|||
$parsed[ 'children' ][] = $data; |
|||
} |
|||
|
|||
} |
|||
} else { |
|||
// NOTE(review): handleChildrens() returns null for a table child, so $data[ 'content' ] may be unset here - confirm.
$data = $this->handleChildrens($child); |
|||
$data[ 'clean_content' ] = preg_replace("/(\r\n|\t|\r|\n)+/", " ", |
|||
strip_tags($data[ 'content' ])); |
|||
$parsed[ 'children' ][] = $data; |
|||
} |
|||
} |
|||
} elseif (isset($data[ '_type' ]) && ($data[ '_type' ] === 'div')) { |
|||
foreach ($data[ 'content' ] as $child) { |
|||
$data = $this->handleChildrens($child); |
|||
if (isset($data[ 'content' ])) { |
|||
$data[ 'clean_content' ] = preg_replace("/(\r\n|\t|\r|\n)+/", " ", |
|||
strip_tags($data[ 'content' ])); |
|||
$data[ 'content' ] = $this->closetags($data[ 'content' ]); |
|||
} |
|||
|
|||
$parsed[ 'children' ][] = $data; |
|||
} |
|||
|
|||
} else { |
|||
$contentChilds = count($data[ 'content' ]); |
|||
foreach ($data[ 'content' ] as $index => $child) { |
|||
if ($child[ '_type' ] !== '_text') { |
|||
if (! isset($parsed[ 'content' ])) { |
|||
$parsed[ 'content' ] = '<'.$child[ '_type' ].'>'; |
|||
} else { |
|||
$parsed[ 'content' ] .= '<'.$child[ '_type' ].'>'; |
|||
|
|||
} |
|||
$childs = $this->handleChildrens($child, $parsed); |
|||
if ($childs && isset($child[ 'content' ])) { |
|||
$parsed[ 'content' ] .= $childs[ 'content' ]; |
|||
} |
|||
|
|||
} else { |
|||
if (! isset($parsed[ 'content' ])) { |
|||
$parsed[ 'content' ] = $child[ 'content' ][ '_content' ]; |
|||
} else { |
|||
$parsed[ 'content' ] .= $child[ 'content' ][ '_content' ]; |
|||
} |
|||
} |
|||
// After the last child has been appended, balance the accumulated markup.
if ($contentChilds == $index + 1) { |
|||
$parsed[ 'content' ] = $this->closetags($parsed[ 'content' ]); |
|||
} |
|||
$parsed[ 'children' ] = []; |
|||
} |
|||
} |
|||
|
|||
return $parsed; |
|||
} |
|||
|
|||
} |
|||
|
|||
|
|||
/**
 * Post-process a flat list of rendered entries.
 *
 * Entries whose content is exactly '<ol>' are skipped, entries with empty
 * content and exactly one child are replaced by that child, and an entry
 * that carries 'numbering_row' absorbs following entries through
 * handlePossibleChild() until an entry without 'content', or one whose
 * content contains 'h1', is reached.
 *
 * NOTE(review): $alreadyHandledIndexes stores indexes as VALUES (via []),
 * but both lookups use array_key_exists(), which tests KEYS; the inner
 * lookup also tests $i rather than $j. Confirm the intended behaviour
 * before changing this - switching to in_array() alters how many following
 * entries are absorbed.
 *
 * @param array $data List of rendered entries.
 * @return array Restructured list.
 */
private function fixChildrenStructure($data) |
|||
{ |
|||
|
|||
$result = []; |
|||
$alreadyHandledIndexes = []; |
|||
|
|||
for ($i = 0; $i < count($data); $i++) { |
|||
|
|||
if (isset($data[ $i ][ 'content' ]) && $data[ $i ][ 'content' ] == '<ol>') { |
|||
$alreadyHandledIndexes[] = $i; |
|||
continue; |
|||
} |
|||
if (array_key_exists($i, $alreadyHandledIndexes)) { |
|||
continue; |
|||
} |
|||
|
|||
// Unwrap a content-less wrapper that holds exactly one child.
if(isset($data[ $i ]['content']) && $data[ $i ]['content']==='' && count($data[ $i ]['children'])==1){ |
|||
$data[ $i ] = last($data[ $i ]['children']); |
|||
} |
|||
|
|||
|
|||
$j = $i + 1; |
|||
for ($j; $j < count($data); $j++) { |
|||
if (array_key_exists($i, $alreadyHandledIndexes)) { |
|||
continue; |
|||
} |
|||
|
|||
if (! isset($data[ $j ][ 'content' ]) || strpos($data[ $j ][ 'content' ], 'h1') !== false) { |
|||
break; |
|||
} |
|||
|
|||
if(isset($data[$i]['numbering_row'])){ |
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandledIndexes[] = $j; |
|||
|
|||
}else { |
|||
break; |
|||
} |
|||
|
|||
|
|||
} |
|||
|
|||
//if (isset($data[ $i ][ 'content' ]) && empty($data[ $i ][ 'content' ])) {
|
|||
// $data[ $i ] = last($data[ $i ][ 'children' ]);
|
|||
//}
|
|||
// An entry without 'content' but with several items is spliced into the result instead of being appended as one element.
if (is_array($data[ $i ]) && count($data[ $i ]) > 1 && ! isset($data[ $i ][ 'content' ])) { |
|||
$result = array_merge($result, $data[ $i ]); |
|||
} else { |
|||
$result[] = $data[ $i ]; |
|||
} |
|||
|
|||
$alreadyHandledIndexes[] = $i; |
|||
|
|||
} |
|||
|
|||
return $result; |
|||
} |
|||
|
|||
|
|||
/**
 * Fold $child into $parent and return the updated parent.
 *
 * Steps, in order:
 *  1. A parent with empty content is collapsed: to its only child, or to the
 *     result of fixChildrenStructure() when it has several children.
 *  2. When the child is a bare '<ol>' entry, its children are appended to the
 *     parent's children; nested '<ol>' children are first folded into the
 *     parent's last child.
 *  3. When the parent's clean text ends in a lowercase letter and the child
 *     has clean text, the child's content, clean text and children are
 *     appended to the parent's.
 *  4. A single-element parent without 'content' is unwrapped.
 *
 * A child with empty content used to hit a leftover dd() debug call that
 * terminated the process; such a child now simply flows through the steps
 * above (none of which act on an empty child).
 *
 * @param mixed $parent Entry that may absorb the child.
 * @param array $child  Entry following the parent.
 * @return mixed The updated parent.
 */
private function handlePossibleChild($parent, $child = [])
{
    if (isset($parent[ 'children' ])) {
        $childCount = count($parent[ 'children' ]);
        if (empty($parent[ 'content' ]) && $childCount === 1) {
            $parent = $parent[ 'children' ][ 0 ];
        } elseif (empty($parent[ 'content' ]) && $childCount > 1) {
            $parent = $this->fixChildrenStructure($parent[ 'children' ]);
        }
    }

    if (isset($child[ 'content' ]) && $child[ 'content' ] == '<ol>') {
        foreach ($child[ 'children' ] ?? [] as $grandChild) {
            $newChild = $grandChild;
            if (($grandChild[ 'content' ] ?? null) == '<ol>') {
                // A nested list belongs to the item added last.
                $lastParentChild = last($parent[ 'children' ]);
                $newChild = $this->handlePossibleChild($lastParentChild, $grandChild);
            }

            $parent[ 'children' ][] = $newChild;
        }
    }

    $parentHasText = isset($parent[ 'clean_content' ]) && strlen($parent[ 'clean_content' ]);
    $childHasText = isset($child[ 'clean_content' ]) && strlen($child[ 'clean_content' ]);

    // A parent ending in a lowercase letter (which also rules out the
    // punctuation and digits the original check listed) reads as an
    // unfinished sentence, so the child is treated as its continuation.
    if ($parentHasText && $childHasText && ctype_lower(substr($parent[ 'clean_content' ], -1))) {
        $parent[ 'content' ] .= ' '.$child[ 'content' ];
        // Either side may lack a 'children' list; treat that as an empty list.
        $parent[ 'children' ] = array_merge($parent[ 'children' ] ?? [], $child[ 'children' ] ?? []);
        $parent[ 'clean_content' ] .= ' '.$child[ 'clean_content' ];
    }

    if (is_array($parent) && count($parent) == 1 && ! isset($parent[ 'content' ])) {
        $parent = array_shift($parent);
    }

    return $parent;
}
|||
|
|||
} |
@ -1,670 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser; |
|||
|
|||
use Illuminate\Support\Facades\Log; |
|||
|
|||
class ParseHtmlArray |
|||
{ |
|||
|
|||
/**
 * Read a JSON file, decode it to an associative array and run it through handle().
 *
 * Before decoding, a '},' that directly precedes the closing ']' is replaced
 * by '}' (presumably to tolerate a trailing comma in the exported JSON -
 * the exact whitespace inside the literal matters, TODO confirm).
 *
 * NOTE(review): when the file does not exist only an error is logged and the
 * method returns nothing (null). A json_decode() failure is not checked
 * either, so handle() may receive null.
 *
 * @param string $filePath Path of the JSON file.
 * @return mixed Result of handle(), or null when the file is missing.
 */
public function fromFile($filePath) |
|||
{ |
|||
if (file_exists($filePath)) { |
|||
$fileContent = file_get_contents($filePath); |
|||
$fileContent = str_replace('}, |
|||
|
|||
]', "}
|
|||
|
|||
]", $fileContent);
|
|||
return $this->handle(json_decode($fileContent,true)); |
|||
} else { |
|||
Log::error('The given file dose not exists!'); |
|||
} |
|||
|
|||
} |
|||
|
|||
|
|||
|
|||
|
|||
/**
 * Flatten the decoded document and build the nested paragraph structure.
 *
 * Each item of the input is converted with handleTestHtml(), the results are
 * merged into one list in input order, and that list is handed to
 * buildTheStructure().
 *
 * @param mixed $docxAsHtmlArray Iterable of decoded document chunks.
 * @return mixed Result of buildTheStructure() for the merged list.
 */
public function handle($docxAsHtmlArray)
{
    $flattened = [];

    foreach ($docxAsHtmlArray as $htmlChunk) {
        $chunkParagraphs = $this->handleTestHtml($htmlChunk);
        $flattened = array_merge($flattened, $chunkParagraphs);
    }

    return $this->buildTheStructure($flattened);
}
|||
|
|||
|
|||
/**
 * Nest a flat list of paragraphs into parents and children.
 *
 * For every entry $i that looks like a heading (starts with <b>, has exactly
 * one <b> or a leading number, or consists of very few short words), the
 * following entries $j are examined one by one. A long cascade of heuristics
 * (detected numbering, bold markup, letter case, word counts, trailing
 * punctuation and a list of known section titles) decides whether $j is
 * attached to $i through handlePossibleChild() or whether scanning stops.
 * Entries with non-blank text are appended to the response.
 *
 * NOTE(review): several dd() calls remain in this method; dd() is presumably
 * the Laravel dump-and-die helper, so reaching one would terminate the
 * request - confirm and remove.
 *
 * @param array $data Flat list of paragraph arrays, each holding 'content'.
 * @return array Nested list of paragraphs.
 */
private function buildTheStructure($data) |
|||
{ |
|||
$response = []; |
|||
$alreadyHandled = []; |
|||
$numbers = []; |
|||
for ($i = 0; $i < count($data); $i++) { |
|||
|
|||
// NOTE(review): $alreadyHandled stores indexes as values (see the `[] =` appends below), while array_key_exists() tests keys - confirm this is intended.
if (array_key_exists($i, $alreadyHandled)) { |
|||
continue; |
|||
} |
|||
|
|||
$parent = $data[ $i ]; |
|||
//get numbering from the first 5 chars of the cleaned string (see the substr(..., 0, 5) below)
|
|||
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '', |
|||
substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', preg_replace('/\)/', '.', |
|||
preg_replace("/\{.+/", "", html_entity_decode($data[ $i ][ 'content' ]))))), 0, 5)), |
|||
$parentNumbering); |
|||
|
|||
if ($parentNumbering && count($numbers) == 0 && last($parentNumbering) < 5) { |
|||
$numbers[] = $parentNumbering[ 0 ]; |
|||
$data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.'); |
|||
} elseif ($parentNumbering && count($numbers) > 0 && $parentNumbering[ 0 ] >= last($numbers)) { |
|||
$numbers[] = $parentNumbering[ 0 ]; |
|||
$data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.'); |
|||
} |
|||
|
|||
//check if string starts with bold
|
|||
//check if number of bolds equals to 1
|
|||
//check if not empty html and contains words
|
|||
|
|||
if ((strpos($parent[ 'content' ], "<b>") === 0 || (substr_count($parent[ 'content' ], |
|||
"<b>") == 1 || $parentNumbering) && strlen(trim(strip_tags($parent[ 'content' ]))) > 0) || (str_word_count(preg_replace('/[A-Za-z]{4,}/', |
|||
'', strip_tags($data[ $i ][ 'content' ]))) < 2)) { |
|||
$childNumbers = []; |
|||
|
|||
$j = $i + 1; |
|||
//check if data exists
|
|||
if (isset($data[ $j ]) && strlen($data[ $j ][ 'content' ])) { |
|||
|
|||
|
|||
for ($j; $j < count($data); $j++) { |
|||
if ($data[ $j ][ 'content' ] == '\u00a0') { |
|||
$alreadyHandled[] = $j; |
|||
} |
|||
if (array_key_exists($j, $alreadyHandled)) { |
|||
continue; |
|||
} |
|||
|
|||
$child = $data[ $j ]; |
|||
|
|||
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', |
|||
substr(trim(urldecode(str_replace(['<b>', '</b>'], '', |
|||
strip_tags($data[ $j ][ 'content' ])))), 0, 5), $childNumbering); |
|||
|
|||
if ($childNumbering && ! preg_match("/[a-z]/i", rtrim(trim($childNumbering[ 0 ])))) { |
|||
if ($childNumbering && count($childNumbers) == 0 && trim($childNumbering[ 0 ]) < 5) { |
|||
$childNumbers[] = trim($childNumbering[ 0 ]); |
|||
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); |
|||
|
|||
} elseif ($childNumbering && count($childNumbers) > 0 && trim($childNumbering[ 0 ]) >= last($childNumbers)) { |
|||
$childNumbers[] = trim($childNumbering[ 0 ]); |
|||
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); |
|||
|
|||
} elseif ($childNumbering && trim($childNumbering[ 0 ]) < 100) { |
|||
$childNumbers[] = trim($childNumbering[ 0 ]); |
|||
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); |
|||
} |
|||
} |
|||
|
|||
if (empty(trim($data[ $i ][ 'content' ])) && isset($data[ $j ][ 'numbering' ])) { |
|||
break; |
|||
} |
|||
|
|||
// NOTE(review): array_change_key_case() changes array KEYS only; this is a list, so the titles themselves are not upper-cased. The list is also rebuilt on every iteration of the inner loop.
$breakPoints = array_change_key_case([ |
|||
'TERMS OF THE {P1_Pros}', |
|||
'TERMS AND CONDITIONS', |
|||
'BACKGROUND', |
|||
'OPERATIVE PROVISIONS', |
|||
'Products and/or Services', |
|||
'PAYMENT', |
|||
'GRANT OF LICENCE', |
|||
'TERM OF LICENCE AGREEMENT', |
|||
'ROYALTY', |
|||
'PAYMENT', |
|||
'PERFORMANCE TARGETS', |
|||
'STATIONERY', |
|||
'QUALITY CONTROL', |
|||
'THE DISTRIBUTOR\'S OBLIGATIONS', |
|||
'NON SOLICITATION', |
|||
'SALE OF BUSINESS', |
|||
'TERMINATION OF AGREEMENT', |
|||
'CONDITIONS FOLLOWING TERMINATION', |
|||
'RESTRAINT', |
|||
'TIME OF ESSENCE AND NOTICES', |
|||
'INTERPRETATION', |
|||
'ARBITRATION', |
|||
'DOMICILIUM AND REGISTERED OFFICE', |
|||
'USE OF TRADE MARKS, TRADE NAME, GOODWILL AND KNOW-HOW', |
|||
'GENERAL', |
|||
'DESCRIPTION OF {P2_NAME} INFORMATION', |
|||
'PAYMENT OF FEES', |
|||
'SUPPLIER\'S STATUS', |
|||
'SUPPLIER\’S OBLIGATIONS', |
|||
'DEFINITIONS AND INTERPRETATION', |
|||
'DEFINITIONS', |
|||
'CONFIDENTIALITY', |
|||
'TERMINATION', |
|||
'RESTRICTIVE COVENANTS AND INTELLECTUAL PROPERTY', |
|||
'DETAILS AND IDENTITY OF CONSULTANT', |
|||
'ANTI-BRIBERY', |
|||
'ASSIGNMENT SCHEDULE', |
|||
'SCHEDULE 1', |
|||
'{P1_NAME}\'S LIABILITY', |
|||
'DURATION OF AGREEMENT AND SUPPLY', |
|||
'SUPPLY OF HARDWARE', |
|||
'SUPPLY OF SOFTWARE AND DOCUMENTATION', |
|||
'SUPPLY OF SUPPORT SERVICES', |
|||
'INTELLECTUAL PROPERTY RIGHTS', |
|||
'THE CONTRACT', |
|||
'{P1_NAME}\U2019S LIABILITY', |
|||
'UPDATES', |
|||
'TERMS OF THE {P1_NAME} PRODUCTS.', |
|||
'CUSTOMER RESPONSIBILITIES', |
|||
'EXHIBIT A', |
|||
'EXHIBIT A-1', |
|||
'EXHIBIT A-2', |
|||
'WARRANTIES', |
|||
'EXIT, TERMINATION AND SUSPENSION', |
|||
'EXHIBIT B', |
|||
'EXHIBIT B-1', |
|||
'EXHIBIT B-2', |
|||
'COUNTERPARTS', |
|||
'LICENSE GRANT', |
|||
'INDEMNIFICATION BY CUSTOMER', |
|||
'TERMS OF THE {P1_NAME} PRODUCTS', |
|||
'TERMS OF CLOUD SERVICE', |
|||
'INDEMNIFICATION BY CUSTOMER', |
|||
'TERMINATION', |
|||
'TERMS OF THE {P1_PROS}', |
|||
'SUPPORT', |
|||
'SUB CONTRACTING AND THIRD PARTY RECOMMENDATIONS', |
|||
'LICENCE AND ACCESS TO SOFTWARE AND HARDWARE', |
|||
'DECLARATION OF NON-LIAISON AND ANTI-CORRUPTION COMMITMENT', |
|||
'{P1_NAME}\'S DUTIES' |
|||
], CASE_UPPER); |
|||
//$breakPoints = [];
|
|||
|
|||
if ($this->paragraphBrake($data[ $j ], $breakPoints)) { |
|||
break; |
|||
} |
|||
|
|||
if (substr(trim(str_replace(array_merge([')'], $childNumbering), '', $data[ $j ][ 'content' ])), |
|||
0, 3) == '<b>' && str_word_count(strip_tags(str_replace(array_merge([')'], |
|||
$childNumbering), '', |
|||
$data[ $j ][ 'content' ]))) == str_word_count($this->getTextBetweenTags(str_replace(array_merge([')',], |
|||
$childNumbering), '', $data[ $j ][ 'content' ]), |
|||
'b')) && (isset($data[ $j + 1 ]) && ((ctype_upper(substr($data[ $j + 1 ][ 'content' ], |
|||
0, |
|||
1)) || (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ] == 1))))) { |
|||
|
|||
break; |
|||
|
|||
} |
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace(' ', |
|||
'', $data[ $j ][ 'content' ])) && str_word_count($data[ $j ][ 'content' ]) >= 1) { |
|||
|
|||
break; |
|||
} |
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace([ |
|||
'<b>', |
|||
'</b>', |
|||
last($childNumbering), |
|||
last($childNumbering), |
|||
')', |
|||
'.' |
|||
], '', trim(str_replace(' ', '', |
|||
$data[ $j ][ 'content' ])))) && str_word_count($data[ $j ][ 'content' ]) >= 1) { |
|||
|
|||
break; |
|||
} |
|||
|
|||
//if(isset($data[$j]['numbering']) && isset($data[$i]['numbering']) && )
|
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && isset($data[ $i ][ 'numbering' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && ($data[ $j ][ 'numbering' ] - last($data[ $i ][ 'children' ])[ 'numbering' ] !== 1 && $data[ $i ][ 'numbering' ] < $data[ $j ][ 'numbering' ]) && ! in_array(substr(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ]), |
|||
strlen(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ])) - 1), |
|||
[':', '-']) && ! strpos($data[ $j ][ 'numbering' ], '.')) { |
|||
|
|||
|
|||
break; |
|||
} |
|||
|
|||
if (in_array(strtoupper(trim(str_replace([ |
|||
'<b>', |
|||
'</b>', |
|||
last($parentNumbering), |
|||
last($parentNumbering), |
|||
')', |
|||
'.' |
|||
], '', strip_tags($data[ $i ][ 'content' ])))), $breakPoints)) { |
|||
if ((! isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && (substr($data[ $i ][ 'content' ], |
|||
0, |
|||
3) != '<b>') || (str_word_count(strip_tags($data[ $i ][ 'content' ])) != str_word_count($this->getTextBetweenTags($data[ $i ][ 'content' ], |
|||
'b'))))) { |
|||
if (! in_array($data[ $i ][ 'content' ], $breakPoints)) { |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (in_array(strtoupper(trim(str_replace([ |
|||
'<b>', |
|||
'</b>', |
|||
last($childNumbering), |
|||
last($childNumbering), |
|||
')', |
|||
'.' |
|||
], '', strip_tags($data[ $j ][ 'content' ])))), $breakPoints)) { |
|||
break; |
|||
} |
|||
|
|||
if (in_array(substr(strip_tags($data[ $j ][ 'content' ]), |
|||
strlen(strip_tags($data[ $j ][ 'content' ])) - 1), [':', '-'])) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ctype_lower(substr(last($data[ $i ][ 'children' ])[ 'content' ], |
|||
strlen(last($data[ $i ][ 'children' ])[ 'content' ]) - 1)) && ctype_lower(substr(trim($data[ $j ][ 'content' ]), |
|||
0, 1))) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '', |
|||
strip_tags($data[ $j ][ 'content' ]))) < 3 && strlen(strip_tags($data[ $j ][ 'content' ])) && ! isset($data[ $j ][ 'numbering' ]) && ctype_upper(substr($data[ $j ][ 'content' ], |
|||
0, 1)) && str_word_count($data[ $j ][ 'content' ]) < 10) { |
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && ! in_array(substr(trim(last($data[ $i ][ 'children' ])[ 'content' ]), |
|||
strlen(trim(last($data[ $i ][ 'children' ])[ 'content' ])) - 1), |
|||
['!', '.', '?', '_', '}'])) { |
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} else { |
|||
break; |
|||
|
|||
} |
|||
|
|||
//dd($data[$i]);
|
|||
} elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '', |
|||
strip_tags($data[ $i ][ 'content' ]))) < 2 && strlen(strip_tags($data[ $i ][ 'content' ]))) { |
|||
|
|||
|
|||
if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ] |
|||
[ 'content' ]) < 6) { |
|||
|
|||
break; |
|||
} |
|||
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ((str_word_count($data[ $j ] |
|||
[ 'content' ]) < 6) || (substr_count($data[ $j ][ 'content' ], |
|||
'<b>') == 1 && substr_count(last($data[ $i ][ 'children' ])[ 'content' ], |
|||
'<b>') == 0 && ! isset(last($data[ $i ][ 'children' ])[ 'numbering' ]))) && ctype_upper((substr($data[ $j ][ 'content' ], |
|||
0, 1)))) { |
|||
break; |
|||
} |
|||
if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] + 1 == $data[ $i ][ 'numbering' ] && str_word_count($data[ $j ][ 'content' ]) < 6) { |
|||
break; |
|||
} |
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ! isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) { |
|||
|
|||
break; |
|||
} |
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
|
|||
$alreadyHandled[] = $j; |
|||
} elseif (! in_array(trim(strtolower(strip_tags($data[ $j ][ 'content' ]))), |
|||
['definitions']) && ! ctype_space($data[ $j ][ 'content' ]) && strlen(trim(strip_tags($data[ $j ][ 'content' ]))) && ! isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ])) { |
|||
|
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} elseif (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) { |
|||
|
|||
|
|||
if (is_numeric($data[ $j ][ 'numbering' ]) && is_numeric($data[ $i ][ 'numbering' ]) && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ][ 'content' ]) < str_word_count($data[ $i ][ 'content' ])) { |
|||
break; |
|||
} |
|||
|
|||
if (is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) === 1 && (isset($data[ $i ][ 'children' ]) && (! (isset(last($data[ $i ][ 'children' ])[ 'numbering' ])) || (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs(last($data[ $i ][ 'children' ])[ 'numbering' ] - $data[ $j ][ 'numbering' ]) !== 1))) && str_word_count($data[ $j ][ 'content' ]) < 8) { |
|||
|
|||
break; |
|||
|
|||
} |
|||
|
|||
if (substr_count($data[ $j ][ 'numbering' ], '.') > substr_count($data[ $i ][ 'numbering' ], |
|||
'.') && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) < 1) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} elseif (((float) $data[ $j ][ 'numbering' ] > (float) $data[ $i ][ 'numbering' ] && substr_count($data[ $j ][ 'content' ], |
|||
'<b>') == 0 && substr_count($data[ $i ][ 'content' ], |
|||
'<b>') == 1) || (substr_count($data[ $i ][ 'content' ], |
|||
"<b>") == 1 && (substr_count($data[ $j ][ 'content' ], |
|||
'<b>') == 0 || substr_count($data[ $j ][ 'content' ], '<b>')) > 1)) { |
|||
|
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} elseif (substr_count($data[ $i ][ 'content' ], |
|||
'<b>') == 1 && str_word_count($data[ $j ][ 'content' ]) > 6 && isset($data[ $j ][ 'numbering' ])) { |
|||
// NOTE(review): debugging leftover keyed to one specific document; dd() would halt processing here.
if (strpos($data[ $j ][ 'content' ], |
|||
'Networking infrastructure (hardware, firmware, software an') !== false) { |
|||
dd('aa'); |
|||
} |
|||
|
|||
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ])) { |
|||
$lastParentChild = last($data[ $i ][ 'children' ]); |
|||
if (isset($lastParentChild[ 'numbering' ]) && abs($lastParentChild[ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && (substr_count($data[ $j ][ 'content' ], |
|||
'<b>') == 1)) { |
|||
|
|||
break; |
|||
} |
|||
|
|||
} |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && str_word_count($data[ $j ][ 'content' ]) >= 6) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs((float) $data[ $j ][ 'numbering' ] - (float) last($data[ $i ][ 'children' ])[ 'numbering' ]) == (float) 1) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) == 0 && str_word_count($data[ $j ][ 'content' ]) >= 6) { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} else { |
|||
|
|||
break; |
|||
} |
|||
|
|||
} elseif (isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ]) && str_word_count($data[ $j ][ 'content' ]) > 6) { |
|||
|
|||
if (substr_count($data[ $j ][ 'content' ], |
|||
"<b>") == 1 && strpos(strtolower($data[ $i ][ 'content' ]), |
|||
'definition') === false) { |
|||
|
|||
break; |
|||
} |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} elseif (empty($data[ $j ][ 'content' ]) && (isset($data[ $j + 1 ]) && isset($data[ $j - 1 ]) && isset($data[ $i ][ 'children' ]))) { |
|||
|
|||
// NOTE(review): this branch is entered only when $data[ $j ][ 'content' ] is empty, yet the condition below also requires it to be non-empty, so the dd() looks unreachable - confirm.
if (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && strlen(last($data[ $i ][ 'children' ])[ 'numbering' ]) == strlen(preg_replace('/[^0-9\.)]/', |
|||
'', substr(trim(preg_replace('/ +/', ' ', preg_replace('/[^A-Za-z0-9 .]/', ' ', |
|||
urldecode(strip_tags($data[ $j + 1 ][ 'content' ]))))), 0, |
|||
5))) && ! empty($data[ $j ][ 'content' ])) { |
|||
dd('Here', $data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
} else { |
|||
|
|||
break; |
|||
} |
|||
|
|||
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ])) { |
|||
|
|||
$lastParentChild = last($data[ $i ][ 'children' ]); |
|||
// NOTE(review): $child was copied before 'numbering' was written to $data[ $j ], so isset($child[ 'numbering' ]) may not reflect it; the dd() below is a debugging leftover.
if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && substr_count($lastParentChild[ 'numbering' ], |
|||
'.') > substr_count($data[ $j ][ 'numbering' ], '.')) { |
|||
dd('111'); |
|||
|
|||
} else { |
|||
|
|||
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); |
|||
$alreadyHandled[] = $j; |
|||
|
|||
} |
|||
} else { |
|||
|
|||
break; |
|||
} |
|||
|
|||
//if(strpos($data[$i]['content'],'<b>2. TERMS OF THE {P1_Pros}.</b>')!==false || strpos($data[$j]['content'],'<b>2. TERMS OF THE {P1_Pros}.</b>')!==false){
|
|||
// dd($data[$i],$data[$j]);
|
|||
//}
|
|||
} |
|||
} |
|||
|
|||
} |
|||
|
|||
// Only entries that still contain visible text are emitted.
if (strlen(trim(strip_tags($data[ $i ][ 'content' ])))) { |
|||
|
|||
$response[] = $data[ $i ]; |
|||
//if ($data[ $i ][ 'content' ] == "Duration of Agreement and Supply") {
|
|||
// dd(121,$data[$i],$i);
|
|||
//}
|
|||
//if($i > 73){
|
|||
// dd($i,$data[$i],$response);
|
|||
//}
|
|||
} |
|||
|
|||
$alreadyHandled[] = $i; |
|||
} |
|||
|
|||
return $response; |
|||
} |
|||
|
|||
|
|||
/**
 * Attach $child to $parent, either as a new child, as a descendant of the
 * parent's most recent child, or merged into that child's text.
 *
 * Decision order:
 *  1. A parent without content is replaced by the child; a child without
 *     content is ignored.
 *  2. The first child of a parent is always appended.
 *  3. If the last child ends with ':' and the new node starts with a lower-case
 *     letter (or a digit followed by more than five words), the node is nested
 *     under the last child - unless their numbering is consecutive, in which
 *     case it is appended as a sibling.
 *  4. If both nodes are numbered and the new numbering string is longer, the
 *     node is a sibling when the numbers differ by exactly one or have the
 *     same count of dots, otherwise it is nested under the last child.
 *  5. Otherwise the node is merged into, nested under, or appended after the
 *     last child depending on sentence punctuation and letter case.
 *
 * @param array $parent Node with a 'content' string and optional 'numbering' / 'children' keys.
 * @param array $child  Node of the same shape that follows $parent in the document.
 *
 * @return array The updated parent, or $child itself when the parent has no content.
 */
private function handlePossibleChild($parent, $child)
{
    if (empty($parent[ 'content' ]) && ! empty($child[ 'content' ])) {
        return $child;
    }

    if (empty($child[ 'content' ])) {
        return $parent;
    }

    // First child: the child is known to have content here, so it is always appended.
    if (! isset($parent[ 'children' ]) || count($parent[ 'children' ]) == 0) {
        $parent[ 'children' ] = [$child];

        return $parent;
    }

    $lastIndex       = count($parent[ 'children' ]) - 1;
    $lastParentChild = last($parent[ 'children' ]);
    $childNumbered   = isset($child[ 'numbering' ]);
    $childFirstRaw   = substr($child[ 'content' ], 0, 1);

    // Last child introduces a list ("...:") and the new node looks like one of its items.
    if ($lastParentChild
        && substr($lastParentChild[ 'content' ], -1) === ':'
        && (ctype_lower($childFirstRaw) || (ctype_digit($childFirstRaw) && str_word_count($child[ 'content' ]) > 5))
    ) {
        $nested = $this->handlePossibleChild($lastParentChild, $child);

        if (isset($nested[ 'numbering' ]) && $childNumbered && $child[ 'numbering' ] - 1 == $nested[ 'numbering' ]) {
            // Consecutive numbering: keep the node as a sibling rather than nesting it.
            $parent[ 'children' ][] = $child;
        } else {
            $parent[ 'children' ][ $lastIndex ] = $nested;
        }

        return $parent;
    }

    $lastNumbered = isset($lastParentChild[ 'numbering' ]);

    // New numbering string is longer than the last child's.
    if ($lastNumbered && $childNumbered && strlen($child[ 'numbering' ]) > strlen($lastParentChild[ 'numbering' ])) {
        if ($child[ 'numbering' ]
            && is_numeric($child[ 'numbering' ])
            && abs($child[ 'numbering' ] - $lastParentChild[ 'numbering' ]) === 1
        ) {
            $parent[ 'children' ][] = $child;

            return $parent;
        }

        if (substr_count($lastParentChild[ 'numbering' ], '.') == substr_count($child[ 'numbering' ], '.')) {
            $parent[ 'children' ][] = $child;

            return $parent;
        }

        $parent[ 'children' ][ $lastIndex ] = $this->handlePossibleChild($lastParentChild, $child);

        return $parent;
    }

    // Last character of the previous child once "and"/"or" substrings are removed; a
    // terminator there means the previous child is a finished sentence.
    $lastTail   = substr(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ])), -1);
    $lastIsOpen = ! in_array($lastTail, ['!', '.', '?', ';', '_', ':'], true);
    $childHead  = substr(trim($child[ 'content' ]), 0, 1);

    if ($lastIsOpen && (ctype_lower($childHead) || (ctype_upper($childHead) && ! $childNumbered))) {
        // The previous child is an unfinished sentence: glue the new text onto it.
        // (A leftover dd() debug halt for one specific contract sentence used to sit here.)
        $lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
        $parent[ 'children' ][ $lastIndex ] = $lastParentChild;

        return $parent;
    }

    $parentTail = substr(trim($parent[ 'content' ]), -1);
    $rawTail    = substr(trim($lastParentChild[ 'content' ]), -1);

    if (! in_array($parentTail, ['!', '.', '?', ';'], true) && ctype_lower($rawTail) && ctype_lower($childHead)) {
        $parent[ 'children' ][] = $child;
    } elseif ($lastIsOpen && $lastNumbered && $childNumbered && $lastParentChild[ 'numbering' ] > $child[ 'numbering' ]) {
        // Numbering restarted below the last child: treat the node as its sub-item.
        $lastParentChild[ 'children' ][] = $child;
        $parent[ 'children' ][ $lastIndex ] = $lastParentChild;
    } else {
        $parent[ 'children' ][] = $child;
    }

    return $parent;
}
|||
|
|||
|
|||
/**
 * Turn a (possibly nested) array of HTML fragments into a list of paragraph rows.
 *
 * An item that is a single-element wrapper around another array is unwrapped
 * recursively and its result returned immediately. For any other item, the
 * paragraphs built from it are merged into the running result when there is
 * more than one of them; a single paragraph replaces the running result.
 *
 * @param array $array
 *
 * @return array
 */
public function handleTestHtml($array)
{
    $collected = [];

    foreach ($array as $entry) {
        if (count($entry) == 1 && is_array(last($entry))) {
            return $this->handleTestHtml($entry);
        }

        $paragraphs = $this->buildParagraphs($entry);

        if (! isset($collected[ 'content' ]) && count($paragraphs) > 1) {
            $collected = array_merge($collected, $paragraphs);

            continue;
        }

        if ($paragraphs) {
            $collected = $paragraphs;
        }
    }

    return $collected;
}
|||
|
|||
|
|||
// Flattens a (possibly nested) array of HTML strings into a flat list of
// ['content' => <cleaned html>] rows. Nested arrays are handled recursively;
// empty and whitespace-only strings are skipped.
private function buildParagraphs($paragraphs) |
|||
{ |
|||
$result = []; |
|||
// NOTE(review): nothing in this method ever writes to $alreadyHandled, so the
// array_key_exists() guard below never skips an index.
$alreadyHandled = []; |
|||
for ($i = 0; $i < count($paragraphs); $i++) { |
|||
if (array_key_exists($i, $alreadyHandled)) { |
|||
continue; |
|||
} |
|||
$paragraph = $paragraphs[ $i ]; |
|||
if (is_array($paragraph)) { |
|||
$result = array_merge($result, $this->buildParagraphs($paragraph)); |
|||
} elseif (strlen($paragraph) && ! ctype_space($paragraph)) { |
|||
|
|||
// Clean-up pipeline, innermost call first: collapse whitespace runs to one space,
// drop <font> open/close tags, drop elements whose body is only whitespace / <br />
// (plus the other alternatives listed in the pattern), drop '<b> </b>', then trim.
$cleanHtml = trim(str_replace('<b> </b>', '', |
|||
preg_replace('/<([^>\s]+)[^>]*>(?:\s*(?:<br \/>| | | | | | | )\s*)*<\/\1>/', |
|||
'', preg_replace('/(<font[^>]*>)|(<\/font>)/', '', preg_replace('/\s+/S', " ", $paragraph))))); |
|||
|
|||
// Entities are decoded only after the markup clean-up above.
if (! empty($cleanHtml)) { |
|||
$result[] = ['content' => html_entity_decode($cleanHtml, ENT_COMPAT | ENT_HTML401, 'UTF-8')]; |
|||
} |
|||
|
|||
|
|||
} |
|||
} |
|||
|
|||
return $result; |
|||
} |
|||
|
|||
|
|||
/*
 * Return the text captured between the opening and closing $tagname tag,
 * ignoring <u> / </u> markers, or an empty string when the tag is not found.
 */
private function getTextBetweenTags($string, $tagname)
{
    $withoutUnderline = str_replace(['<u>', '</u>'], '', $string);

    if (! preg_match("/<$tagname ?.*>(.*)<\/$tagname>/", $withoutUnderline, $matches)) {
        return '';
    }

    return last($matches);
}
|||
|
|||
|
|||
/**
 * Tell whether a paragraph opens with a bold heading that is one of the
 * known break points.
 *
 * Only paragraphs containing exactly one closing </b> tag qualify. When the
 * paragraph carries a 'numbering' value, every '.', ')' and occurrence of
 * that numbering is removed from the content before the heading is compared.
 *
 * The two preg_replace() calls that used to open this method discarded their
 * return value (and passed the content as the replacement), so they never
 * affected the result and have been removed.
 *
 * @param array $paragraph   Row with a 'content' string and an optional 'numbering' value.
 * @param array $breakPoints Heading texts that mark a break.
 *
 * @return bool
 */
private function paragraphBrake($paragraph, array $breakPoints)
{
    $content = $paragraph[ 'content' ];

    if (isset($paragraph[ 'numbering' ])) {
        $content = str_replace(['.', ')', $paragraph[ 'numbering' ]], '', $content);
    }

    if (substr_count($content, '</b>') !== 1) {
        return false;
    }

    // Everything before the single </b>, without the opening tag and outer whitespace.
    $heading = trim(str_replace('<b>', '', trim(explode('</b>', $content)[ 0 ])));

    return in_array($heading, $breakPoints);
}
|||
|
|||
} |
|||
|
@ -1,406 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace App\Parser; |
|||
|
|||
use Illuminate\Support\Facades\Log; |
|||
use Illuminate\Support\Facades\Storage; |
|||
use SimpleXMLElement; |
|||
|
|||
class ParseXml |
|||
{ |
|||
|
|||
/** |
|||
* @var int |
|||
*/ |
|||
private $titleFontThreshold; |
|||
|
|||
/** |
|||
* @var int |
|||
*/ |
|||
private $headerFontFooterThreshold; |
|||
|
|||
|
|||
/**
 * Create a parser whose title and header/footer font thresholds are not yet known.
 */
public function __construct()
{
    $this->titleFontThreshold        = null;
    $this->headerFontFooterThreshold = null;
}
|||
|
|||
|
|||
/**
 * Parse an XML document and return its rows as a nested parent/child structure.
 *
 * A string argument is treated as a path on the 'contracts' storage disk; the
 * method polls once per second until that file exists. Any other argument is
 * handed to file_get_contents().
 *
 * When the document cannot be read or is not well-formed XML the failure is
 * logged and an empty array is returned. Previously the method carried on with
 * an undefined or false value and crashed on the ->xpath() call.
 *
 * @param mixed $xmlFile Storage path (string) or anything file_get_contents() accepts.
 *
 * @return mixed Nested rows, or an empty array on failure.
 */
public function handle($xmlFile)
{
    if (is_string($xmlFile)) {
        try {
            $storageDisk = Storage::disk('contracts');
            while (! $storageDisk->exists($xmlFile)) {
                // Sleep if file not yet written
                sleep(1);
            }
            $file = $storageDisk->get($xmlFile);
        } catch (\Exception $exception) {
            Log::error('Failed to load the xml file '.$exception->getMessage());

            return [];
        }
    } else {
        $file = file_get_contents($xmlFile);
    }

    if (! is_string($file) || $file === '') {
        Log::error('Failed to load the xml file: no content could be read');

        return [];
    }

    $document = simplexml_load_string($file);
    if ($document === false) {
        Log::error('Failed to load the xml file: content is not well-formed XML');

        return [];
    }

    return $this->buildChildStructure($this->handleElements($document->xpath('//text')));
}
|||
|
|||
|
|||
|
|||
/** |
|||
* Merge the raw text nodes into logical rows.
*
* Each SimpleXMLElement in the input is read through getNodeContent(). While the
* following node is judged to continue the current one, its text is appended and
* the current node's 'top' / 'height' are overwritten with the follower's values.
* The finished text goes through extractNumbering() and is stored as one row with
* the keys type, content, numbering, top, height, left, font and children.
* Entries that are not SimpleXMLElement instances are skipped.
*
* @param $element array of nodes, or a single value that is cast to an array |
|||
* |
|||
* @return mixed list of row arrays |
|||
*/ |
|||
private function handleElements($element) |
|||
{ |
|||
if (is_array($element)) { |
|||
$elements = $element; |
|||
} else { |
|||
$elements = (array) $element; |
|||
} |
|||
//dd(!in_array(trim(last(explode(' ', strip_tags('modify or make additions to the {P1_Name} Software, except to the extent permitted by law; or')))),['and','or']),trim(last(explode(' ', strip_tags('modify or make additions to the {P1_Name} Software, except to the extent permitted by law; or')))));
 |
|||
// The title font must be known before rows are built: it decides each row's 'type' below.
$this->setTitleThreshold($elements); |
|||
$numberOfNodes = count($elements); |
|||
$rows = []; |
|||
for ($i = 0; $i < $numberOfNodes; $i++) { |
|||
$current = $elements[ $i ]; |
|||
// Never filled in this method; every row is emitted with an empty 'children' list.
$listContent = []; |
|||
if ($current instanceof SimpleXMLElement) { |
|||
$content = $this->getNodeContent($current); |
|||
//if(strpos($content,'Provided that the Customer has continued to pay ')!==false){
 |
|||
// dd(($i + 1 <= $numberOfNodes && isset($elements[ $i + 1 ]) && (((int) $elements[ $i + 1 ][ 'top' ] === (int) $current[ 'top' ]) || (int) $elements[ $i + 1 ][ 'top' ] <= ((int) $current[ 'top' ] + (int) $current[ 'height' ] + 3)) && (int) $current[ 'top' ] <= (int) $elements[ $i + 1 ][ 'top' ])
 |
|||
// || (isset($elements[ $i + 1 ]) && ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1))), substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1))));
 |
|||
//}
 |
|||
$parentNumbering = []; |
|||
|
|||
// Continuation test: keep merging while a next node exists and the heuristic below holds.
// It combines vertical proximity ('top' / 'height' with a 3-unit tolerance), the case of
// the next node's first character, the current text's last character, a dash-number-dash
// pattern, and whether the next node starts with '<b>'. Two overrides force a merge:
// the next node shares the same 'top' as node $i, or the next node's text is just '['.
while ($i + 1 <= $numberOfNodes && isset($elements[ $i + 1 ]) && |
|||
(((((((int) $elements[ $i + 1 ][ 'top' ] === (int) $current[ 'top' ]) || (int) $elements[ $i + 1 ][ 'top' ] <= ((int) $current[ 'top' ] + (int) $current[ 'height' ] + 3)) && (int) $current[ 'top' ] <= (int) $elements[ $i + 1 ][ 'top' ]) |
|||
|| (ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1))) |
|||
|| (! in_array(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0, 1), [','])) |
|||
|| (ctype_lower(substr(trim(strip_tags($content)),strlen(trim(strip_tags($content))) - 1)))) |
|||
&& ! in_array(substr(trim(str_replace(['and','or'], '', $content)), strlen(trim(str_replace(['and', 'or'], '', $content))) - 1),['!', '.', '?', ';', '_', ':', ')']) |
|||
&& ! preg_match('/^.*?\-[^\d]*(\d+)[^\d]*\-.*$/',$content) |
|||
&& (substr(trim($this->getNodeContent($elements[ $i + 1 ])), 0,strlen('<b>')) !== '<b>' |
|||
&& ctype_lower((substr(trim(strip_tags($content)),strlen(trim(strip_tags($content))) - 1))))) |
|||
|| ((int) $elements[ $i ][ 'top' ] === (int) $elements[ $i + 1 ][ 'top' ])) |
|||
|| (isset($elements[ $i + 1 ]) && trim(strip_tags($this->getNodeContent($elements[ $i+1])))=='[') |
|||
) { |
|||
//if($parentNumbering){
 |
|||
// dd($parentNumbering,$content);
 |
|||
//}
 |
|||
|
|||
// Look for a leading number in the first five characters of the content, after dropping
// everything from the first '{', turning ')' into '.' and keeping only digits, dots and
// parentheses. This first pattern requires two adjacent numeric parts.
preg_match('/^([-+]?\d*\.?\d+)(?:[-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', |
|||
preg_replace('/[^0-9\.)]/', '', substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', |
|||
preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($content))))), |
|||
0, 5)), $childNumbering); |
|||
// Fallback: a single numeric part, stored in $parentNumbering instead.
if (! $childNumbering) { |
|||
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '', |
|||
substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', |
|||
preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($content))))), |
|||
0, 5)), $parentNumbering); |
|||
} |
|||
//if($childNumbering && strpos($childNumbering[0],"2.1.5")!==false){
 |
|||
// dd(11,$content,$elements[$i],$i,$i+1);
 |
|||
//}
 |
|||
|
|||
// Absorb the next node: append its text and take over its position.
$nextElement = $elements[ $i + 1 ]; |
|||
$nextElementContent = $this->getNodeContent($nextElement); |
|||
$content .= ' '.$nextElementContent; |
|||
$current[ 'top' ] = $nextElement[ 'top' ]; |
|||
$current[ 'height' ] = $nextElement[ 'height' ]; |
|||
|
|||
|
|||
|
|||
// A single-part number ends the merge right after this node has been absorbed.
if (count($parentNumbering)) { |
|||
$current[ 'row_numbering' ] = $parentNumbering[ 0 ]; |
|||
$content = str_replace($current[ 'row_numbering' ], '', $content); |
|||
$i++; |
|||
|
|||
break; |
|||
|
|||
} elseif ($childNumbering) { |
|||
$current[ 'row_numbering' ] = $childNumbering[ 0 ]; |
|||
$content = str_replace($current[ 'row_numbering' ], '', $content); |
|||
// $i has not been advanced yet, so $elements[$i+1] below is the node just absorbed.
if (strlen(trim(strip_tags($content))) && ! in_array(substr(trim(strip_tags($content)), |
|||
strlen(trim(strip_tags($content))) - 1), |
|||
['.', ':', '!', '?','[',',']) && !ctype_lower(substr(trim(strip_tags($content)), |
|||
strlen(trim(strip_tags($content)))-1)) && (!ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[$i+1]))), |
|||
0, 1)) || !in_array(substr(trim(strip_tags($this->getNodeContent($elements[$i+1]))), 0, 1), |
|||
['[', '{']))) { |
|||
$i++; |
|||
|
|||
|
|||
break; |
|||
} |
|||
|
|||
|
|||
|
|||
} |
|||
// Stop merging when a numbering is known and node $i itself holds nothing but digits
// once non-alphanumeric characters are stripped.
if( ! empty($current[ 'row_numbering' ]) && ctype_digit(trim(preg_replace("/[^0-9a-zA-Z]/", |
|||
"", strip_tags($this->getNodeContent($elements[$i])))))){ |
|||
|
|||
$i++; |
|||
break; |
|||
} |
|||
//$current[ 'font' ] = $nextElement[ 'font' ];
 |
|||
$i++; |
|||
|
|||
continue; |
|||
} |
|||
|
|||
$data = $this->extractNumbering($content); |
|||
|
|||
// NOTE(review): the (int) cast keeps only the integer part of 'row_numbering'
// (e.g. "2.1" becomes 2) - confirm that this is intended.
$content = [ |
|||
'type' => (int) $current[ 'font' ] === $this->titleFontThreshold ? 'title' : null, |
|||
'content' => $data[ 'content' ], |
|||
'numbering' => (! empty($current[ 'row_numbering' ])) ? (int)$current[ 'row_numbering' ] : $data[ 'numbering' ], |
|||
'top' => (int) $current[ 'top' ], |
|||
'height' => (int) $current[ 'height' ], |
|||
'left' => (int) $current[ 'left' ], |
|||
'font' => (int) $current[ 'font' ], |
|||
'children' => $listContent |
|||
]; |
|||
|
|||
$rows[] = $content; |
|||
} |
|||
|
|||
} |
|||
|
|||
return $rows; |
|||
} |
|||
|
|||
|
|||
/**
 * Return the inner markup of the first <text> element found in the node's
 * XML serialisation, with every whitespace run collapsed to a single space.
 * Yields an empty string when no <text>...</text> pair is found.
 *
 * @param $node
 *
 * @return string|string[]|null
 */
private function getNodeContent($node)
{
    $inner = '';

    if (preg_match_all("/<text.*?>(.*?)<\/text>/", $node->asXML(), $matches) && $matches[ 1 ]) {
        $inner = $matches[ 1 ][ 0 ];
    }

    return preg_replace('!\s+!', ' ', $inner);
}
|||
|
|||
|
|||
/**
 * Split a leading numbering prefix off the content.
 *
 * Two patterns are tried in order and the first that matches wins. The
 * numbering is the trimmed value of last($n) for that match and is only
 * accepted when it is longer than one character; every occurrence of it is
 * then removed from the content. The content is always returned wrapped in
 * <p>...</p>.
 *
 * NOTE(review): last() presumably returns the final capture group. For the
 * first pattern that is the single character following the prefix, which can
 * never pass the length check - confirm whether group 1 was intended.
 *
 * @param $content
 *
 * @return array ['content' => string, 'numbering' => string]
 */
private function extractNumbering($content)
{
    $patterns = [
        '/^(([a-zA-Z0-9]+[.\)])+)([ ]|[a-z]|[A-Z])/',
        '/^(([\d\.]+)\d)/',
    ];

    $numbering = '';
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $content, $n)) {
            $numbering = trim(last($n));

            break;
        }
    }

    $accepted = strlen($numbering) > 1;
    $text     = $accepted ? str_replace($numbering, '', $content) : $content;

    return [
        'content'   => '<p>'.trim($text).'</p>',
        'numbering' => $accepted ? $numbering : '',
    ];
}
|||
|
|||
|
|||
/**
 * Build the structure as required by the editor and the gamification module.
 *
 * Rows are walked in document order. Each row that has not already been
 * attached to an earlier row becomes a top-level entry and absorbs the rows
 * that follow it for as long as they are indented further to the right (or,
 * for a title, as long as they have no type). Rows classified as page header
 * (top < 100) or footer (top > 1150) are left out.
 *
 * The outer loop now visits the last row as well. It used to stop one short,
 * so a final row that no earlier row had absorbed was silently dropped.
 *
 * @param $elements list of rows with 'type', 'top', 'left', 'content' and 'children' keys
 *
 * @return array
 */
private function buildChildStructure($elements)
{
    // Rows without a type are classified by vertical position. This depends only on
    // the row itself, so it is done once up front rather than lazily inside the loops.
    foreach ($elements as $index => $element) {
        if (isset($element[ 'type' ])) {
            continue;
        }
        if ($element[ 'top' ] < 100) {
            $elements[ $index ][ 'type' ] = 'header';
        } elseif ($element[ 'top' ] > 1150) {
            $elements[ $index ][ 'type' ] = 'footer';
        }
    }

    $total   = count($elements);
    $handled = [];
    $build   = [];

    for ($i = 0; $i < $total; $i++) {
        $parentType = $elements[ $i ][ 'type' ] ?? null;

        if (isset($handled[ $i ]) || in_array($parentType, ['footer', 'header'], true)) {
            continue;
        }

        // A row consisting only of digits keeps collecting rows even across a title.
        $parentIsBareNumber = ctype_digit(trim(preg_replace("/[^0-9a-zA-Z]/", "", strip_tags($elements[ $i ][ 'content' ]))));

        for ($j = $i + 1; $j < $total; $j++) {
            $childType = $elements[ $j ][ 'type' ] ?? null;

            if (isset($handled[ $j ]) || in_array($childType, ['footer', 'header'], true)) {
                continue;
            }

            // A title on another line starts a new section.
            if ($childType === 'title' && $elements[ $i ][ 'top' ] !== $elements[ $j ][ 'top' ] && ! $parentIsBareNumber) {
                break;
            }

            $isIndented   = $elements[ $i ][ 'left' ] < $elements[ $j ][ 'left' ];
            $isTitleBody  = $parentType === 'title' && $childType === null;

            if (! $isIndented && ! $isTitleBody) {
                break;
            }

            $elements[ $i ] = $this->handlePossibleChild($elements[ $i ], $elements[ $j ]);
            $handled[ $j ]  = true;
        }

        $build[]       = $elements[ $i ];
        $handled[ $i ] = true;
    }

    return $build;
}
|||
|
|||
|
|||
/**
 * Attach $child beneath $parent at the depth implied by its 'left' offset.
 *
 * A child indented further than the parent's most recent child is pushed down
 * recursively into that child; every other child is appended as a sibling of
 * the existing children.
 *
 * A child indented less than the last sibling used to be discarded without a
 * trace unless the title rule applied. It is now appended so no content is lost.
 *
 * @param $parent row with 'left' and 'children' keys
 * @param $child  row of the same shape that follows $parent in the document
 *
 * @return mixed the updated parent
 */
protected function handlePossibleChild($parent, $child)
{
    if (count($parent[ 'children' ]) === 0) {
        $parent[ 'children' ][] = $child;

        return $parent;
    }

    $lastIndex       = count($parent[ 'children' ]) - 1;
    $lastParentChild = last($parent[ 'children' ]);

    // Deeper than the last child: it belongs somewhere inside that child's subtree.
    if ($child[ 'left' ] > $lastParentChild[ 'left' ]) {
        $parent[ 'children' ][ $lastIndex ] = $this->handlePossibleChild($lastParentChild, $child);

        return $parent;
    }

    // Same depth as, or shallower than, the last child: keep it as a sibling.
    $parent[ 'children' ][] = $child;

    return $parent;
}
|||
|
|||
|
|||
/**
 * Set the title font threshold, unless it is already set.
 *
 * The threshold becomes the 'font' value of the first element that has a
 * successor and either contains a <b> child, sits at index 0, or has a
 * smaller font value than its successor.
 *
 * The bold check used to read an undefined variable ($current) and could
 * therefore never succeed; it now inspects the element being visited.
 *
 * @param $elements
 */
protected function setTitleThreshold($elements)
{
    $total = count($elements);

    foreach ($elements as $index => $element) {
        if (isset($this->titleFontThreshold)) {
            // Nothing further can change once the threshold is known.
            break;
        }

        if ($index + 1 >= $total) {
            continue;
        }

        $nextElement = $elements[ $index + 1 ];

        if (isset($element->b) || $index == 0 || (! is_null($nextElement) && (int) $element[ 'font' ] < (int) $nextElement[ 'font' ])) {
            $this->titleFontThreshold = (int) $element[ 'font' ];
        }
    }
}
|||
|
|||
|
|||
/**
 * Set the header/footer font threshold, unless it is already set.
 *
 * Scans consecutive pairs and, where the following element has no 'type' and
 * lies above the current one (smaller 'top'), stores that following
 * element's 'font' value.
 *
 * @param $elements
 */
protected function setHeaderFooterThreshold($elements)
{
    foreach ($elements as $position => $entry) {
        if (! isset($elements[ $position + 1 ]) || isset($this->headerFontFooterThreshold)) {
            continue;
        }

        $following = $elements[ $position + 1 ];

        if (isset($following[ 'type' ])) {
            continue;
        }

        if ($entry[ 'top' ] > $following[ 'top' ]) {
            $this->headerFontFooterThreshold = $following[ 'font' ];
        }
    }
}
|||
|
|||
} |
@ -1,35 +0,0 @@ |
|||
<?php |
|||
|
|||
use Illuminate\Database\Migrations\Migration; |
|||
use Illuminate\Database\Schema\Blueprint; |
|||
use Illuminate\Support\Facades\Schema; |
|||
|
|||
class CreateFailedJobsTable extends Migration
{
    /**
     * Create the failed_jobs table.
     *
     * @return void
     */
    public function up()
    {
        $define = static function (Blueprint $table) {
            $table->bigIncrements('id');
            $table->text('connection');
            $table->text('queue');
            $table->longText('payload');
            $table->longText('exception');
            $table->timestamp('failed_at')->useCurrent();
        };

        Schema::create('failed_jobs', $define);
    }

    /**
     * Drop the failed_jobs table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('failed_jobs');
    }
}
@ -1,16 +0,0 @@ |
|||
<?php |
|||
|
|||
use Illuminate\Database\Seeder; |
|||
|
|||
class DatabaseSeeder extends Seeder
{
    /**
     * Entry point for database seeding; no seeders are registered.
     *
     * @return void
     */
    public function run()
    {
        // $this->call(UsersTableSeeder::class);
    }
}
@ -1,21 +0,0 @@ |
|||
{ |
|||
"private": true, |
|||
"scripts": { |
|||
"dev": "npm run development", |
|||
"development": "cross-env NODE_ENV=development node_modules/webpack/bin/webpack.js --progress --hide-modules --config=node_modules/laravel-mix/setup/webpack.config.js", |
|||
"watch": "npm run development -- --watch", |
|||
"watch-poll": "npm run watch -- --watch-poll", |
|||
"hot": "cross-env NODE_ENV=development node_modules/webpack-dev-server/bin/webpack-dev-server.js --inline --hot --config=node_modules/laravel-mix/setup/webpack.config.js", |
|||
"prod": "npm run production", |
|||
"production": "cross-env NODE_ENV=production node_modules/webpack/bin/webpack.js --no-progress --hide-modules --config=node_modules/laravel-mix/setup/webpack.config.js" |
|||
}, |
|||
"devDependencies": { |
|||
"axios": "^0.19", |
|||
"cross-env": "^5.1", |
|||
"laravel-mix": "^5.0.1", |
|||
"lodash": "^4.17.13", |
|||
"resolve-url-loader": "^2.3.1", |
|||
"sass": "^1.15.2", |
|||
"sass-loader": "^8.0.0" |
|||
} |
|||
} |
@ -1 +0,0 @@ |
|||
require('./bootstrap'); |
@ -1,28 +0,0 @@ |
|||
window._ = require('lodash'); |
|||
|
|||
/** |
|||
* We'll load the axios HTTP library which allows us to easily issue requests |
|||
* to our Laravel back-end. This library automatically handles sending the |
|||
* CSRF token as a header based on the value of the "XSRF" token cookie. |
|||
*/ |
|||
|
|||
window.axios = require('axios'); |
|||
|
|||
window.axios.defaults.headers.common['X-Requested-With'] = 'XMLHttpRequest'; |
|||
|
|||
/** |
|||
* Echo exposes an expressive API for subscribing to channels and listening |
|||
* for events that are broadcast by Laravel. Echo and event broadcasting |
|||
* allows your team to easily build robust real-time web applications. |
|||
*/ |
|||
|
|||
// import Echo from 'laravel-echo';
|
|||
|
|||
// window.Pusher = require('pusher-js');
|
|||
|
|||
// window.Echo = new Echo({
|
|||
// broadcaster: 'pusher',
|
|||
// key: process.env.MIX_PUSHER_APP_KEY,
|
|||
// cluster: process.env.MIX_PUSHER_APP_CLUSTER,
|
|||
// forceTLS: true
|
|||
// });
|
@ -1,19 +0,0 @@ |
|||
<?php |
|||
|
|||
return [ |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Authentication Language Lines |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| The following language lines are used during authentication for various |
|||
| messages that we need to display to the user. You are free to modify |
|||
| these language lines according to your application's requirements. |
|||
| |
|||
*/ |
|||
|
|||
'failed' => 'These credentials do not match our records.', |
|||
'throttle' => 'Too many login attempts. Please try again in :seconds seconds.', |
|||
|
|||
]; |
@ -1,19 +0,0 @@ |
|||
<?php |
|||
|
|||
return [ |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Pagination Language Lines |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| The following language lines are used by the paginator library to build |
|||
| the simple pagination links. You are free to change them to anything |
|||
| you want to customize your views to better match your application. |
|||
| |
|||
*/ |
|||
|
|||
'previous' => '« Previous', |
|||
'next' => 'Next »', |
|||
|
|||
]; |
@ -1,22 +0,0 @@ |
|||
<?php |
|||
|
|||
return [ |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Password Reset Language Lines |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| The following language lines are the default lines which match reasons |
|||
| that are given by the password broker for a password update attempt |
|||
| has failed, such as for an invalid token or invalid new password. |
|||
| |
|||
*/ |
|||
|
|||
'reset' => 'Your password has been reset!', |
|||
'sent' => 'We have e-mailed your password reset link!', |
|||
'throttled' => 'Please wait before retrying.', |
|||
'token' => 'This password reset token is invalid.', |
|||
'user' => "We can't find a user with that e-mail address.", |
|||
|
|||
]; |
@ -1,151 +0,0 @@ |
|||
<?php |
|||
|
|||
return [ |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Validation Language Lines |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| The following language lines contain the default error messages used by |
|||
| the validator class. Some of these rules have multiple versions such |
|||
| as the size rules. Feel free to tweak each of these messages here. |
|||
| |
|||
*/ |
|||
|
|||
'accepted' => 'The :attribute must be accepted.', |
|||
'active_url' => 'The :attribute is not a valid URL.', |
|||
'after' => 'The :attribute must be a date after :date.', |
|||
'after_or_equal' => 'The :attribute must be a date after or equal to :date.', |
|||
'alpha' => 'The :attribute may only contain letters.', |
|||
'alpha_dash' => 'The :attribute may only contain letters, numbers, dashes and underscores.', |
|||
'alpha_num' => 'The :attribute may only contain letters and numbers.', |
|||
'array' => 'The :attribute must be an array.', |
|||
'before' => 'The :attribute must be a date before :date.', |
|||
'before_or_equal' => 'The :attribute must be a date before or equal to :date.', |
|||
'between' => [ |
|||
'numeric' => 'The :attribute must be between :min and :max.', |
|||
'file' => 'The :attribute must be between :min and :max kilobytes.', |
|||
'string' => 'The :attribute must be between :min and :max characters.', |
|||
'array' => 'The :attribute must have between :min and :max items.', |
|||
], |
|||
'boolean' => 'The :attribute field must be true or false.', |
|||
'confirmed' => 'The :attribute confirmation does not match.', |
|||
'date' => 'The :attribute is not a valid date.', |
|||
'date_equals' => 'The :attribute must be a date equal to :date.', |
|||
'date_format' => 'The :attribute does not match the format :format.', |
|||
'different' => 'The :attribute and :other must be different.', |
|||
'digits' => 'The :attribute must be :digits digits.', |
|||
'digits_between' => 'The :attribute must be between :min and :max digits.', |
|||
'dimensions' => 'The :attribute has invalid image dimensions.', |
|||
'distinct' => 'The :attribute field has a duplicate value.', |
|||
'email' => 'The :attribute must be a valid email address.', |
|||
'ends_with' => 'The :attribute must end with one of the following: :values.', |
|||
'exists' => 'The selected :attribute is invalid.', |
|||
'file' => 'The :attribute must be a file.', |
|||
'filled' => 'The :attribute field must have a value.', |
|||
'gt' => [ |
|||
'numeric' => 'The :attribute must be greater than :value.', |
|||
'file' => 'The :attribute must be greater than :value kilobytes.', |
|||
'string' => 'The :attribute must be greater than :value characters.', |
|||
'array' => 'The :attribute must have more than :value items.', |
|||
], |
|||
'gte' => [ |
|||
'numeric' => 'The :attribute must be greater than or equal :value.', |
|||
'file' => 'The :attribute must be greater than or equal :value kilobytes.', |
|||
'string' => 'The :attribute must be greater than or equal :value characters.', |
|||
'array' => 'The :attribute must have :value items or more.', |
|||
], |
|||
'image' => 'The :attribute must be an image.', |
|||
'in' => 'The selected :attribute is invalid.', |
|||
'in_array' => 'The :attribute field does not exist in :other.', |
|||
'integer' => 'The :attribute must be an integer.', |
|||
'ip' => 'The :attribute must be a valid IP address.', |
|||
'ipv4' => 'The :attribute must be a valid IPv4 address.', |
|||
'ipv6' => 'The :attribute must be a valid IPv6 address.', |
|||
'json' => 'The :attribute must be a valid JSON string.', |
|||
'lt' => [ |
|||
'numeric' => 'The :attribute must be less than :value.', |
|||
'file' => 'The :attribute must be less than :value kilobytes.', |
|||
'string' => 'The :attribute must be less than :value characters.', |
|||
'array' => 'The :attribute must have less than :value items.', |
|||
], |
|||
'lte' => [ |
|||
'numeric' => 'The :attribute must be less than or equal :value.', |
|||
'file' => 'The :attribute must be less than or equal :value kilobytes.', |
|||
'string' => 'The :attribute must be less than or equal :value characters.', |
|||
'array' => 'The :attribute must not have more than :value items.', |
|||
], |
|||
'max' => [ |
|||
'numeric' => 'The :attribute may not be greater than :max.', |
|||
'file' => 'The :attribute may not be greater than :max kilobytes.', |
|||
'string' => 'The :attribute may not be greater than :max characters.', |
|||
'array' => 'The :attribute may not have more than :max items.', |
|||
], |
|||
'mimes' => 'The :attribute must be a file of type: :values.', |
|||
'mimetypes' => 'The :attribute must be a file of type: :values.', |
|||
'min' => [ |
|||
'numeric' => 'The :attribute must be at least :min.', |
|||
'file' => 'The :attribute must be at least :min kilobytes.', |
|||
'string' => 'The :attribute must be at least :min characters.', |
|||
'array' => 'The :attribute must have at least :min items.', |
|||
], |
|||
'not_in' => 'The selected :attribute is invalid.', |
|||
'not_regex' => 'The :attribute format is invalid.', |
|||
'numeric' => 'The :attribute must be a number.', |
|||
'password' => 'The password is incorrect.', |
|||
'present' => 'The :attribute field must be present.', |
|||
'regex' => 'The :attribute format is invalid.', |
|||
'required' => 'The :attribute field is required.', |
|||
'required_if' => 'The :attribute field is required when :other is :value.', |
|||
'required_unless' => 'The :attribute field is required unless :other is in :values.', |
|||
'required_with' => 'The :attribute field is required when :values is present.', |
|||
'required_with_all' => 'The :attribute field is required when :values are present.', |
|||
'required_without' => 'The :attribute field is required when :values is not present.', |
|||
'required_without_all' => 'The :attribute field is required when none of :values are present.', |
|||
'same' => 'The :attribute and :other must match.', |
|||
'size' => [ |
|||
'numeric' => 'The :attribute must be :size.', |
|||
'file' => 'The :attribute must be :size kilobytes.', |
|||
'string' => 'The :attribute must be :size characters.', |
|||
'array' => 'The :attribute must contain :size items.', |
|||
], |
|||
'starts_with' => 'The :attribute must start with one of the following: :values.', |
|||
'string' => 'The :attribute must be a string.', |
|||
'timezone' => 'The :attribute must be a valid zone.', |
|||
'unique' => 'The :attribute has already been taken.', |
|||
'uploaded' => 'The :attribute failed to upload.', |
|||
'url' => 'The :attribute format is invalid.', |
|||
'uuid' => 'The :attribute must be a valid UUID.', |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Custom Validation Language Lines |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| Here you may specify custom validation messages for attributes using the |
|||
| convention "attribute.rule" to name the lines. This makes it quick to |
|||
| specify a specific custom language line for a given attribute rule. |
|||
| |
|||
*/ |
|||
|
|||
'custom' => [ |
|||
'attribute-name' => [ |
|||
'rule-name' => 'custom-message', |
|||
], |
|||
], |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Custom Validation Attributes |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| The following language lines are used to swap our attribute placeholder |
|||
| with something more reader friendly such as "E-Mail Address" instead |
|||
| of "email". This simply helps us make our message more expressive. |
|||
| |
|||
*/ |
|||
|
|||
'attributes' => [], |
|||
|
|||
]; |
@ -1 +0,0 @@ |
|||
// |
@ -1,5 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Unauthorized')) |
|||
@section('code', '401') |
|||
@section('message', __('Unauthorized')) |
@ -1,5 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Forbidden')) |
|||
@section('code', '403') |
|||
@section('message', __($exception->getMessage() ?: 'Forbidden')) |
@ -1,4 +0,0 @@ |
|||
@extends('errors.minimal') |
|||
@section('title', __('Not Found')) |
|||
@section('code', '404') |
|||
@section('message', __('The page you are looking for might have been removed, had its name changed, or is temporarily unavailable.')) |
@ -1,4 +0,0 @@ |
|||
@extends('errors.minimal') |
|||
@section('title', __('405 Error')) |
|||
@section('code', '405') |
|||
@section('message', __('The page you are looking for might have been removed, had its name changed, or is temporarily unavailable.')) |
@ -1,5 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Page Expired')) |
|||
@section('code', '419') |
|||
@section('message', __('Page Expired')) |
@ -1,6 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Too Many Requests')) |
|||
@section('code', '429') |
|||
@section('message', __('Too Many Requests')) |
|||
|
@ -1,5 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Server Error')) |
|||
@section('code', '500') |
|||
@section('message', __('Server Error')) |
@ -1,5 +0,0 @@ |
|||
@extends(' errors.minimal') |
|||
|
|||
@section('title', __('Service Unavailable')) |
|||
@section('code', '503') |
|||
@section('message', __($exception->getMessage() ?: 'Service Unavailable')) |
@ -1,126 +0,0 @@ |
|||
<!DOCTYPE html> |
|||
<html lang="en"> |
|||
<head> |
|||
<meta charset="utf-8"> |
|||
<meta name="viewport" content="width=device-width, initial-scale=1"> |
|||
|
|||
<title>@yield('title')</title> |
|||
|
|||
<!-- Fonts --> |
|||
<link rel="dns-prefetch" href="//fonts.gstatic.com"> |
|||
<link href="https://fonts.googleapis.com/css?family=Josefin+Sans:400,700" rel="stylesheet"> |
|||
|
|||
<style> |
|||
* { |
|||
-webkit-box-sizing: border-box; |
|||
box-sizing: border-box; |
|||
} |
|||
|
|||
body { |
|||
padding: 0; |
|||
margin: 0; |
|||
} |
|||
|
|||
#container {
|
|||
position: relative; |
|||
height: 100vh; |
|||
background-color: #f3f3f3;
|
|||
} |
|||
|
|||
#container .container {
|
|||
position: absolute; |
|||
left: 50%; |
|||
top: 50%; |
|||
-webkit-transform: translate(-50%, -50%); |
|||
-ms-transform: translate(-50%, -50%); |
|||
transform: translate(-50%, -50%); |
|||
} |
|||
|
|||
.container { |
|||
max-width: 460px; |
|||
width: 100%; |
|||
text-align: center; |
|||
line-height: 1.4; |
|||
} |
|||
|
|||
.container .code { |
|||
height: 158px; |
|||
line-height: 153px; |
|||
} |
|||
|
|||
.container .code h1 { |
|||
font-family: 'Josefin Sans', sans-serif; |
|||
color: #222;
|
|||
font-size: 220px; |
|||
letter-spacing: 10px; |
|||
margin: 0; |
|||
font-weight: 700; |
|||
text-shadow: 2px 2px 0 #c9c9c9, -2px -2px 0 #c9c9c9;
|
|||
} |
|||
|
|||
.container .code h1 > span { |
|||
text-shadow: 2px 2px 0 #198fd7, -2px -2px 0 #198fd7, 0 0 8px #198fe7;
|
|||
} |
|||
|
|||
.container p { |
|||
font-family: 'Josefin Sans', sans-serif; |
|||
color: #484848;
|
|||
padding-top: 10px; |
|||
font-size: 16px; |
|||
font-weight: 400; |
|||
margin-top: 0; |
|||
margin-bottom: 15px; |
|||
} |
|||
|
|||
.container a { |
|||
font-family: 'Josefin Sans', sans-serif; |
|||
font-size: 14px; |
|||
text-decoration: none; |
|||
text-transform: uppercase; |
|||
background: transparent; |
|||
color: #484848;
|
|||
border: 2px solid #484848;
|
|||
display: inline-block; |
|||
padding: 10px 25px; |
|||
font-weight: 700; |
|||
-webkit-transition: 0.2s all; |
|||
transition: 0.2s all; |
|||
} |
|||
|
|||
.container a:hover { |
|||
color: #198fd7;
|
|||
border-color: #198fe7;
|
|||
} |
|||
|
|||
@media only screen and (max-width: 480px) { |
|||
.container .code { |
|||
height: 122px; |
|||
line-height: 122px; |
|||
} |
|||
|
|||
.container .code h1 { |
|||
font-size: 122px; |
|||
} |
|||
} |
|||
|
|||
</style> |
|||
</head> |
|||
|
|||
<body> |
|||
<?php
    // Build $styledCode: the status code from the child view's "code" section,
    // with its second character wrapped in a <span> so the stylesheet can
    // highlight it (see ".container .code h1 > span").
    //
    // A missing "code" section falls back to an empty string, and the <span>
    // is only added when a second character actually exists, so neither case
    // raises an undefined-index notice or emits a stray empty <span>.
    $codeAsArray = str_split((string) (app()->view->getSections()['code'] ?? ''));

    if (isset($codeAsArray[1])) {
        $codeAsArray[1] = '<span>'.$codeAsArray[1].'</span>';
    }

    $styledCode = implode('', $codeAsArray);
?>
|
|||
<div id="container"> |
|||
<div class="container"> |
|||
<div class="code"> |
|||
<h1>{!! $styledCode !!}</h1> |
|||
</div> |
|||
<p>@yield('message')</p> |
|||
<a href="https://contrai.io">home page</a> |
|||
</div> |
|||
</div> |
|||
</body> |
|||
</html> |
@ -1,19 +1 @@ |
|||
<?php |
|||
|
|||
use Illuminate\Http\Request; |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| API Routes |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| Here is where you can register API routes for your application. These |
|||
| routes are loaded by the RouteServiceProvider within a group which |
|||
| is assigned the "api" middleware group. Enjoy building your API! |
|||
| |
|||
*/ |
|||
|
|||
// GET /user: returns the user attached to the current request.
// The route is guarded by the "auth:api" middleware.
Route::get('/user', function (Request $request) {
    return $request->user();
})->middleware('auth:api');
|||
|
@ -1,16 +1 @@ |
|||
<?php |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Broadcast Channels |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| Here you may register all of the event broadcasting channels that your |
|||
| application supports. The given channel authorization callbacks are |
|||
| used to check if an authenticated user can listen to the channel. |
|||
| |
|||
*/ |
|||
|
|||
// Authorization callback for the App.User.{id} channel: access is granted only
// when the user's id equals the {id} segment of the channel name, compared as
// integers.
Broadcast::channel('App.User.{id}', function ($user, $id) {
    $userId = (int) $user->id;
    $channelId = (int) $id;

    return $userId === $channelId;
});
@ -1,38 +0,0 @@ |
|||
<?php |
|||
|
|||
namespace Tests\Feature; |
|||
|
|||
use App\Ingest\DocxReader; |
|||
use App\Ingest\DocxWriter; |
|||
use App\Jobs\RecreateDocument; |
|||
use Illuminate\Support\Facades\Storage; |
|||
use Tests\TestCase; |
|||
|
|||
class ProcessDocxDocumentTest extends TestCase
{
    /**
     * Runs DocxReader against contracts/with-bookmarks.docx on the local disk
     * and hands its result to a DocxWriter targeting contracts/test-write.docx
     * on the same disk.
     *
     * No assertions are made; this only exercises the read/write path.
     *
     * @test
     */
    public function it_reads_docx_documents_content()
    {
        $disk = Storage::disk('local');

        $parsed = (new DocxReader($disk, 'contracts/with-bookmarks.docx'))->execute();

        (new DocxWriter($disk, 'contracts/test-write.docx'))->execute($parsed);
    }

    /**
     * Decodes contracts/a.json from the local disk (associative mode) and runs
     * the RecreateDocument job with it by calling handle() directly.
     *
     * No assertions are made; this only exercises the job.
     *
     * @test
     */
    public function it_recreates_original_document_from_json()
    {
        $payload = json_decode(Storage::disk('local')->get('contracts/a.json'), true);

        (new RecreateDocument('test123', $payload))->handle();
    }
}
@ -1,15 +0,0 @@ |
|||
const mix = require('laravel-mix'); |
|||
|
|||
/* |
|||
|-------------------------------------------------------------------------- |
|||
| Mix Asset Management |
|||
|-------------------------------------------------------------------------- |
|||
| |
|||
| Mix provides a clean, fluent API for defining some Webpack build steps |
|||
| for your Laravel application. By default, we are compiling the Sass |
|||
| file for the application as well as bundling up all the JS files. |
|||
| |
|||
*/ |
|||
|
|||
// Bundle the application script into public/js and compile the Sass
// stylesheet into public/css.
mix.js('resources/js/app.js', 'public/js');
mix.sass('resources/sass/app.scss', 'public/css');
6173
yarn.lock
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
Write
Preview
Loading…
Cancel
Save
Reference in new issue