headerFontFooterThreshold = null;
        $this->titleFontThreshold = null;
    }

    /**
     * Load an XML document, select every node matching the XPath `//text`
     * and turn those nodes into the nested row structure produced by
     * buildChildStructure().
     *
     * @param mixed $xmlFile When a string, it is looked up on the `contracts`
     *                       storage disk; any other value is passed to
     *                       file_get_contents().
     *
     * @return array Top-level rows, each carrying its nested `children`.
     *
     * @throws \RuntimeException When the file cannot be read or is not parsable XML.
     */
    public function handle($xmlFile)
    {
        if (is_string($xmlFile)) {
            try {
                $storageDisk = Storage::disk('contracts');

                // Sleep if file not yet written.
                // NOTE(review): this wait has no upper bound; if the file is never
                // written the call blocks forever. Consider a timeout.
                while (! $storageDisk->exists($xmlFile)) {
                    sleep(1);
                }

                $file = $storageDisk->get($xmlFile);
            } catch (\Exception $exception) {
                Log::error('Failed to load the xml file '.$exception->getMessage());

                // Previously execution continued with $file undefined and failed
                // on the xpath() call below; fail explicitly instead.
                throw new \RuntimeException(
                    'Failed to load the xml file '.$exception->getMessage(),
                    0,
                    $exception
                );
            }
        } else {
            $file = file_get_contents($xmlFile);
        }

        if (! is_string($file)) {
            throw new \RuntimeException('Failed to load the xml file: the content could not be read');
        }

        $xml = simplexml_load_string($file);

        if ($xml === false) {
            throw new \RuntimeException('Failed to load the xml file: the content is not valid XML');
        }

        $textNodes = $xml->xpath('//text');

        return $this->buildChildStructure(
            $this->handleElements(is_array($textNodes) ? $textNodes : [])
        );
    }

    /**
     * Convert a list of XML text nodes into flat row arrays, merging nodes
     * that are judged to be continuations of the same row.
     *
     * Each produced row has the keys `type`, `content`, `numbering`, `top`,
     * `height`, `left`, `font` and `children` (always an empty array here).
     * Entries that are not SimpleXMLElement instances are skipped.
     *
     * Side effect: the `top`, `height` and `row_numbering` attributes of the
     * first node of a merged run are overwritten on the node itself.
     *
     * @param mixed $element Array of nodes; anything else is cast to an array.
     *
     * @return array
     */
    private function handleElements($element)
    {
        $elements = is_array($element) ? $element : (array) $element;

        $this->setTitleThreshold($elements);

        $numberOfNodes = count($elements);
        $rows = [];

        for ($i = 0; $i < $numberOfNodes; $i++) {
            $current = $elements[ $i ];

            if (! $current instanceof SimpleXMLElement) {
                continue;
            }

            $content = $this->getNodeContent($current);
            $parentNumbering = [];

            // Keep absorbing the following node while it looks like a continuation
            // of the current row. The condition is reproduced clause for clause from
            // the original heuristic; do not simplify it without test fixtures.
            while (
                $i + 1 <= $numberOfNodes
                && isset($elements[ $i + 1 ])
                && (
                    (
                        (
                            (
                                (
                                    // Next node is on the same line, or starts within
                                    // the current node's height (+3 tolerance) ...
                                    ((int) $elements[ $i + 1 ][ 'top' ] === (int) $current[ 'top' ])
                                    || (int) $elements[ $i + 1 ][ 'top' ] <= ((int) $current[ 'top' ] + (int) $current[ 'height' ] + 3)
                                )
                                // ... and is not above the current node.
                                && (int) $current[ 'top' ] <= (int) $elements[ $i + 1 ][ 'top' ]
                            )
                            // Or the next node starts with a lowercase letter.
                            || (ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))), 0, 1)))
                            // Or the next node does not start with a comma.
                            || (! in_array(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))), 0, 1), [',']))
                            // Or the current content ends with a lowercase letter.
                            || (ctype_lower(substr(trim(strip_tags($content)), strlen(trim(strip_tags($content))) - 1)))
                        )
                        // The current content (with "and"/"or" removed) must not end
                        // with one of these terminating characters.
                        && ! in_array(substr(trim(str_replace(['and', 'or'], '', $content)), strlen(trim(str_replace(['and', 'or'], '', $content))) - 1), ['!', '.', '?', ';', '_', ':', ')'])
                        // The current content must not match "-<digits>-".
                        && ! preg_match('/^.*?\-[^\d]*(\d+)[^\d]*\-.*$/', $content)
                        && (
                            substr(trim($this->getNodeContent($elements[ $i + 1 ])), 0, strlen('')) !== ''
                            && ctype_lower((substr(trim(strip_tags($content)), strlen(trim(strip_tags($content))) - 1)))
                        )
                    )
                    // Or the node at the cursor and the next node share the same top.
                    || ((int) $elements[ $i ][ 'top' ] === (int) $elements[ $i + 1 ][ 'top' ])
                )
                // Or the next node consists solely of an opening square bracket.
                || (isset($elements[ $i + 1 ]) && trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))) === '[')
            ) {
                // First five significant characters of the content, reduced to
                // digits, dots and closing parentheses (")" is first turned into ".").
                $numberingCandidate = preg_replace('/[^0-9\.)]/', '', substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($content))))), 0, 5));

                preg_match('/^([-+]?\d*\.?\d+)(?:[-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', $numberingCandidate, $childNumbering);

                if (! $childNumbering) {
                    preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', $numberingCandidate, $parentNumbering);
                }

                $nextElement = $elements[ $i + 1 ];
                $nextElementContent = $this->getNodeContent($nextElement);
                $content .= ' '.$nextElementContent;

                // The merged row takes over the position of the node it absorbed.
                $current[ 'top' ] = $nextElement[ 'top' ];
                $current[ 'height' ] = $nextElement[ 'height' ];

                if (count($parentNumbering)) {
                    $current[ 'row_numbering' ] = $parentNumbering[ 0 ];
                    $content = str_replace($current[ 'row_numbering' ], '', $content);
                    $i++;
                    break;
                } elseif ($childNumbering) {
                    $current[ 'row_numbering' ] = $childNumbering[ 0 ];
                    $content = str_replace($current[ 'row_numbering' ], '', $content);

                    $plainContent = trim(strip_tags($content));
                    $lastCharacter = substr($plainContent, strlen($plainContent) - 1);
                    $nextFirstCharacter = substr(trim(strip_tags($nextElementContent)), 0, 1);

                    if (
                        strlen($plainContent)
                        && ! in_array($lastCharacter, ['.', ':', '!', '?', '[', ','])
                        && ! ctype_lower($lastCharacter)
                        && (! ctype_lower($nextFirstCharacter) || ! in_array($nextFirstCharacter, ['[', '{']))
                    ) {
                        $i++;
                        break;
                    }
                }

                // Stop merging when a numbering was found and the node at the cursor
                // contains nothing but digits once tags and punctuation are removed.
                if (
                    ! empty($current[ 'row_numbering' ])
                    && ctype_digit(trim(preg_replace("/[^0-9a-zA-Z]/", "", strip_tags($this->getNodeContent($elements[ $i ])))))
                ) {
                    $i++;
                    break;
                }

                $i++;
            }

            $data = $this->extractNumbering($content);

            $rows[] = [
                'type'      => (int) $current[ 'font' ] === $this->titleFontThreshold ? 'title' : null,
                'content'   => $data[ 'content' ],
                // NOTE(review): the (int) cast keeps only the leading integer part of
                // a multi-level numbering such as "2.1" - confirm this is intended.
                'numbering' => (! empty($current[ 'row_numbering' ])) ? (int) $current[ 'row_numbering' ] : $data[ 'numbering' ],
                'top'       => (int) $current[ 'top' ],
                'height'    => (int) $current[ 'height' ],
                'left'      => (int) $current[ 'left' ],
                'font'      => (int) $current[ 'font' ],
                'children'  => [],
            ];
        }

        return $rows;
    }

    /**
     * Returns the xml node content with every whitespace run collapsed to a
     * single space.
     *
     * The pattern captures everything that precedes the first closing `text`
     * tag on its line, so an opening tag on that same line is part of the
     * returned value. An empty string is used when nothing matches.
     *
     * @param $node
     *
     * @return string|string[]|null
     */
    private function getNodeContent($node)
    {
        $inner = '';

        if (preg_match_all("/(.*?)<\/text>/", $node->asXML(), $matches) && $matches[ 1 ]) {
            $inner = $matches[ 1 ][ 0 ];
        }

        return preg_replace('!\s+!', ' ', $inner);
    }

    /**
     * Extract the numbering if exists from the string
     *
     * Returns the content wrapped in two leading and two trailing newlines,
     * plus the detected numbering (empty string when none was accepted). A
     * numbering is only accepted when it is longer than one character, in
     * which case every occurrence of it is removed from the content.
     *
     * NOTE(review): the numbering is taken from the LAST capture group
     * (assuming `last()` is the framework helper returning the final array
     * element). For the first pattern that is the single character following
     * the numbering prefix, so that branch never passes the length check; for
     * the second pattern it is the prefix without its final digit. Confirm
     * whether the first capture group was intended before changing this,
     * since downstream output depends on the current behaviour.
     *
     * @param $content
     *
     * @return array
     */
    private function extractNumbering($content)
    {
        $regexOne = '/^(([a-zA-Z0-9]+[.\)])+)([ ]|[a-z]|[A-Z])/';
        $regexTwo = '/^(([\d\.]+)\d)/';

        // The second pattern is only tried when the first one does not match.
        if (preg_match($regexOne, $content, $n) || preg_match($regexTwo, $content, $n)) {
            $numbering = trim(last($n));
        } else {
            $numbering = '';
        }

        if (strlen($numbering) > 1) {
            return [
                'content' => '

'.trim(str_replace($numbering, '', $content)).'

',
                'numbering' => $numbering,
            ];
        }

        return [
            'content' => '

'.trim($content).'

',
            'numbering' => '',
        ];
    }

    /**
     * Build the structure as required by the editor and the gamification module
     *
     * Rows are nested by their `left` offset: a row becomes a descendant of
     * the closest preceding row with a smaller `left`, e.g.
     *
     *   1, 1.1, 1.1.1, 1.2, 2  =>  1 [1.1 [1.1.1], 1.2], 2
     *
     * Untyped rows with `top` below 100 are marked `header`, those above 1150
     * `footer`; both are left out of the result. A row typed `title` also
     * adopts following untyped rows regardless of their offset.
     *
     * @param $elements
     *
     * @return array
     */
    private function buildChildStructure($elements)
    {
        $total = count($elements);

        // Classify headers and footers up front; `top` never changes below, so
        // this gives the same result as classifying on first visit.
        for ($k = 0; $k < $total; $k++) {
            if (isset($elements[ $k ][ 'type' ])) {
                continue;
            }

            if ($elements[ $k ][ 'top' ] < 100) {
                $elements[ $k ][ 'type' ] = 'header';
            } elseif ($elements[ $k ][ 'top' ] > 1150) {
                $elements[ $k ][ 'type' ] = 'footer';
            }
        }

        $handled = []; // index => true for rows already emitted or nested
        $build = [];

        // Iterate up to and including the last row: stopping one short dropped
        // the final row whenever no earlier row had absorbed it as a child.
        for ($i = 0; $i < $total; $i++) {
            if (isset($handled[ $i ])) {
                continue;
            }

            if (in_array($elements[ $i ][ 'type' ], ['footer', 'header'], true)) {
                continue;
            }

            for ($j = $i + 1; $j < $total; $j++) {
                if (isset($handled[ $j ])) {
                    continue;
                }

                if (in_array($elements[ $j ][ 'type' ], ['footer', 'header'], true)) {
                    continue;
                }

                // A title on another line closes the current row, unless the
                // current row consists of digits only.
                if (
                    $elements[ $j ][ 'type' ] === 'title'
                    && $elements[ $i ][ 'top' ] !== $elements[ $j ][ 'top' ]
                    && ! ctype_digit(trim(preg_replace("/[^0-9a-zA-Z]/", "", strip_tags($elements[ $i ][ 'content' ]))))
                ) {
                    break;
                }

                $isDeeper = $elements[ $i ][ 'left' ] < $elements[ $j ][ 'left' ];
                $isTitleBody = $elements[ $i ][ 'type' ] == 'title' && is_null($elements[ $j ][ 'type' ]);

                if (! $isDeeper && ! $isTitleBody) {
                    break;
                }

                $elements[ $i ] = $this->handlePossibleChild($elements[ $i ], $elements[ $j ]);
                $handled[ $j ] = true;
            }

            $build[] = $elements[ $i ];
            $handled[ $i ] = true;
        }

        return $build;
    }

    /**
     * Attach a row to the given parent, descending into the parent's last
     * child for as long as the row is indented further than that child.
     *
     *   1
     *     1.1
     *       1.1.1
     *   2
     *
     * A row that is not indented further than the parent's last child is
     * appended as a direct child of the parent. This includes rows indented
     * less than the last child, which were previously discarded.
     *
     * @param $parent
     * @param $child
     *
     * @return mixed The parent with the child attached.
     */
    protected function handlePossibleChild($parent, $child)
    {
        if (count($parent[ 'children' ]) === 0) {
            $parent[ 'children' ][] = $child;

            return $parent;
        }

        $lastParentChild = last($parent[ 'children' ]);

        // Possible to be either child or grandchild
        if ($child[ 'left' ] > $lastParentChild[ 'left' ]) {
            $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $this->handlePossibleChild($lastParentChild, $child);

            return $parent;
        }

        $parent[ 'children' ][] = $child;

        return $parent;
    }

    /**
     * Set's the title threshold
     *
     * Scans the elements in order and stores the `font` value of the first
     * element that has a following element and either contains a `b` child,
     * has index 0, or has a smaller `font` value than the element after it.
     * Does nothing when the threshold is already set.
     *
     * @param $elements
     */
    protected function setTitleThreshold($elements)
    {
        $total = count($elements);

        foreach ($elements as $index => $element) {
            if (isset($this->titleFontThreshold)) {
                return;
            }

            if ($index + 1 >= $total) {
                continue;
            }

            $nextElement = $elements[ $index + 1 ];

            // The original tested an undefined `$current` here; the element being
            // inspected is the only candidate that makes sense.
            if (
                isset($element->b)
                || $index == 0
                || (! is_null($nextElement) && (int) $element[ 'font' ] < (int) $nextElement[ 'font' ])
            ) {
                $this->titleFontThreshold = (int) $element[ 'font' ];
            }
        }
    }

    /**
     * Set's the header and footer threshold
     *
     * Stores the `font` value of the first element that has no `type` and
     * whose predecessor has a greater `top` value. Does nothing when the
     * threshold is already set.
     *
     * @param $elements
     */
    protected function setHeaderFooterThreshold($elements)
    {
        foreach ($elements as $index => $element) {
            if (isset($this->headerFontFooterThreshold)) {
                return;
            }

            if (! isset($elements[ $index + 1 ])) {
                continue;
            }

            $nextElement = $elements[ $index + 1 ];

            if (! isset($nextElement[ 'type' ]) && $element[ 'top' ] > $nextElement[ 'top' ]) {
                $this->headerFontFooterThreshold = $nextElement[ 'font' ];
            }
        }
    }
}