handle(json_decode($fileContent,true)); } else { Log::error('The given file dose not exists!'); } }
/**
 * Flattens the decoded document array and builds the nested structure from it.
 *
 * Every element of $docxAsHtmlArray is passed to handleTestHtml(); the arrays
 * that come back are concatenated with array_merge() and the combined list is
 * handed to buildTheStructure(). The loop key $i is not used.
 *
 * @param array $docxAsHtmlArray Decoded document data (the call visible just
 *                               above passes the result of json_decode(..., true)).
 *
 * @return array Whatever buildTheStructure() returns for the merged list.
 */
public function handle($docxAsHtmlArray) { $response=[]; foreach ($docxAsHtmlArray as $i => $array) { $response = array_merge($response, $this->handleTestHtml($array)); } return $this->buildTheStructure($response); }
/**
 * Groups a flat list of paragraph entries into parents carrying nested 'children'.
 *
 * Each entry is read as an array with a 'content' string (the shape that
 * buildParagraphs() produces). For every entry that is not skipped the method:
 *  - extracts a leading number from a cleaned-up prefix of the content and,
 *    when it fits the sequence collected in $numbers, stores it (trailing dot
 *    removed) under the 'numbering' key;
 *  - when the entry passes the checks described by the inline comments, scans
 *    the entries that follow, derives a 'numbering' for them in a similar way
 *    and attaches them through handlePossibleChild() until one of the break
 *    conditions (including paragraphBrake() and the $breakPoints titles) ends
 *    the scan;
 *  - appends the entry to the result when its tag-stripped content is not empty.
 *
 * NOTE(review): $alreadyHandled receives indices as values
 * (`$alreadyHandled[] = $j`) but is queried with array_key_exists(), i.e. by
 * key, so the skip check effectively tests "index < number of stored items".
 * Confirm that this is the intended behaviour.
 * NOTE(review): several dd(...) calls are still present inside the scan loop;
 * dd() is presumably the framework's dump-and-die helper and would abort the
 * run when reached - confirm and remove.
 * NOTE(review): many needles in this copy of the file are empty strings
 * (strpos()/substr_count()/str_replace() with ""), which looks like markup that
 * was lost when the file was exported; verify the literals against the
 * repository before relying on them.
 *
 * @param array $data Flat list of entries, each with a 'content' key.
 *
 * @return array Top-level entries, possibly extended with 'numbering' and 'children'.
 */
private function buildTheStructure($data) { $response = []; $alreadyHandled = []; $numbers = []; for ($i = 0; $i < count($data); $i++) { if (array_key_exists($i, $alreadyHandled)) { continue; } $parent = $data[ $i ]; //get numbering from the first 5 chars of the cleaned string preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '', substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($data[ $i ][ 'content' ]))))), 0, 5)), $parentNumbering); if ($parentNumbering && count($numbers) == 0 && last($parentNumbering) < 5) { $numbers[] = $parentNumbering[ 0 ]; $data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.'); } elseif ($parentNumbering && count($numbers) > 0 && $parentNumbering[ 0 ] >= last($numbers)) { $numbers[] = $parentNumbering[ 0 ]; $data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.'); } //check if string starts with bold //check if number of bolds equals to 1 //check if not empty html and contains words if ((strpos($parent[ 'content' ], "") === 0 || (substr_count($parent[ 'content' ], "") == 1 || $parentNumbering) && strlen(trim(strip_tags($parent[ 'content' ]))) > 0) || (str_word_count(preg_replace('/[A-Za-z]{4,}/', '', strip_tags($data[ $i ][ 'content' ]))) < 2)) { $childNumbers = []; $j = $i + 1; //check if data exists if (isset($data[ $j ]) && strlen($data[ $j ][ 'content' ])) { for ($j; $j < count($data); $j++) { if ($data[ $j ][ 'content' ] == '\u00a0') { $alreadyHandled[] = $j; } if (array_key_exists($j, $alreadyHandled)) { continue; } $child = $data[ $j ]; preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', 
substr(trim(urldecode(str_replace(['', ''], '', strip_tags($data[ $j ][ 'content' ])))), 0, 5), $childNumbering); if ($childNumbering && ! preg_match("/[a-z]/i", rtrim(trim($childNumbering[ 0 ])))) { if ($childNumbering && count($childNumbers) == 0 && trim($childNumbering[ 0 ]) < 5) { $childNumbers[] = trim($childNumbering[ 0 ]); $data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); } elseif ($childNumbering && count($childNumbers) > 0 && trim($childNumbering[ 0 ]) >= last($childNumbers)) { $childNumbers[] = trim($childNumbering[ 0 ]); $data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); } elseif ($childNumbering && trim($childNumbering[ 0 ]) < 100) { $childNumbers[] = trim($childNumbering[ 0 ]); $data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.'); } } if (empty(trim($data[ $i ][ 'content' ])) && isset($data[ $j ][ 'numbering' ])) { break; } $breakPoints = array_change_key_case([ 'TERMS OF THE {P1_Pros}', 'TERMS AND CONDITIONS', 'BACKGROUND', 'OPERATIVE PROVISIONS', 'Products and/or Services', 'PAYMENT', 'GRANT OF LICENCE', 'TERM OF LICENCE AGREEMENT', 'ROYALTY', 'PAYMENT', 'PERFORMANCE TARGETS', 'STATIONERY', 'QUALITY CONTROL', 'THE DISTRIBUTOR\'S OBLIGATIONS', 'NON SOLICITATION', 'SALE OF BUSINESS', 'TERMINATION OF AGREEMENT', 'CONDITIONS FOLLOWING TERMINATION', 'RESTRAINT', 'TIME OF ESSENCE AND NOTICES', 'INTERPRETATION', 'ARBITRATION', 'DOMICILIUM AND REGISTERED OFFICE', 'USE OF TRADE MARKS, TRADE NAME, GOODWILL AND KNOW-HOW', 'GENERAL', 'DESCRIPTION OF {P2_NAME} INFORMATION', 'PAYMENT OF FEES', 'SUPPLIER\'S STATUS', 'SUPPLIER\’S OBLIGATIONS', 'DEFINITIONS AND INTERPRETATION', 'DEFINITIONS', 'CONFIDENTIALITY', 'TERMINATION', 'RESTRICTIVE COVENANTS AND INTELLECTUAL PROPERTY', 'DETAILS AND IDENTITY OF CONSULTANT', 'ANTI-BRIBERY', 'ASSIGNMENT SCHEDULE', 'SCHEDULE 1', '{P1_NAME}\'S LIABILITY', 'DURATION OF AGREEMENT AND SUPPLY', 'SUPPLY OF HARDWARE', 'SUPPLY OF SOFTWARE AND DOCUMENTATION', 'SUPPLY OF SUPPORT SERVICES', 
'INTELLECTUAL PROPERTY RIGHTS', 'THE CONTRACT', '{P1_NAME}\U2019S LIABILITY', 'UPDATES', 'TERMS OF THE {P1_NAME} PRODUCTS.', 'CUSTOMER RESPONSIBILITIES', 'EXHIBIT A', 'EXHIBIT A-1', 'EXHIBIT A-2', 'WARRANTIES', 'EXIT, TERMINATION AND SUSPENSION', 'EXHIBIT B', 'EXHIBIT B-1', 'EXHIBIT B-2', 'COUNTERPARTS', 'LICENSE GRANT', 'INDEMNIFICATION BY CUSTOMER', 'TERMS OF THE {P1_NAME} PRODUCTS', 'TERMS OF CLOUD SERVICE', 'INDEMNIFICATION BY CUSTOMER', 'TERMINATION', 'TERMS OF THE {P1_PROS}', 'SUPPORT', 'SUB CONTRACTING AND THIRD PARTY RECOMMENDATIONS', 'LICENCE AND ACCESS TO SOFTWARE AND HARDWARE', 'DECLARATION OF NON-LIAISON AND ANTI-CORRUPTION COMMITMENT', '{P1_NAME}\'S DUTIES' ], CASE_UPPER); //$breakPoints = []; if ($this->paragraphBrake($data[ $j ], $breakPoints)) { break; } if (substr(trim(str_replace(array_merge([')'], $childNumbering), '', $data[ $j ][ 'content' ])), 0, 3) == '' && str_word_count(strip_tags(str_replace(array_merge([')'], $childNumbering), '', $data[ $j ][ 'content' ]))) == str_word_count($this->getTextBetweenTags(str_replace(array_merge([')',], $childNumbering), '', $data[ $j ][ 'content' ]), 'b')) && (isset($data[ $j + 1 ]) && ((ctype_upper(substr($data[ $j + 1 ][ 'content' ], 0, 1)) || (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ] == 1))))) { break; } if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace(' ', '', $data[ $j ][ 'content' ])) && str_word_count($data[ $j ][ 'content' ]) >= 1) { break; } if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace([ '', '', last($childNumbering), last($childNumbering), ')', '.' 
], '', trim(str_replace(' ', '', $data[ $j ][ 'content' ])))) && str_word_count($data[ $j ][ 'content' ]) >= 1) { break; } //if(isset($data[$j]['numbering']) && isset($data[$i]['numbering']) && ) if (isset($data[ $i ][ 'children' ]) && isset($data[ $i ][ 'numbering' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && ($data[ $j ][ 'numbering' ] - last($data[ $i ][ 'children' ])[ 'numbering' ] !== 1 && $data[ $i ][ 'numbering' ] < $data[ $j ][ 'numbering' ]) && ! in_array(substr(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ]), strlen(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ])) - 1), [':', '-']) && ! strpos($data[ $j ][ 'numbering' ], '.')) { break; } if (in_array(strtoupper(trim(str_replace([ '', '', last($parentNumbering), last($parentNumbering), ')', '.' ], '', strip_tags($data[ $i ][ 'content' ])))), $breakPoints)) { if ((! isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && (substr($data[ $i ][ 'content' ], 0, 3) != '') || (str_word_count(strip_tags($data[ $i ][ 'content' ])) != str_word_count($this->getTextBetweenTags($data[ $i ][ 'content' ], 'b'))))) { if (! in_array($data[ $i ][ 'content' ], $breakPoints)) { break; } } } if (in_array(strtoupper(trim(str_replace([ '', '', last($childNumbering), last($childNumbering), ')', '.' 
], '', strip_tags($data[ $j ][ 'content' ])))), $breakPoints)) { break; } if (in_array(substr(strip_tags($data[ $j ][ 'content' ]), strlen(strip_tags($data[ $j ][ 'content' ])) - 1), [':', '-'])) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ctype_lower(substr(last($data[ $i ][ 'children' ])[ 'content' ], strlen(last($data[ $i ][ 'children' ])[ 'content' ]) - 1)) && ctype_lower(substr(trim($data[ $j ][ 'content' ]), 0, 1))) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '', strip_tags($data[ $j ][ 'content' ]))) < 3 && strlen(strip_tags($data[ $j ][ 'content' ])) && ! isset($data[ $j ][ 'numbering' ]) && ctype_upper(substr($data[ $j ][ 'content' ], 0, 1)) && str_word_count($data[ $j ][ 'content' ]) < 10) { if (isset($data[ $i ][ 'children' ]) && ! in_array(substr(trim(last($data[ $i ][ 'children' ])[ 'content' ]), strlen(trim(last($data[ $i ][ 'children' ])[ 'content' ])) - 1), ['!', '.', '?', '_', '}'])) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } else { break; } //dd($data[$i]); } elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '', strip_tags($data[ $i ][ 'content' ]))) < 2 && strlen(strip_tags($data[ $i ][ 'content' ]))) { if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ] [ 'content' ]) < 6) { break; } if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ((str_word_count($data[ $j ] [ 'content' ]) < 6) || (substr_count($data[ $j ][ 'content' ], '') == 1 && substr_count(last($data[ $i ][ 'children' ])[ 'content' ], '') == 0 && ! 
isset(last($data[ $i ][ 'children' ])[ 'numbering' ]))) && ctype_upper((substr($data[ $j ][ 'content' ], 0, 1)))) { break; } if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] + 1 == $data[ $i ][ 'numbering' ] && str_word_count($data[ $j ][ 'content' ]) < 6) { break; } if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ! isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) { break; } $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (! in_array(trim(strtolower(strip_tags($data[ $j ][ 'content' ]))), ['definitions']) && ! ctype_space($data[ $j ][ 'content' ]) && strlen(trim(strip_tags($data[ $j ][ 'content' ]))) && ! isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ])) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) { if (is_numeric($data[ $j ][ 'numbering' ]) && is_numeric($data[ $i ][ 'numbering' ]) && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ][ 'content' ]) < str_word_count($data[ $i ][ 'content' ])) { break; } if (is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) === 1 && (isset($data[ $i ][ 'children' ]) && (! 
(isset(last($data[ $i ][ 'children' ])[ 'numbering' ])) || (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs(last($data[ $i ][ 'children' ])[ 'numbering' ] - $data[ $j ][ 'numbering' ]) !== 1))) && str_word_count($data[ $j ][ 'content' ]) < 8) { break; } if (substr_count($data[ $j ][ 'numbering' ], '.') > substr_count($data[ $i ][ 'numbering' ], '.') && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) < 1) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (((float) $data[ $j ][ 'numbering' ] > (float) $data[ $i ][ 'numbering' ] && substr_count($data[ $j ][ 'content' ], '') == 0 && substr_count($data[ $i ][ 'content' ], '') == 1) || (substr_count($data[ $i ][ 'content' ], "") == 1 && (substr_count($data[ $j ][ 'content' ], '') == 0 || substr_count($data[ $j ][ 'content' ], '')) > 1)) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (substr_count($data[ $i ][ 'content' ], '') == 1 && str_word_count($data[ $j ][ 'content' ]) > 6 && isset($data[ $j ][ 'numbering' ])) { if (strpos($data[ $j ][ 'content' ], 'Networking infrastructure (hardware, firmware, software an') !== false) { dd('aa'); } if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ])) { $lastParentChild = last($data[ $i ][ 'children' ]); if (isset($lastParentChild[ 'numbering' ]) && abs($lastParentChild[ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && (substr_count($data[ $j ][ 'content' ], '') == 1)) { break; } } $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && str_word_count($data[ $j ][ 'content' ]) >= 6) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && 
isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs((float) $data[ $j ][ 'numbering' ] - (float) last($data[ $i ][ 'children' ])[ 'numbering' ]) == (float) 1) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) == 0 && str_word_count($data[ $j ][ 'content' ]) >= 6) { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } else { break; } } elseif (isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ]) && str_word_count($data[ $j ][ 'content' ]) > 6) { if (substr_count($data[ $j ][ 'content' ], "") == 1 && strpos(strtolower($data[ $i ][ 'content' ]), 'definition') === false) { break; } $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } elseif (empty($data[ $j ][ 'content' ]) && (isset($data[ $j + 1 ]) && isset($data[ $j - 1 ]) && isset($data[ $i ][ 'children' ]))) { if (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && strlen(last($data[ $i ][ 'children' ])[ 'numbering' ]) == strlen(preg_replace('/[^0-9\.)]/', '', substr(trim(preg_replace('/ +/', ' ', preg_replace('/[^A-Za-z0-9 .]/', ' ', urldecode(strip_tags($data[ $j + 1 ][ 'content' ]))))), 0, 5))) && ! 
empty($data[ $j ][ 'content' ])) { dd('Here', $data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } else { break; } } elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ])) { $lastParentChild = last($data[ $i ][ 'children' ]); if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && substr_count($lastParentChild[ 'numbering' ], '.') > substr_count($data[ $j ][ 'numbering' ], '.')) { dd('111'); } else { $data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]); $alreadyHandled[] = $j; } } else { break; } //if(strpos($data[$i]['content'],'2. TERMS OF THE {P1_Pros}.')!==false || strpos($data[$j]['content'],'2. TERMS OF THE {P1_Pros}.')!==false){ // dd($data[$i],$data[$j]); //} } } } if (strlen(trim(strip_tags($data[ $i ][ 'content' ])))) { $response[] = $data[ $i ]; //if ($data[ $i ][ 'content' ] == "Duration of Agreement and Supply") { // dd(121,$data[$i],$i); //} //if($i > 73){ // dd($i,$data[$i],$response); //} } $alreadyHandled[] = $i; } return $response; }
/**
 * Attaches $child to $parent and returns the resulting parent entry.
 *
 * The rules are applied in this order:
 *  1. Parent content empty and child content not empty: the child is returned
 *     in place of the parent.
 *  2. Child content empty: the parent is returned unchanged.
 *  3. Parent has no (or an empty) 'children' list: the list is initialised and
 *     the child is appended; each of the three branches appends the child.
 *  4. The last child's content ends with ':' and the child starts with a
 *     lowercase letter (or with a digit and has more than five words): the
 *     child is merged into the last child by recursion. If the child's
 *     numbering is exactly one above the merged last child's numbering, the
 *     child is appended as a sibling and the merge result is not stored.
 *  5. Both carry 'numbering' and the child's numbering string is longer: the
 *     child becomes a sibling when the numeric distance is exactly 1 or both
 *     numberings contain the same amount of dots; otherwise it is merged into
 *     the last child by recursion.
 *  6. Otherwise punctuation and letter-case checks on the last child, the
 *     parent and the child decide whether the child's content is concatenated
 *     to the last child's content, the child is stored under the last child's
 *     own 'children', or the child is appended as a sibling.
 *
 * NOTE(review): rule 6 still contains a dd('aa', ...) call guarded by a
 * hard-coded content fragment; dd() presumably stops execution - confirm and remove.
 * NOTE(review): the strpos() needles in rule 3 are empty strings in this copy
 * of the file, which looks like lost markup; verify against the repository.
 *
 * @param array $parent Entry with a 'content' key and optional 'numbering'/'children'.
 * @param array $child  Entry with a 'content' key and optional 'numbering'.
 *
 * @return array The updated parent, or the child itself in case 1.
 */
private function handlePossibleChild($parent, $child) { if (empty($parent[ 'content' ]) && ! empty($child[ 'content' ])) { return $child; } if (empty($child[ 'content' ])) { return $parent; } // Must iterate through parent children if (! 
isset($parent[ 'children' ]) || (isset($parent[ 'children' ]) && count($parent[ 'children' ]) == 0)) { $parent[ 'children' ] = []; if (str_word_count(strip_tags($child[ 'content' ])) >= 5 && strpos($child[ 'content' ], '') === false) { $parent[ 'children' ][] = $child; } elseif (strpos($parent[ 'content' ], '') !== false && strpos($child[ 'content' ], '') !== false) { $parent[ 'children' ][] = $child; } elseif (isset($child[ 'content' ])) { $parent[ 'children' ][] = $child; } return $parent; } $lastParentChild = last($parent[ 'children' ]); if ($lastParentChild && substr($lastParentChild[ 'content' ], strlen($lastParentChild[ 'content' ]) - 1) === ':' && ((ctype_lower(substr($child[ 'content' ], 0, 1)) || (ctype_digit(substr($child[ 'content' ], 0, 1)) && str_word_count($child[ 'content' ]) > 5)))) { $lastParentChild = $this->handlePossibleChild($lastParentChild, $child); if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && $child[ 'numbering' ] - 1 == $lastParentChild[ 'numbering' ]) { $parent[ 'children' ][] = $child; } else { $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild; } return $parent; } if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && strlen($child[ 'numbering' ]) > strlen($lastParentChild[ 'numbering' ])) { if (isset($parent[ 'children' ]) && isset(last($parent[ 'children' ])[ 'numbering' ]) && $child[ 'numbering' ]) { if (is_numeric($child[ 'numbering' ]) && abs($child[ 'numbering' ] - $lastParentChild[ 'numbering' ]) === 1) { $parent[ 'children' ][] = $child; return $parent; } } if (isset($child[ 'numbering' ]) && isset($lastParentChild[ 'numbering' ]) && substr_count($lastParentChild[ 'numbering' ], '.') == substr_count($child[ 'numbering' ], '.')) { $parent[ 'children' ][] = $child; return $parent; } $lastParentChild = $this->handlePossibleChild($lastParentChild, $child); $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild; return $parent; } if 
(! in_array(substr(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ])), strlen(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ]))) - 1), ['!', '.', '?', ';', '_', ':']) && (ctype_lower(substr(trim($child[ 'content' ]), 0, 1)) || ((ctype_upper(substr(trim($child[ 'content' ]), 0, 1)) && ! isset($child[ 'numbering' ]))))) { //dd($lastParentChild,$child); if (strpos($lastParentChild[ 'content' ], 'e, this Agreement and the {P1_Name} Software Licence Agreement') !== false) { dd('aa', $lastParentChild, $child); } $lastParentChild[ 'content' ] .= ' '.$child[ 'content' ]; $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild; return $parent; } elseif (! in_array(substr(trim($parent[ 'content' ]), strlen(trim($parent[ 'content' ])) - 1), ['!', '.', '?', ';']) && ctype_lower(substr(trim($lastParentChild[ 'content' ]), strlen(trim($lastParentChild[ 'content' ])) - 1)) && ctype_lower(substr(trim($child[ 'content' ]), 0, 1))) { $parent[ 'children' ][] = $child; } elseif (! in_array(substr(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ])), strlen(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ]))) - 1), [ '!', '.', '?', ';', '_', ':' ]) && isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && $lastParentChild[ 'numbering' ] > $child[ 'numbering' ]) { $lastParentChild[ 'children' ][] = $child; $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild; } else { $parent[ 'children' ][] = $child; } return $parent; }
/**
 * Collects paragraph entries from $array.
 *
 * For each item of $array:
 *  - when the item holds exactly one element and that element is an array, the
 *    method returns the result of calling itself on the item straight away, so
 *    the remaining items of $array are not visited;
 *  - otherwise the item is converted with buildParagraphs(). The result is
 *    merged into $data when $data has no 'content' key and more than one
 *    paragraph came back; in the other case a non-empty result replaces $data.
 *
 * NOTE(review): count($item) assumes every item is an array - confirm against
 * the data the caller supplies.
 *
 * @param array $array Nested arrays from the decoded document.
 *
 * @return array List of ['content' => string] entries (empty when nothing was built).
 */
public function handleTestHtml($array) { $data = []; foreach ($array as $item) { if (count($item) == 1 && is_array(last($item))) { return $this->handleTestHtml($item); } else { $html = $this->buildParagraphs($item); if (! 
isset($data[ 'content' ]) && count($html) > 1) { $data = array_merge($data, $html); } elseif ($html) { $data = $html; } } } return $data; }
/**
 * Turns a (possibly nested) list of HTML fragments into ['content' => ...] entries.
 *
 * The list is walked by integer index from 0 to count($paragraphs) - 1:
 *  - array elements are processed recursively and their entries are merged
 *    into the result;
 *  - non-empty strings that are not pure whitespace are passed through the
 *    nested preg_replace()/str_replace() chain, trimmed and, when something is
 *    left, stored after html_entity_decode() with ENT_COMPAT | ENT_HTML401 and
 *    the 'UTF-8' charset.
 *
 * $alreadyHandled is never written in this method, so the skip check at the
 * top of the loop never triggers.
 *
 * NOTE(review): the regular expressions and the str_replace() needle in the
 * clean-up chain look damaged in this copy of the file (whitespace and a line
 * break where entities or tags are expected); verify them against the repository.
 *
 * @param array $paragraphs Strings and/or nested arrays of strings.
 *
 * @return array List of ['content' => string] entries.
 */
private function buildParagraphs($paragraphs) { $result = []; $alreadyHandled = []; for ($i = 0; $i < count($paragraphs); $i++) { if (array_key_exists($i, $alreadyHandled)) { continue; } $paragraph = $paragraphs[ $i ]; if (is_array($paragraph)) { $result = array_merge($result, $this->buildParagraphs($paragraph)); } elseif (strlen($paragraph) && ! ctype_space($paragraph)) { $cleanHtml = trim(str_replace(' ', '', preg_replace('/<([^>\s]+)[^>]*>(?:\s*(?:
| | | | | | | )\s*)*<\/\1>/', '', preg_replace('/(]*>)|(<\/font>)/', '', preg_replace('/\s+/S', " ", $paragraph))))); if (! empty($cleanHtml)) { $result[] = ['content' => html_entity_decode($cleanHtml, ENT_COMPAT | ENT_HTML401, 'UTF-8')]; } } } return $result; }
/**
 * Get the text between an HTML tag pair.
 *
 * Builds the pattern /<$tagname ?.*>(.*)<\/$tagname>/ by interpolating
 * $tagname as-is (it is not escaped) and runs a single preg_match() on
 * $string after a str_replace() pass. When the pattern matches, the last
 * element of the match array (the captured text) is returned, otherwise ''.
 *
 * NOTE(review): both search strings of the str_replace() call are empty in
 * this copy of the file, which looks like lost markup; verify against the repository.
 *
 * @param string $string  HTML fragment to search.
 * @param string $tagname Tag name without angle brackets, e.g. 'b'.
 *
 * @return string Captured text, or '' when nothing matched.
 */
private function getTextBetweenTags($string, $tagname) { $pattern = "/<$tagname ?.*>(.*)<\/$tagname>/"; preg_match($pattern, str_replace(['', ''], '', $string), $matches); if ($matches) { return last($matches); } return ''; }
/**
 * Tells whether the leading part of a paragraph is one of the break-point titles.
 *
 * Steps, as coded:
 *  - two preg_replace() calls are made, but their return values are not
 *    assigned, so they leave $paragraph untouched;
 *  - when 'numbering' is set, every '.', ')' and occurrence of the numbering
 *    string is removed from the local copy of the content;
 *  - when the substr_count() check equals 1, the content is split with
 *    explode(), the first piece is trimmed and looked up in $breakPoints with
 *    a non-strict in_array().
 *
 * NOTE(review): the substr_count() needle, the explode() delimiter and the
 * inner str_replace() needle look damaged in this copy of the file (empty
 * string / raw line break); verify them against the repository.
 *
 * @param array $paragraph   Entry with a 'content' key and optional 'numbering'.
 * @param array $breakPoints Titles that end the current parent's child scan.
 *
 * @return bool True when the extracted leading text is listed in $breakPoints.
 */
private function paragraphBrake($paragraph, array $breakPoints) { //$paragraph[ 'content' ] = '2) TERMS OF THE {P1_Pros}. Subject to the terms of the Agreement, {P1_Name} grants Customer and/or its Affiliates a non-exclusive, non-transferable (except to a successor in interest as permitted hereunder) license to use the {P1_Pros} listed on the Order Form during the Term. Customer\’s and/or its Affiliates\’ right to use the {P1_Pros} is limited to the volume and other restrictions contained herein and in the Order Form and the Documentation.'; //$paragraph[ 'numbering' ] = '2'; preg_replace('/(\d+)<\/b>/', $paragraph[ 'content' ], $paragraph[ 'content' ]); preg_replace('/(\d+)\)/', $paragraph[ 'content' ], $paragraph[ 'content' ]); if (isset($paragraph[ 'numbering' ])) { $paragraph[ 'content' ] = str_replace(['.', ')', $paragraph[ 'numbering' ]], '', $paragraph[ 'content' ]); } if (substr_count($paragraph[ 'content' ], '') === 1) { $breakString = explode('
', $paragraph[ 'content' ]); if ($breakString) { $breakString = trim(str_replace('', '', trim($breakString[ 0 ]))); if (in_array($breakString, $breakPoints)) { return true; } } } return false; } }