Repository for the Search and Displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into Markdown (.md) for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

773 lines
29 KiB

<?php
namespace App\Parser;
use Illuminate\Support\Facades\Log;
class ParseTextArray
{
/**
 * Headings that always start a new top-level clause.
 *
 * The constructor normalises every entry with nestedUppercase() (dots removed,
 * upper-cased), and buildTheStructure() compares a numbered line's content
 * against the list after stripping dots/tabs and upper-casing it. Case and
 * trailing dots are therefore irrelevant here, and entries that would be
 * identical after normalisation are listed only once.
 *
 * Placeholders such as {P1_NAME} are matched literally.
 *
 * @var array
 */
private $breakPoints = [
    'TERMS OF THE {P1_Pros}',
    'TERMS AND CONDITIONS',
    'BACKGROUND',
    'OPERATIVE PROVISIONS',
    'Products and/or Services',
    'PAYMENT',
    'GRANT OF LICENCE',
    'TERM OF LICENCE AGREEMENT',
    'ROYALTY',
    'PERFORMANCE TARGETS',
    'STATIONERY',
    'QUALITY CONTROL',
    'THE DISTRIBUTOR\'S OBLIGATIONS',
    'NON SOLICITATION',
    'SALE OF BUSINESS',
    'TERMINATION OF AGREEMENT',
    'CONDITIONS FOLLOWING TERMINATION',
    'RESTRAINT',
    'TIME OF ESSENCE AND NOTICES',
    'INTERPRETATION',
    'ARBITRATION',
    'DOMICILIUM AND REGISTERED OFFICE',
    'USE OF TRADE MARKS, TRADE NAME, GOODWILL AND KNOW-HOW',
    'GENERAL',
    'DESCRIPTION OF {P2_NAME} INFORMATION',
    'PAYMENT OF FEES',
    'SUPPLIER\'S STATUS',
    // In a single-quoted string "\’" is NOT an escape sequence: the backslash
    // stays in the value. The literal form is kept for backward compatibility
    // and the variant with a real typographic apostrophe (U+2019) is added so
    // that headings using it are actually recognised.
    'SUPPLIER\’S OBLIGATIONS',
    'SUPPLIER’S OBLIGATIONS',
    'DEFINITIONS AND INTERPRETATION',
    'DEFINITIONS',
    'CONFIDENTIALITY',
    'TERMINATION',
    'RESTRICTIVE COVENANTS AND INTELLECTUAL PROPERTY',
    'DETAILS AND IDENTITY OF CONSULTANT',
    'ANTI-BRIBERY',
    'ASSIGNMENT SCHEDULE',
    'SCHEDULE 1',
    '{P1_NAME}\'S LIABILITY',
    'DURATION OF AGREEMENT AND SUPPLY',
    'SUPPLY OF HARDWARE',
    'SUPPLY OF SOFTWARE AND DOCUMENTATION',
    'SUPPLY OF SUPPORT SERVICES',
    'INTELLECTUAL PROPERTY RIGHTS',
    'THE CONTRACT',
    // "\U2019" is likewise literal text inside single quotes (six characters,
    // not an apostrophe). Kept as-is for inputs that really contain the escaped
    // form; the real-apostrophe variant is added next to it.
    '{P1_NAME}\U2019S LIABILITY',
    '{P1_NAME}’S LIABILITY',
    'UPDATES',
    'TERMS OF THE {P1_NAME} PRODUCTS.',
    'CUSTOMER RESPONSIBILITIES',
    'EXHIBIT A',
    'EXHIBIT A-1',
    'EXHIBIT A-2',
    'WARRANTIES',
    'EXIT, TERMINATION AND SUSPENSION',
    'EXHIBIT B',
    'EXHIBIT B-1',
    'EXHIBIT B-2',
    'COUNTERPARTS',
    'LICENSE GRANT',
    'INDEMNIFICATION BY CUSTOMER',
    'TERMS OF CLOUD SERVICE',
    'SUPPORT',
    'SUB CONTRACTING AND THIRD PARTY RECOMMENDATIONS',
    'LICENCE AND ACCESS TO SOFTWARE AND HARDWARE',
    'DECLARATION OF NON-LIAISON AND ANTI-CORRUPTION COMMITMENT',
    '{P1_NAME}\'S DUTIES',
    // NOTE(review): looks like OCR residue from a sample document; kept because
    // removing it would change which lines are treated as break points.
    'ana are mere',
    'fees',
    'ENGAGEMENT',
    'DUTIES',
    'STATEMENTS',
    'Human rights',
    'Labour',
    'Environment',
    'Anti-corruption',
    'Services',
    'Scope of the Agreement',
    'Staffing Levels for Services',
    'Indemnification',
];
/**
* Whether the text being parsed was extracted from a PDF.
*
* When true, lines containing both "Page" and "of" are treated as page markers
* (skipped by buildTheStructure() and never numbered by getNumbering()), and
* handlePossibleChild() applies its PDF-only line-merging rules.
*
* @var bool
*/
private $pdf;
/**
 * Create a parser and prepare the break-point list for matching.
 *
 * The configured break points are normalised once, up front, through
 * nestedUppercase() so that later comparisons can use a plain in_array().
 *
 * @param bool $pdf true when the input text comes from a PDF extraction
 */
public function __construct($pdf = false)
{
    $this->pdf = $pdf;

    $normalisedBreakPoints = $this->nestedUppercase($this->breakPoints);
    $this->breakPoints = $normalisedBreakPoints;
}
/**
 * Read a text file from disk and parse it into the clause structure.
 *
 * The file is split into lines on any of "\r\n", "\r" or "\n", so the result
 * does not depend on the line-ending convention of the host (PHP_EOL) matching
 * the one used by the file. Lines that PHP considers empty ('' and '0') are
 * dropped before parsing, as before.
 *
 * @param string $filePath path of the file to parse
 *
 * @return array|string the parsed clauses, or an empty string when the path is
 *                      not a readable regular file (the failure is logged)
 */
public function fromFile($filePath)
{
    // is_file() rather than file_exists(): a directory "exists" but cannot be read as text.
    if (! is_file($filePath) || ! is_readable($filePath)) {
        Log::error('The given file does not exist or is not readable!', ['path' => $filePath]);
        return '';
    }

    $fileContent = file_get_contents($filePath);
    if ($fileContent === false) {
        Log::error('The given file could not be read!', ['path' => $filePath]);
        return '';
    }

    $lines = preg_split('/\r\n|\r|\n/', $fileContent);

    return $this->buildTheStructure(array_filter($lines));
}
/**
 * Build the clause tree from the raw lines of a document.
 *
 * Every line that has not been consumed yet becomes a candidate parent. The
 * lines that follow it are offered, one by one, as children: the scan stops at
 * the first line that starts a new clause (see startsNewClause()) or that is
 * not a continuation of the parent (see isContinuationOf()); every accepted
 * line is folded into the parent by handlePossibleChild(). The resulting
 * top-level clauses are finally passed through recheckClauses().
 *
 * Compared with the previous implementation:
 *  - a page marker met by the outer loop is skipped instead of aborting the
 *    whole parse (the inner loop already skipped such lines);
 *  - consumed line indexes are stored as array keys, so the "already handled"
 *    lookup really tests the index instead of relying on the list length;
 *  - a verbatim duplicate of the "numbering jumps by >= 1" check was removed.
 *
 * @param array $textAsArray the lines of the document
 *
 * @return array
 */
private function buildTheStructure($textAsArray)
{
    $textAsArray = array_values($textAsArray);
    $countData = count($textAsArray);
    $response = [];
    // Set of consumed line indexes: index => true.
    $alreadyHandled = [];

    for ($i = 0; $i < $countData; $i++) {
        if (isset($alreadyHandled[ $i ])) {
            continue;
        }
        if ($this->isPdfPageMarker($textAsArray[ $i ])) {
            $alreadyHandled[ $i ] = true;
            continue;
        }

        $parent = $this->describeLine($textAsArray[ $i ]);

        if (isset($textAsArray[ $i + 1 ])) {
            for ($j = $i + 1; $j < $countData; $j++) {
                if (isset($alreadyHandled[ $j ])) {
                    continue;
                }
                if ($this->isPdfPageMarker($textAsArray[ $j ])) {
                    $alreadyHandled[ $j ] = true;
                    continue;
                }

                $candidate = $this->describeLine($textAsArray[ $j ]);

                if ($this->startsNewClause($parent, $candidate)) {
                    break;
                }
                if (! $this->isContinuationOf($parent, $candidate, $textAsArray, $j + 1)) {
                    break;
                }

                $parent = $this->handlePossibleChild($parent, $candidate);
                $alreadyHandled[ $j ] = true;
            }
        }

        if (strlen($parent[ 'content' ])) {
            $response[] = $parent;
        }
        $alreadyHandled[ $i ] = true;
    }

    return $this->recheckClauses($response);
}

/**
 * Tell whether a line is a PDF page marker (PDF mode only): it contains both
 * the text "Page" and the text "of".
 *
 * @param mixed $line
 *
 * @return bool
 */
private function isPdfPageMarker($line)
{
    return $this->pdf &&
        isset($line) &&
        strpos($line, 'Page') !== false &&
        strpos($line, 'of') !== false;
}

/**
 * Turn a raw line into a paragraph array: trimmed content, number of leading
 * whitespace characters and, when getNumbering() finds one, the numbering
 * (which is then removed from the content).
 *
 * @param string $line
 *
 * @return array
 */
private function describeLine($line)
{
    $paragraph = [
        'content' => trim($line),
        'spaces'  => strlen($line) - strlen(ltrim($line)),
    ];

    if ($numbering = $this->getNumbering($line)) {
        $paragraph[ 'numbering' ] = $numbering;
        $paragraph[ 'content' ] = trim(ltrim(str_replace($numbering, '', $paragraph[ 'content' ]), '.'));
    }

    return $paragraph;
}

/**
 * Count the digit characters of a numbering string.
 *
 * @param string $numbering
 *
 * @return int
 */
private function countDigits($numbering)
{
    return count(array_filter(str_split($numbering), 'is_numeric'));
}

/**
 * Decide whether $candidate opens a new clause, i.e. must NOT be attached to
 * $parent. The checks run in the same order as in the original loop.
 *
 * @param array $parent
 * @param array $candidate
 *
 * @return bool
 */
private function startsNewClause($parent, $candidate)
{
    $parentNumbered = $this->hasNumbering($parent);
    $candidateNumbered = $this->hasNumbering($candidate);
    $bothNumbered = $parentNumbered && $candidateNumbered;

    // Same indentation and same numbering shape (dots and digits): a sibling.
    if (
        $candidate[ 'spaces' ] == $parent[ 'spaces' ] &&
        $bothNumbered &&
        substr_count($parent[ 'numbering' ], '.') == substr_count($candidate[ 'numbering' ], '.') &&
        $this->countDigits($parent[ 'numbering' ]) == $this->countDigits($candidate[ 'numbering' ])
    ) {
        return true;
    }

    // Numbered, indented line after an unnumbered, unindented line that does not end with ':'.
    if (
        $candidateNumbered &&
        ! $parentNumbered &&
        ! $parent[ 'spaces' ] &&
        $candidate[ 'spaces' ] > $parent[ 'spaces' ] &&
        ! in_array(substr($parent[ 'content' ], -1), [':'])
    ) {
        return true;
    }

    // The numbering advances by at least one whole unit.
    if (
        $bothNumbered &&
        ((float) $candidate[ 'numbering' ] - (float) $parent[ 'numbering' ]) >= 1
    ) {
        return true;
    }

    // Hardcoded breakpoints.
    // NOTE(review): the third search string is empty, which str_replace() ignores;
    // it may originally have been an invisible character - confirm.
    if (
        $candidateNumbered &&
        in_array(strtoupper(str_replace(['.', "\t", ""], '', $candidate[ 'content' ])), $this->breakPoints)
    ) {
        return true;
    }

    // Hardcoded "Schedule" break.
    if (
        ! $candidateNumbered &&
        strpos(substr(trim(strtolower(utf8_encode($candidate[ 'content' ]))), 0, 10), 'schedule') !== false
    ) {
        return true;
    }

    // Hardcoded "Exhibit" break (only when the line does not end with a dot).
    if (
        ! $candidateNumbered &&
        strpos(substr(trim($candidate[ 'content' ]), 0, 15), 'Exhibit') !== false &&
        ! in_array(substr(trim($candidate[ 'content' ]), -1), ['.'])
    ) {
        return true;
    }

    // Hardcoded "Attachment" break.
    if (strpos(substr(trim(strtolower($candidate[ 'content' ])), 0, 15), 'attachment') !== false) {
        return true;
    }

    // The parent's last child is numbered like "1.2" and the candidate like "2".
    if ($candidateNumbered && $this->hasChild($parent)) {
        $lastChild = last($parent[ 'children' ]);
        if (
            $this->hasNumbering($lastChild) &&
            is_numeric($lastChild[ 'numbering' ]) &&
            strpos($lastChild[ 'numbering' ], ".") !== false &&
            is_numeric($candidate[ 'numbering' ]) &&
            strpos($candidate[ 'numbering' ], ".") === false
        ) {
            return true;
        }
    }

    return false;
}

/**
 * Decide whether $candidate belongs to $parent. The rules are evaluated in
 * the original order and short-circuit on the first one that holds.
 *
 * @param array $parent
 * @param array $candidate
 * @param array $lines     all lines of the document
 * @param int   $nextIndex index of the line following the candidate
 *
 * @return bool
 */
private function isContinuationOf($parent, $candidate, $lines, $nextIndex)
{
    $firstCharacter = substr($candidate[ 'content' ], 0, 1);

    return
        // Deeper indentation, both lines non-empty.
        (
            $candidate[ 'spaces' ] > $parent[ 'spaces' ] &&
            strlen($parent[ 'content' ]) &&
            strlen($candidate[ 'content' ])
        )
        // Unnumbered paragraph sitting between two clauses of the parent.
        || (
            isset($lines[ $nextIndex ]) &&
            $this->paragraphBetweenClauses($parent, $candidate, array_slice($lines, $nextIndex))
        )
        // The parent's last child is a list introduction (ends with ':').
        || (
            $this->hasChild($parent) &&
            $this->lastChildIsList($parent) &&
            ($parent[ 'spaces' ] == 0 || $parent[ 'spaces' ] > $candidate[ 'spaces' ])
        )
        // Same indentation but the candidate's numbering has more dots or digits.
        || (
            $candidate[ 'spaces' ] == $parent[ 'spaces' ] &&
            isset($parent[ 'numbering' ]) &&
            isset($candidate[ 'numbering' ]) &&
            (
                substr_count($parent[ 'numbering' ], '.') < substr_count($candidate[ 'numbering' ], '.') ||
                $this->countDigits($parent[ 'numbering' ]) < $this->countDigits($candidate[ 'numbering' ])
            )
        )
        // The parent introduces a list and the candidate looks like a list item.
        || (
            $this->paragraphIsList($parent) &&
            (ctype_lower($firstCharacter) || in_array($firstCharacter, ['{', '•']))
        )
        // Parent numbered "1", candidate numbered "1.1".
        || (
            $this->hasNumbering($parent) &&
            $this->hasNumbering($candidate) &&
            is_numeric($candidate[ 'numbering' ]) &&
            strpos($candidate[ 'numbering' ], ".") !== false &&
            strpos($parent[ 'numbering' ], ".") === false &&
            ! is_int($candidate[ 'numbering' ] - $parent[ 'numbering' ])
        )
        // Same indentation as the parent's deepest last child.
        || (
            $this->hasChild($parent) &&
            $candidate[ 'spaces' ] == $this->getLastChildForParagraph($parent)[ 'spaces' ]
        )
        // Quoted term under a "definitions and ..." heading. 'â' is what
        // utf8_encode() yields for the first byte of a UTF-8 curly quote.
        || (
            strpos(strtolower($parent[ 'content' ]), 'definitions and') !== false &&
            in_array(utf8_encode($firstCharacter), ['â', '"'])
        )
        // The parent's deepest last child is a list introduction.
        || (
            $this->hasChild($parent) &&
            $this->paragraphIsList($this->getLastChildFromParagraph($parent))
        )
        // Plain text following plain text.
        || (
            ($this->hasChild($parent) || $parent[ 'spaces' ] == $candidate[ 'spaces' ]) &&
            ! $this->hasNumbering($this->getLastChildForParagraph($parent)) &&
            ! $this->hasNumbering($candidate)
        );
}
/**
 * Recheck missed clauses and assign them to a parent where appropriate.
 *
 * A numbered clause adopts the clauses that follow it when the next clause is
 * either unnumbered, or numbered with more than one non-empty dot-separated
 * segment while the clause itself has at most one. Adoption continues until a
 * clause with a numeric, single-segment numbering is met. Clauses whose
 * content is empty (or '0') are left out of the result.
 *
 * Adopted indexes are stored as array keys so the "already managed" lookup
 * tests the index itself rather than the length of a list of values.
 *
 * @param array $clauses list of top-level clauses
 *
 * @return array
 */
private function recheckClauses($clauses)
{
    $checkedClauses = [];
    // Set of clause indexes already placed in the result: index => true.
    $alreadyManaged = [];
    $total = count($clauses);

    for ($i = 0; $i < $total; $i++) {
        if (isset($alreadyManaged[ $i ])) {
            continue;
        }

        $current = $clauses[ $i ];
        $j = $i + 1;

        if (
            isset($clauses[ $j ]) &&
            $clauses[ $j ][ 'content' ] &&
            $this->hasNumbering($current) &&
            (
                (! $this->hasNumbering($clauses[ $j ])) ||
                (
                    $this->hasNumbering($clauses[ $j ]) &&
                    is_numeric($clauses[ $j ][ 'numbering' ]) &&
                    count(array_filter(explode('.', $clauses[ $j ][ 'numbering' ]))) > 1 &&
                    is_numeric($clauses[ $i ][ 'numbering' ]) &&
                    count(array_filter(explode('.', $clauses[ $i ][ 'numbering' ]))) <= 1
                )
            )
        ) {
            for (; $j < $total; $j++) {
                // The next numeric, single-segment numbering ends the adoption.
                if (
                    isset($clauses[ $j ][ 'numbering' ]) &&
                    is_numeric($clauses[ $j ][ 'numbering' ]) &&
                    count(array_filter(explode('.', $clauses[ $j ][ 'numbering' ]))) == 1
                ) {
                    break;
                }
                $current[ 'children' ][] = $clauses[ $j ];
                $alreadyManaged[ $j ] = true;
            }
        }

        $alreadyManaged[ $i ] = true;

        if ($current[ 'content' ]) {
            $checkedClauses[] = $current;
        }
    }

    return $checkedClauses;
}
/**
* Fold $child into $parent and return the updated parent.
*
* Depending on the rules below the child is either
*  - ignored (empty content),
*  - concatenated to the parent's own content,
*  - concatenated to the content of the parent's last child (or of that child's last child),
*  - appended as a new entry of $parent['children'], or
*  - nested under the parent's last child (directly or through a recursive call).
*
* The decisions are based on the 'content', 'spaces', 'numbering' and 'children'
* keys of both paragraphs; the first three branches only apply in PDF mode.
*
* NOTE(review): the branches are order- and precedence-sensitive; see the
* inline notes before changing any condition.
*
* @param array $parent paragraph receiving the child
* @param array $child  paragraph to attach
*
* @return mixed the updated parent
*/
private function handlePossibleChild($parent, $child)
{
// Nothing to attach.
if (empty($child[ 'content' ])) {
return $parent;
}
if ($this->pdf && ! isset($parent[ 'children' ]) && (ctype_lower(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), 0, 1),
['}', ')']) || is_numeric(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), -1),
['.', ',', ':']) || (! in_array(substr(trim($child[ 'content' ]), -1), [
'.',
',',
':'
])) && $child[ 'spaces' ] > $parent[ 'spaces' ]) && ((in_array(substr(trim($parent[ 'content' ]),
-1), ['}', ')', ',', '"']) || ! in_array(substr(trim($parent[ 'content' ]), -1),
['.', ':', '!']) || ctype_lower(substr(trim($parent[ 'content' ]), -1))))) {
// PDF mode, parent without children: the child is treated as the continuation of a
// wrapped line and is concatenated to the parent's content.
//dd($parent,$child);
$parent[ 'content' ] .= ' '.$child[ 'content' ];
return $parent;
} elseif ($this->pdf && isset($parent[ 'children' ]) && (ctype_lower(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), 0, 1),
['}', ')']) || is_numeric(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), -1),
['.', ',', ':'])) && ((in_array(substr(trim($this->getLastChildForParagraph($parent)[ 'content' ]),
-1), [
'}',
')',
',',
'"'
]) || ! in_array(substr(trim($this->getLastChildForParagraph($parent)[ 'content' ]), -1),
['.', ':', '!']) || ctype_lower(substr(trim($this->getLastChildForParagraph($parent)[ 'content' ]),
-1))))) {
// PDF mode, parent with children: the merge into the last child only happens for the
// hard-coded case below (child contains 'thirty' and the parent is not numbered '1.');
// otherwise execution falls through to the generic handling after this if/elseif chain.
// NOTE(review): $parent['numbering'] is read without an isset() check - confirm it is always set here.
if (strpos($child[ 'content' ], 'thirty') !== false && $parent[ 'numbering' ] !== '1.') {
$lastParentChild = last($parent[ 'children' ]);
$lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
return $parent;
}
} elseif ($this->pdf && ! isset($parent[ 'children' ]) && $child[ 'spaces' ] >= $parent[ 'spaces' ] && ! $this->hasNumbering($child)) {
// PDF mode, parent without children, unnumbered child indented at least as much as the parent.
// The hasChild() branch cannot be taken: this elseif already requires 'children' to be unset,
// so the child text is always concatenated to the parent's content.
if ($this->hasChild($parent)) {
$lastParentChild = $this->getLastChildForParagraph($parent);
$lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
} else {
$parent[ 'content' ] .= ' '.$child[ 'content' ];
}
return $parent;
}
// First child of the parent.
if (! isset($parent[ 'children' ])) {
$parent[ 'children' ][] = $child;
return $parent;
}
$lastParentChild = last($parent[ 'children' ]);
// The last child ends with ':' (a list introduction) and the unnumbered child looks like a
// list item: nest it under the last child, or glue it to that child's last item when the
// item does not end with '.', ';' or ',' and the child does not start with '(', '{' or ':'.
if ($this->lastChildIsList($parent) && (ctype_lower(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), -1), [';']) || strpos($child[ 'content' ],
':') !== false || in_array(trim(substr(trim($child[ 'content' ]), 0, 1)),
['{', '('])) && ! $this->hasNumbering($child)) {
if (! isset($lastParentChild[ 'children' ])) {
$lastParentChild[ 'children' ][] = $child;
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
return $parent;
}
if (isset($lastParentChild[ 'children' ]) && ! in_array(substr(last($lastParentChild[ 'children' ])[ 'content' ],
-1), ['.', ';', ',']) && ! in_array(substr(trim($child[ 'content' ]), 0, 1),
['(', '{', ':']) && ! $this->hasNumbering($child)) {
$lastParentChild[ 'children' ][ count($lastParentChild[ 'children' ]) - 1 ][ 'content' ] .= ' '.trim($child[ 'content' ]);
} else {
$lastParentChild[ 'children' ][] = $child;
}
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
return $parent;
}
// Both numbered, the last child ends with ':' and the child's numbering has more digits:
// the child is nested under the last child.
if ($this->hasNumbering($lastParentChild) && $this->hasNumbering($child) && substr(trim($lastParentChild[ 'content' ]),
-1) == ':' && count(array_filter(str_split($lastParentChild[ 'numbering' ]),
'is_numeric')) < count(array_filter(str_split($child[ 'numbering' ]), 'is_numeric'))) {
$lastParentChild[ 'children' ][] = $child;
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
return $parent;
}
if ($lastParentChild[ 'spaces' ] == $child[ 'spaces' ]) {
// Same indentation as the last child.
// NOTE(review): the trailing "|| $this->hasNumbering($child)" is already implied by the
// preceding "&& $this->hasNumbering($child)", so the punctuation test has no effect.
if ($this->hasNumbering($lastParentChild) && $this->hasNumbering($child) && (in_array(substr(trim($lastParentChild[ 'content' ]),
-1), ['.', ';']) || $this->hasNumbering($child))) {
// Consecutive numbering (last segment differs by exactly 1) or two quoted/braced terms:
// the child becomes a sibling; otherwise it is nested under the last child.
if (($this->hasNumbering($lastParentChild) && $this->hasNumbering($child) && ((int) substr($child[ 'numbering' ],
strrpos($child[ 'numbering' ], '.') + 1) - (int) substr($lastParentChild[ 'numbering' ],
strrpos($lastParentChild[ 'numbering' ],
'.') + 1) == 1)) || (in_array(utf8_encode(substr($lastParentChild[ 'content' ], 0,
1)), ['â', '"', '{']) && in_array(utf8_encode(substr($child[ 'content' ], 0, 1)),
['â', '"', '{', '•']))) {
$parent[ 'children' ][] = $child;
} else {
$lastParentChild[ 'children' ][] = $child;
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
}
} else {
// At least one of the two is unnumbered: the child text continues the last child.
$lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
}
// NOTE(review): in the condition below "&&" binds tighter than "||", so it reads as
// (unnumbered child && last child does not START with . ; } && last child ends lower-case)
// || (last child ends with ',' && child starts lower-case or with { ( )) - confirm that
// this grouping is the intended one.
} elseif (! $this->hasNumbering($child) && ! in_array(substr(trim($lastParentChild[ 'content' ]), 0, 1),
['.', ';', '}']) && (ctype_lower(substr(trim($lastParentChild[ 'content' ]),
-1))) || in_array(substr(trim($lastParentChild[ 'content' ]), -1),
[',']) && (ctype_lower(substr(trim($child[ 'content' ]), 0,
1)) || in_array(substr(trim($child[ 'content' ]), 0, 1), ['{', '(', ')']))) {
$lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
} else {
// When the parent's deepest last descendant ends with '.', ';' or '}', the child is nested
// directly under the last child; otherwise the same rules are applied one level down.
if ($this->hasChild($parent) && in_array(substr(trim($this->getLastChildForParagraph($parent)[ 'content' ]),
-1), ['.', ';', '}'])) {
$lastParentChild[ 'children' ][] = $child;
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
return $parent;
}
$lastParentChild = $this->handlePossibleChild($lastParentChild, $child);
$parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;
}
return $parent;
}
/**
 * Tell whether a paragraph introduces a list, i.e. whether its trimmed
 * content ends with a colon.
 *
 * @param array $paragraph paragraph with a 'content' key
 *
 * @return bool
 */
private function paragraphIsList($paragraph)
{
    $trimmedContent = trim($paragraph[ 'content' ]);
    $finalCharacter = substr($trimmedContent, -1);

    return $finalCharacter === ':';
}
/**
 * Tell whether the last direct child of a paragraph introduces a list
 * (its trimmed content ends with a colon). A paragraph without children
 * never qualifies.
 *
 * @param array $paragraph
 *
 * @return bool
 */
private function lastChildIsList($paragraph)
{
    if (! $this->hasChild($paragraph)) {
        return false;
    }

    $finalChild = last($paragraph[ 'children' ]);
    $finalCharacter = substr(trim($finalChild[ 'content' ]), -1);

    return $finalCharacter == ':' ? true : false;
}
/**
 * Return the deepest last descendant of a paragraph: the last child of the
 * last child, and so on. A paragraph without children is returned as is.
 *
 * @param array $paragraph
 *
 * @return mixed
 */
private function getLastChildForParagraph($paragraph)
{
    if (! $this->hasChild($paragraph)) {
        return $paragraph;
    }

    return $this->getLastChildFromParagraph(last($paragraph[ 'children' ]));
}
/**
 * Tell whether a paragraph carries a (non-null) 'children' entry.
 *
 * @param array $paragraph
 *
 * @return bool
 */
private function hasChild($paragraph)
{
    return isset($paragraph[ 'children' ]);
}
/**
* Extract the clause numbering (e.g. "1.", "2.3") from a raw line.
*
* Returns false when no numbering is recognised, when the trimmed line starts
* with '(' or '{', or - in PDF mode - when the line contains both "Page" and "of".
*
* @param $paragraph the raw line (a string, not a paragraph array)
*
* @return false|string the numbering with any ".." collapsed to ".", or false
*/
private function getNumbering($paragraph)
{
if (isset($paragraph)) {
$paragraphContent = trim($paragraph);
// Lines opening with a parenthesis or a placeholder brace are never treated as numbered.
if (in_array(substr($paragraphContent, 0, 1), ['(', '{'])) {
return false;
}
// PDF page markers are not numbered clauses.
if ($this->pdf && isset($paragraph) && strpos($paragraphContent,
'Page') !== false && strpos($paragraphContent, 'of') !== false) {
return false;
}
// Build the candidate string, from the innermost call outwards:
//  1. drop everything from the first '{' that is followed by at least one character;
//  2. turn every ')' into '.';
//  3. keep only ASCII letters, digits, '.' and ')', then keep the first 6 characters;
//  4. keep only digits, '.' and ')'.
// The anchored pattern is then matched against what is left.
preg_match('/^([-+]?\d*\.?\d+?\d*\.?\d+|\d+(\.?)*)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '',
substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '',
preg_replace('/\)/', '.', preg_replace("/\{.+/", "", trim($paragraphContent))))), 0, 6)),
$paragraphNumbering);
// Accept the match when the character following it in the line is a space, tab, '.' or ')',
// or when the match itself ends with one of those, or when it is numeric.
if (count($paragraphNumbering) && (in_array(substr($paragraphContent, strlen($paragraphNumbering[ 0 ]), 1),
[' ', "\t", '.', ')']) || in_array(substr($paragraphNumbering[ 0 ], -1),
[' ', "\t", '.', ')']) || is_numeric($paragraphNumbering[ 0 ]))) {
// Reject a number wrapped in parentheses such as "(1)".
// NOTE(review): strpos() returns false when the match is not literally present in the line
// (e.g. after ')' was rewritten to '.'), and position 0 yields offset -1, i.e. the last
// character of the line - confirm these cases behave as intended.
$locationOfNumbering = strpos($paragraphContent,$paragraphNumbering[0]);
if(substr($paragraphContent,$locationOfNumbering-1,1)=='(' &&substr($paragraphContent,$locationOfNumbering+1,1)==')'){
return false;
}
return str_replace('..', '.', $paragraphNumbering[ 0 ]);
}
return false;
}
return false;
}
/**
 * Tell whether an unnumbered paragraph sits between two clauses of the same
 * numbered block.
 *
 * The numbering of the deepest last descendant of $first is compared with the
 * first numbered line found in $list: the paragraph is "between clauses" when
 * that line continues the sequence at the same depth (last segment greater by
 * exactly one, same number of dots) or belongs to a shallower level (fewer
 * dots). Only the first numbered line of $list is ever considered.
 *
 * @param array $first     the numbered paragraph currently being built
 * @param array $paragraph the paragraph under test
 * @param array $list      the raw lines following the paragraph
 *
 * @return bool
 */
private function paragraphBetweenClauses($first, $paragraph, $list)
{
    if (! $this->hasNumbering($first) || isset($paragraph[ 'numbering' ])) {
        return false;
    }

    $anchor = $this->getLastChildFromParagraph($first);
    if (! isset($anchor[ 'numbering' ])) {
        return false;
    }

    $anchorIndex = last(array_filter(explode('.', $anchor[ 'numbering' ])));

    foreach ($list as $upcomingLine) {
        $upcomingNumbering = $this->getNumbering($upcomingLine);
        if (! $upcomingNumbering) {
            continue;
        }

        $upcomingIndex = last(array_filter(explode('.', $upcomingNumbering)));
        $continuesSequence = $upcomingIndex - $anchorIndex == 1;
        $anchorDots = substr_count($anchor[ 'numbering' ], '.');
        $upcomingDots = substr_count($upcomingNumbering, '.');

        if ($continuesSequence && $anchorDots == $upcomingDots) {
            return true;
        }

        return $anchorDots > $upcomingDots;
    }

    return false;
}
/**
 * Walk down the last child of each level and return the deepest one.
 * A paragraph without a 'children' entry is returned unchanged.
 *
 * @param mixed $paragraph
 *
 * @return mixed
 */
private function getLastChildFromParagraph($paragraph)
{
    $node = $paragraph;

    while (isset($node[ 'children' ])) {
        $node = last($node[ 'children' ]);
    }

    return $node;
}
/**
 * Append the content of $append to the deepest last descendant of $paragraph
 * and return the updated paragraph.
 *
 * The previous implementation returned the deepest descendant untouched when
 * the paragraph had children, so nothing was appended and the rest of the tree
 * was lost. The text is now appended at the leaf and the whole (updated)
 * paragraph is returned in every case.
 *
 * @param array $paragraph paragraph tree to extend
 * @param array $append    paragraph whose 'content' is appended
 *
 * @return array
 */
private function appendToLastChildFromParagraph($paragraph, $append)
{
    if (! empty($paragraph[ 'children' ])) {
        // Locate the key of the last child without assuming sequential keys.
        end($paragraph[ 'children' ]);
        $lastKey = key($paragraph[ 'children' ]);

        $paragraph[ 'children' ][ $lastKey ] = $this->appendToLastChildFromParagraph(
            $paragraph[ 'children' ][ $lastKey ],
            $append
        );

        return $paragraph;
    }

    $paragraph[ 'content' ] .= ' '.$append[ 'content' ];

    return $paragraph;
}
/**
 * Tell whether a paragraph carries a non-empty 'numbering' entry.
 *
 * Values PHP considers empty ('', '0', null, ...) count as "no numbering".
 *
 * @param mixed $paragraph
 *
 * @return bool
 */
private function hasNumbering($paragraph)
{
    return ! empty($paragraph[ 'numbering' ]);
}
/**
 * Normalise a value for break-point matching: dots are removed and the text
 * is upper-cased. Arrays are processed recursively, keeping their keys.
 *
 * @param array|string $value
 *
 * @return array|string
 */
private function nestedUppercase($value)
{
    if (! is_array($value)) {
        // Strip the unwanted characters before changing the case.
        $withoutDots = str_replace('.', '', $value);

        return strtoupper($withoutDots);
    }

    $normalised = [];
    foreach ($value as $key => $item) {
        $normalised[ $key ] = $this->nestedUppercase($item);
    }

    return $normalised;
}
}