Browse Source

Improve README. Cleanup

dev
Orzu Ionut 2 years ago
parent
commit
f42d01b3a4
  1. 8
      .env.example
  2. 531
      README.md
  3. 9
      app/Console/Commands/DeployWorker.php
  4. 57
      app/Console/Commands/TestMachine.php
  5. 5
      app/Jobs/IngestDocuments.php
  6. 17
      app/Parser/DocxParser/Footer.php
  7. 41
      app/Parser/DocxParser/Footnote.php
  8. 11
      app/Parser/DocxParser/Header.php
  9. 26
      app/Parser/DocxParser/Link.php
  10. 77
      app/Parser/DocxParser/ListItemRun.php
  11. 11
      app/Parser/DocxParser/PageBreak.php
  12. 269
      app/Parser/DocxParser/ParseDocx.php
  13. 32
      app/Parser/DocxParser/PreserveText.php
  14. 41
      app/Parser/DocxParser/Section.php
  15. 35
      app/Parser/DocxParser/Table.php
  16. 41
      app/Parser/DocxParser/Table/Cell.php
  17. 41
      app/Parser/DocxParser/Table/Row.php
  18. 147
      app/Parser/DocxParser/Text.php
  19. 17
      app/Parser/DocxParser/TextBreak.php
  20. 74
      app/Parser/DocxParser/TextRun.php
  21. 72
      app/Parser/DocxParser/Title.php
  22. 117
      app/Parser/DocxParser/Traits/Helper.php
  23. 527
      app/Parser/HtmlParser/ParseHtml.php
  24. 670
      app/Parser/ParseHtmlArray.php
  25. 406
      app/Parser/ParseXml.php
  26. 35
      database/migrations/2019_08_19_000000_create_failed_jobs_table.php
  27. 16
      database/seeds/DatabaseSeeder.php
  28. 21
      package.json
  29. 1
      resources/js/app.js
  30. 28
      resources/js/bootstrap.js
  31. 19
      resources/lang/en/auth.php
  32. 19
      resources/lang/en/pagination.php
  33. 22
      resources/lang/en/passwords.php
  34. 151
      resources/lang/en/validation.php
  35. 1
      resources/sass/app.scss
  36. 5
      resources/views/errors/401.blade.php
  37. 5
      resources/views/errors/403.blade.php
  38. 4
      resources/views/errors/404.blade.php
  39. 4
      resources/views/errors/405.blade.php
  40. 5
      resources/views/errors/419.blade.php
  41. 6
      resources/views/errors/429.blade.php
  42. 5
      resources/views/errors/500.blade.php
  43. 5
      resources/views/errors/503.blade.php
  44. 126
      resources/views/errors/minimal.blade.php
  45. 18
      routes/api.php
  46. 15
      routes/channels.php
  47. 38
      tests/Feature/ProcessDocxDocumentTest.php
  48. 15
      webpack.mix.js
  49. 6173
      yarn.lock

8
.env.example

@ -8,16 +8,16 @@ LOG_CHANNEL=stack
BROADCAST_DRIVER=log
CACHE_DRIVER=file
QUEUE_CONNECTION=sync
SESSION_DRIVER=file
QUEUE_CONNECTION=redis
SESSION_DRIVER=redis
SESSION_LIFETIME=120
REDIS_HOST=127.0.0.1
REDIS_PASSWORD=null
REDIS_PORT=6379
REDIS_QUEUE=
REDIS_QUEUE=sd_ingest
WEBHOOK_CORE_URL=
WEBHOOK_CORE_SECRET=
USER_HOME_PATH=
USER_HOME_PATH=/tmp

531
README.md

@ -1,67 +1,67 @@
## About S&D Ingest
S&D Ingest is the module that receives raw files in different formats and sends them to any other module after the files have been processed.
## Search and Displace Ingest
## :cyclone: Server Requirements:
- php7.4 [https://www.php.net] [LICENSE](https://www.php.net/license/index.php)
- apache [https://httpd.apache.org] [LICENSE](https://www.apache.org/licenses/LICENSE-2.0)
- redis [https://redis.io] [LICENSE](https://redislabs.com/legal/licenses/)
- postgresql-server [https://www.postgresql.org] [LICENSE](https://tldrlegal.com/license/postgresql-license-(postgresql))
- supervisor [http://supervisord.org] [LICENSE](https://github.com/Supervisor/supervisor/blob/master/LICENSES.txt)
- libreoffice [https://www.libreoffice.org] [LICENSE](https://www.libreoffice.org/about-us/licenses)
- python [https://www.python.org/] [LICENSE](https://www.python.org/download/releases/2.7/license/)
- pdftotext [https://github.com/jalan/pdftotext] [LICENSE](https://github.com/jalan/pdftotext/blob/master/LICENSE)
- python 3.8 [https://www.python.org/] [LICENSE](https://docs.python.org/3/license.html)
- composer [https://getcomposer.org/] [LICENSE](https://github.com/composer/composer/blob/main/LICENSE)
## :zap: Built with:
- Laravel Framework ^6.2
## :rocket: Installation
### Ubuntu Packages
```bash
# LibreOffice
apt-get install python-software-properties
apt-add-repository ppa:libreoffice/ppa
apt-get update
apt-get install libreoffice
# Python
apt-get update
apt-get install software-properties-common
add-apt-repository ppa:deadsnakes/ppa
apt-get install supervisor python3.8 python3.8-dev
apt-get install redis-server
supervisorctl restart all
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python get-pip.py
rm -rf get-pip.py
apt install libpoppler-cpp-dev
pip install --upgrade pip
pip install pdftotext supervisor
systemctl enable supervisor
php artisan queue:deploy-supervisor
systemctl restart supervisor
composer install
npm install
cp .env.example .env
php artisan key:generate
sudo -u postgres psql
postgres=# create database mydb;
postgres=# create user myuser with encrypted password 'mypass';
postgres=# grant all privileges on database mydb to myuser;
#update the .env with the current postgres credentials
sudo mkdir /var/log/amqp
sudo mkdir /var/log/queue
php artisan migrate
php artisan queue:deploy-supervisor
supervisorctl start all
# Redis
apt-get install redis-server
# PDF Convertor
apt-get install libpoppler-cpp-dev
apt-get install poppler-utils
# Tesseract OCR
add-apt-repository ppa:alex-p/tesseract-ocr-devel
apt-get update
apt install tesseract-ocr
apt-get install tesseract-ocr
# Unpaper
apt-get install unpaper
# DOCX to PDF Convertor
apt-get install unoconv
```
### Libraries Packages
```bash
# Pip
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python get-pip.py
rm -rf get-pip.py
pip install --upgrade pip
# Pdftotext
pip install pdftotext
# Supervisor
pip install supervisor
systemctl enable supervisor
mkdir /var/log/amqp
mkdir /var/log/queue
# Deskew
cd DESKEW_INSTALLATION_DIRECTORY
cd Bin
@ -72,450 +72,41 @@ pip3 install opencv-python
cd DEWARP_INSTALLATION_DIRECTORY
pip3 install -r requirements.txt
```
# MAT2 (Metadata remover) - Not used at the moment
pip3 install mat2
apt-get install gir1.2-poppler-0.18
### Install app
```bash
# Generate environment file
cp .env.example .env
# DOCX to PDF Convertor
apt-get install unoconv
# Install backend packages
composer install
```
# Generate app key
php artisan key:generate
# Change the value for the QUEUE_CONNECTION to redis, if it is not set already
## Local Usage
# Deploy supervisor
php artisan queue:deploy-supervisor
```bash
php artisan serve
php artisan queue:work
supervisorctl start all
```
### Search and Displace Core Setup
- Install the `Search and Displace Core` app, found here https://git.law/newroco/searchanddisplace-core
- Get the URL of the `Search and Displace Core` app and add it to the `WEBHOOK_CORE_URL` variable in `.env`
- Add in `.env` the `WEBHOOK_CORE_SECRET` value which needs to be the same value as the `WEBHOOK_CLIENT_SECRET` in
the `Search and Displace Core` app's `.env` file
## PHP Packages
- cebe/markdown [LICENSE](https://github.com/cebe/markdown/blob/master/LICENSE)
- fideloper/proxy [LICENSE](https://github.com/fideloper/TrustedProxy/blob/master/LICENSE.md)
- laravel/framework [LICENSE](https://github.com/laravel/framework/blob/7.x/LICENSE.md)
- laravel/tinker [LICENSE](https://github.com/laravel/tinker/blob/2.x/LICENSE.md)
- league/html-to-markdown [LICENSE](https://github.com/thephpleague/html-to-markdown/blob/master/LICENSE)
- phpoffice/phpword [LICENSE](https://github.com/PHPOffice/PHPWord/blob/0.17.0/LICENSE)
- predis/predis [LICENSE](https://github.com/predis/predis/blob/main/LICENSE)
- spatie/laravel-webhook-server [LICENSE](https://github.com/spatie/laravel-webhook-server/blob/master/LICENSE.md)
## Current running process
- [DOC,DOCX,RTF etc..] are first being converted to docx and then converted to .txt using https://www.libreoffice.org
- [PDF] files are converted to .txt
- The resulting .txt file is processed using our own logic/algorithm and clause breaking points into an array that looks similar to this:
```json
{
"content": "Definitions and Interpretation",
"spaces": 4,
"numbering": "1.",
"children": [
{
"content": "In this Agreement, the following expressions shall have the meanings set opposite them, unless inconsistent with the context or otherwise specified:",
"spaces": 8,
"numbering": "1.1",
"children": [
{
"content": "“Agreement” this agreement including all schedules, appendices and exhibits attached herein;",
"spaces": 0
},
{
"content": "“Associated Company” any company which is listed in Schedule 2 (as may be amended from time to time in writing) and which is in relation to either party its Parent undertaking or its subsidiary undertaking or a subsidiary undertaking of its Parent undertaking or any other person controlled by it or under the same control (where “control” is to be construed in accordance with section 1124 of the Corporation Tax Act 2010) whether direct or indirect. “Parent undertaking” shall have the meaning attributed thereto in Section 1162 of the Companies Act 2006;",
"spaces": 0
},
{
"content": "“Commencement Date” [TBC] “Confidential Information” collectively and individually, all or any document or information of any nature in any format, including oral, written or electronic form relating to either party or their Associated Companies’ or either of their businesses, including technology, Customers, Customer Information, supplier, employees, finances, data, products, services, trade secrets, processes, designs, drawings, diagrams, plans, specifications, formulae, testing procedures, computer software, reports, investigative studies, manuals, assets, costs, prices, marketing opportunities, proprietary information, Know-how, the terms of this Agreement and any other information or material relating to the information described above which (i) is disclosed by either party (or by any individual or legal entity acting in their name or on their behalf, including employees, consultants, sub-contractors, advisors of any kind and agents) or (ii) which comes to the attention of either party (or any individual or legal entity acting in their name or on their behalf, including employees, consultants, sub-contractors, advisors of any kind and agents) during the course of the carrying out of the rights or obligations under this Agreement;",
"spaces": 0
},
{
"content": "“Customers” customers of {P1_Name} and/or its Associated Companies from time to time who owe a Debt to {P1_Name};",
"spaces": 0
},
{
"content": "“Customer Data” any information given by the Customer directly to the {P2_Name} or its personnel;",
"spaces": 0
},
{
"content": "“Customer Information” any Customers’ personal information supplied to the {P2_Name} by or on behalf of {P1_Name} during the performance of this Agreement, including Personal Data, but excluding Customer Data;",
"spaces": 0
},
{
"content": "“Data Protection Legislation” all applicable legislation concerning the protection of individuals with regard to the processing of Personal Data and the free movement of such data including the Data Protection Act 1998 and any regulations made under such legislation and any relevant codes of practice and guidance notes issued from time to time by the Information Commissioner (or its successor);",
"spaces": 0
},
{
"content": "“Debt” any monies owed by the Customer to {P1_Name} which have remained unpaid by the Customer contrary to the terms and conditions between {P1_Name} and the Customer governing the repayment of sums owed;",
"spaces": 0
},
{
"content": "“Debt Management Plan” a plan outlined by the {P2_Name} and agreed by the Customer which details the amount and frequency of payments to be made to each of the Customer’s creditors;",
"spaces": 0
},
{
"content": "“Disbursement” total amount remitted to {P1_Name} on a monthly basis by the {P2_Name} for application to the Customer’s account in respect of their Offer;",
"spaces": 0
},
{
"content": "“European Economic Area” the European Economic Area comprising of the following countries as at the Commencement Date: Austria; Belgium; Bulgaria; Cyprus; the Czech Republic; Denmark; Estonia; Finland; France; Germany; Greece; Hungary; Ireland; Italy; Latvia; Lithuania; Luxembourg; Malta; the Netherlands; Poland; Portugal; Romania; Slovakia; Slovenia; Spain; Sweden; the United Kingdom; Iceland; Liechtenstein and Norway, as amended from time to time;",
"spaces": 0
},
{
"content": "“EU Model Terms” the set of model contractual clauses which the Information Commissioner has authorised for use by Data Controllers (as such term is defined in the Data Protection Legislation) established in the European Union where there is a transfer of Personal Data to Data Processors (as such term is defined in the Data Protection Legislation) outside of the European Economic Area;",
"spaces": 0
},
{
"content": "“Facility” the {P2_Name} site authorised by {P1_Name} where the processing and/or storage of Personal Data supplied by {P1_Name} pursuant to this Agreement takes place. For the purposes of this Agreement that site shall be located at {P1_Reg} or such other place as may be notified in writing to {P1_Name} from time to time;",
"spaces": 0
},
{
"content": "“Force Majeure” any acts, events, omissions or accidents beyond the reasonable control of either Party, including but not limited to acts of God, extreme adverse weather conditions or natural disaster, war, threat of or preparation for war, armed conflict, imposition of sanctions, embargo, breaking off of diplomatic relations or similar actions, terrorist attack, civil war, civil commotion or riots, nuclear, chemical or biological contamination or sonic boom, compliance with any law, regulation or directive, fire, explosion or accidental damage, failure of plant machinery, machinery, computers or vehicles;",
"spaces": 0
},
{
"content": "“Information Commissioner” the independent authority in the UK (or its successor body) which regulates information rights;",
"spaces": 0
},
{
"content": "“Initial Period” three (3) years from the Commencement Date;",
"spaces": 0
},
{
"content": "“Lending Code” a voluntary code of practice (enforced by the Lending Standards Board) which sets standards for financial institutions and provides consumers with protection and explanation on how such institutions are expected to deal with them day-to-day and in times of financial difficulties;",
"spaces": 0
},
{
"content": "“Notification” written notification from the {P2_Name} to {P1_Name} that it has obtained Permission;",
"spaces": 0
},
{
"content": "“Offer” a statement of proposed amount to be repaid by the Customer to {P1_Name} in respect of the Customer’s Debt including instalment plans;",
"spaces": 0
},
{
"content": "“Payment Break” instance where the Customer fails to make an agreed repayment to the {P2_Name} for payment to their creditors;",
"spaces": 0
},
{
"content": "“Permission” written confirmation (which may be confirmation by email or other electronic means) from the Customer to the {P2_Name} that they are appointing the {P2_Name} to act on the Customer’s behalf in the management of the Customer’s Debt and authorising the {P2_Name} to negotiate payment terms with {P1_Name} in respect of the Customer’s Debt and authorising the {P2_Name} to have access to Customer Information;",
"spaces": 0
},
{
"content": "“Personal Data” personal data as defined in the Data Protection Legislation;",
"spaces": 0
},
{
"content": "“Regulatory Authorities” any body who, from time to time, has competent rule-making, investigatory and/or enforcement powers in relation to the business of {P1_Name} and/or its Associated Companies, including, without limitation, the Financial Conduct Authority, the Consumer Financial Protection Bureau, the Office of Fair Trading, the Information Commissioner’s Office, the Lending Standards Board, UK and US Government departments and organisations, the Office of the Comptroller of Currency, the Federal Reserve and other governmental or non-governmental regulatory authorities in the UK, US or other competent jurisdictions;",
"spaces": 0
},
{
"content": "“Regulatory Requirements”",
"spaces": 0
},
{
"content": "(a) all applicable laws, statutes, regulations, ordinances or subordinate legislation in force from time to time to which this Agreement or a party is subject;",
"spaces": 4
},
{
"content": "(b) the common law as applicable to the parties from time to time;",
"spaces": 4
},
{
"content": "(c) all binding court orders, judgements or decrees;",
"spaces": 4
},
{
"content": "all applicable directives, policies, rules, orders, code of conduct or practice or applicable guidance (including the Lending Code and the Financial Conduct Authority TCF principles that are binding on a party and that are made or given by any government, an agency thereof, any Regulatory Authority or other regulatory authority, including in the case of the {P2_Name}, laws and rules imposed by local regulatory authorities in the country where it is located;",
"spaces": 0
},
{
"content": "“Working Day” any day on which banks in London are open for the transaction of normal business excluding Saturdays, Sundays and bank and public holidays in England and Wales.",
"spaces": 0
}
]
},
{
"content": "In this Agreement:",
"spaces": 8,
"numbering": "1.2",
"children": [
{
"content": "references to Recitals, Clauses and Schedules and their sub-divisions are to the Recitals to, Clauses of and Schedules to this Agreement and their sub-divisions respectively, unless specified otherwise;",
"spaces": 12,
"numbering": "1.2.1"
},
{
"content": "the index and headings are included for convenience only and shall not affect the construction or interpretation of this Agreement;",
"spaces": 12,
"numbering": "1.2.2"
},
{
"content": "words importing gender include the other gender and the singular includes the plural and vice versa;",
"spaces": 12,
"numbering": "1.2.3"
},
{
"content": "references to persons include individuals, bodies corporate, firms, unincorporated associations and governmental, semi-governmental and local authorities or agencies;",
"spaces": 12,
"numbering": "1.2.4"
},
{
"content": "references to the words “include”, “including”, “in particular” or similar words or expressions will be construed without limitation and accordingly will not limit the words preceding or following them;",
"spaces": 12,
"numbering": "1.2.5"
},
{
"content": "where expressions used in this Agreement are not specifically defined and are capable of having a special meaning according to the usage or custom of the card services sector or banking services sector, such expressions are to be interpreted accordingly. Any meaning given in this Agreement to a defined term shall prevail over such other special meaning;",
"spaces": 12,
"numbering": "1.2.6"
},
{
"content": "references to a “party” or “parties” will mean either {P1_Name} and/or the {P2_Name} as the context requires and references to a third party will mean any person other than the parties;",
"spaces": 12,
"numbering": "1.2.7"
},
{
"content": "except where expressly stated otherwise, references to any statute, legislation, code of practice or other regulation will include any sub-ordinate legislation and any equivalent regulation in any relevant jurisdiction, as amended, modified, consolidated, re-enacted and/or replaced and in force from time to time;",
"spaces": 12,
"numbering": "1.2.8"
},
{
"content": "any negative obligation imposed on any party shall be construed as if it were also an obligation not to permit or suffer the act or thing in question and any positive obligation imposed on any party shall be construed as if it were also an obligation to procure that the act or thing in question be done;",
"spaces": 12,
"numbering": "1.2.9"
},
{
"content": "the Schedules and Appendices (if any) form part of this Agreement and shall be construed and have the same full force and effect as if expressly set out in the body of this Agreement. To the extent only of any conflict or inconsistency between the Clauses, Schedules and Appendices (if any), the Clauses will prevail and the order of precedence will be as follows:",
"spaces": 12,
"numbering": "1.2.10"
},
{
"content": "1 the provisions of the Clauses;",
"spaces": 16,
"numbering": "1.2.10"
},
{
"content": "2 the provisions of the Schedules; and",
"spaces": 16,
"numbering": "1.2.10"
},
{
"content": "3 the provisions of the Appendices.",
"spaces": 16,
"numbering": "1.2.10"
}
]
}
]
},
{
"content": "Obligations of the {P2_Name}",
"spaces": 4,
"numbering": "2.",
"children": [
{
"content": "The {P2_Name} shall obtain the Permission from the Customer before proceeding with the Debt Management Plan.",
"spaces": 8,
"numbering": "2.1"
},
{
"content": "Subject at all times to the {P2_Name} being in receipt of the applicable Permission, {P2_Name} shall provide the corresponding Notification to {P1_Name} before or at the time of making the first Offer to {P1_Name}. In the absence of such Permission or Notification {P1_Name} shall not be obliged to provide any Customer Information to the {P2_Name}.",
"spaces": 8,
"numbering": "2.2"
},
{
"content": "{P1_Name} may request, and the {P2_Name} shall provide, any Permission to {P1_Name} within two (2) Working Days of such request by {P1_Name} to enable {P1_Name} to verify the Permissions stated in the Notifications provided that in the event that {P1_Name} requests ten (10) or more Permissions in any 12 hour period then the {P2_Name} shall provide such Permissions as promptly as is reasonably possible.",
"spaces": 8,
"numbering": "2.3"
},
{
"content": "Any delay or failure by the {P2_Name} to comply with Clause 2.3 shall be deemed a material breach of this Agreement and the provisions of clause 9.3 shall apply.",
"spaces": 8,
"numbering": "2.4"
},
{
"content": "Subject to Clause 2.1 and in accordance with the Debt Management Plan, the {P2_Name} shall make an Offer to {P1_Name} for the repayment of the Debt detailing the amount and frequency of proposed payments. Such Offer will be made in accordance with the Lending Code guidelines and based upon the principle of equitable distribution of available income (after priority payments) in line with the amount outstanding to each creditor.",
"spaces": 8,
"numbering": "2.5"
},
{
"content": "Upon receipt of the Offer from the {P2_Name} {P1_Name} may either;",
"spaces": 8,
"numbering": "2.6",
"children": [
{
"content": "accept the Offer; or",
"spaces": 12,
"numbering": "2.6.1"
},
{
"content": "reject the Offer where it considers the offer to be unreasonable by written notice to the {P2_Name}.",
"spaces": 12,
"numbering": "2.6.2"
}
]
},
{
"content": "In the event that {P1_Name} accepts an Offer, then the {P2_Name} shall arrange for the Disbursement to be repaid to {P1_Name} in accordance with the Offer within five (5) Working Days of receipt by the {P2_Name} of cleared funds from the Customer.",
"spaces": 8,
"numbering": "2.7"
},
{
"content": "In the event that {P1_Name} rejects the Offer, then the {P2_Name} shall review the Debt Management Plan and the {P2_Name} may make a new Offer to {P1_Name}.",
"spaces": 8,
"numbering": "2.8"
},
{
"content": "For the avoidance of doubt nothing in this Agreement constitutes an obligation on {P1_Name} to accept any unreasonable Offer made by the {P2_Name}.",
"spaces": 8,
"numbering": "2.9"
},
{
"content": "The {P2_Name} shall notify {P1_Name} in writing as soon as reasonably possible:",
"spaces": 8,
"numbering": "2.10",
"children": [
{
"content": "upon becoming aware of any withdrawal of a Permission or any amendment thereto made by a Customer; and",
"spaces": 12,
"numbering": "2.10.1"
},
{
"content": "of any circumstance or event which is reasonably likely to materially affect the {P2_Name}’s ability to comply with its obligations under this Agreement.",
"spaces": 12,
"numbering": "2.10.2"
}
]
},
{
"content": "Failure by the {P2_Name} to notify {P1_Name} pursuant to Clause 2.10.1 shall be deemed a material breach of this Agreement and the provisions of clause 9.3 shall apply.",
"spaces": 8,
"numbering": "2.11"
},
{
"content": "The {P2_Name} shall;",
"spaces": 8,
"numbering": "2.12",
"children": [
{
"content": "at all times act in accordance with and subject to any limitations set out in (i) the Permission and (ii) the requirements of this Agreement;",
"spaces": 12,
"numbering": "2.12.1"
},
{
"content": "comply with the reporting and review requirements set out in Schedule I.",
"spaces": 12,
"numbering": "2.12.2"
},
{
"content": "be at all times courteous and business like in its contact with the Customers;",
"spaces": 12,
"numbering": "2.12.3"
},
{
"content": "use its reasonable commercial endeavours to comply with any reasonable and lawful directions, orders and instructions which {P1_Name} may from time to time give to it in accordance with or to give effect to the provisions of this Agreement;",
"spaces": 12,
"numbering": "2.12.4"
},
{
"content": "identify, procure and keep in force all permits, certificates, licences, approvals, authorisations and consents which may be necessary in connection with the performance of its obligations under this Agreement;",
"spaces": 12,
"numbering": "2.12.5"
},
{
"content": "in performing its obligations under this Agreement, ensure that it is knowledgeable about and shall continue to be knowledgable about all Regulatory Requirements and that it shall comply with all Regulatory Requirements and (i) maintain evidence of its compliance with Regulatory Requirements, (ii) take all necessary steps required to comply with such Regulatory Requirements promptly upon becoming aware it is not so complying, and (iii) take all necessary steps to remedy any previous breaches of such Regulatory Requirements;",
"spaces": 12,
"numbering": "2.12.6"
},
{
"content": "where permitted to do so, promptly notify {P1_Name} in the event that a regulatory body who regulates {P1_Name} or the {P2_Name} conducts an audit or investigation of the {P2_Name} and disclose to {P1_Name} (subject always to the provisions of confidentiality set out at Clause 7) details of any adverse regulatory findings; and",
"spaces": 12,
"numbering": "2.12.7"
},
{
"content": "co-operate with {P1_Name} and assist them in their dealings with Regulatory Authorities to the extent reasonably required in relation to this Agreement including implementing such measures as are reasonably necessary and appropriate to effect compliance with Regulatory Requirements.",
"spaces": 12,
"numbering": "2.12.8"
}
]
},
{
"content": "{P1_Name} acknowledges and accepts that the {P2_Name} may give advice and assistance and provide services and products beyond the scope of the Debt Management Plan to Customers and that the {P2_Name} will not disclose any Customer Data to {P1_Name} without the Customer’s prior consent (which the {P2_Name} is under no obligation to seek).",
"spaces": 8,
"numbering": "2.13"
},
{
"content": "Any failure or inability of a Customer to agree to or comply with a Debt Management Plan or any other advice or assistance given by the {P2_Name} pursuant to this Agreement shall not cause the {P2_Name} to be in breach of the terms of this Agreement and shall not prevent the {P2_Name} from providing advice for debt negotiations, counselling and management solutions outside of the Services.",
"spaces": 8,
"numbering": "2.14"
},
{
"content": "The Parties acknowledge that the {P2_Name} is not acting as an agent of {P1_Name} and that it is not a debt collection agent of {P1_Name}.",
"spaces": 8,
"numbering": "2.15"
}
]
},
{
"content": "Rights and Obligations of {P1_Name}",
"spaces": 4,
"numbering": "3.",
"children": [
{
"content": "During the term of this Agreement {P1_Name} shall provide such information and assistance as is reasonably required for the {P2_Name} to perform its obligations under this Agreement.",
"spaces": 8,
"numbering": "2.16"
},
{
"content": "For the period of six months from termination or expiry of this Agreement {P1_Name} shall not, without the prior written agreement of the {P2_Name}, employ or engage on any basis or offer such employment or engagement to any of the {P2_Name}’s personnel provided that employment or engagement of any member of the {P2_Name}’s personnel pursuant to a bona fide recruitment campaign shall not be a breach of this clause.",
"spaces": 8,
"numbering": "2.17"
},
{
"content": "{P1_Name} represents and warrants that:",
"spaces": 8,
"numbering": "2.18",
"children": [
{
"content": "it has the requisite power and authority required by any applicable law or otherwise to enter into this Agreement and to carry out the obligations contemplated by the Agreement reliably and professionally and that the execution and performance of this Agreement has been duly authorised by the required corporate action by {P1_Name};",
"spaces": 12,
"numbering": "2.18.1"
},
{
"content": "it has and shall maintain during the continuance of this Agreement all necessary rights, licences and consents necessary to provide the Customer Information to the {P2_Name} and to perform its obligations under this Agreement.",
"spaces": 12,
"numbering": "2.18.2"
}
]
},
{
"content": "If {P1_Name} notifies the {P2_Name} in writing that amendments are required to be made to this Agreement (including any Schedule hereto) to ensure {P1_Name}’s compliance with its obligations to a Regulatory Authority and/or any Regulatory Requirements (including changes required in order to comply with any rules or guidance (including guidance as to interpretation of such rules) issued or published by or on behalf of such Regulatory Authorities or coming into force from time to time), the {P2_Name} shall be obliged to make such amendments as soon as reasonably practicable and in shall use reasonable commercial endeavours to ensure that such changes are made in sufficient time so as to ensure that {P1_Name} is complying with such obligations.",
"spaces": 8,
"numbering": "2.19"
},
{
"content": "In the event the {P2_Name} is unable to comply with any amendments as notified to it by {P1_Name} pursuant to Clause 3.1 or fails to comply within a reasonable time then {P1_Name} may terminate this Agreement immediately.",
"spaces": 8,
"numbering": "2.20"
},
{
"content": "{P1_Name} shall comply with the reporting and review requirements set out in Schedule I.",
"spaces": 8,
"numbering": "2.21"
},
{
"content": "Notwithstanding Clause 2.21 above {P1_Name} shall not make any changes to a Customer’s {P1_Name} account without direct contact with the Customer. For the avoidance of doubt the Permission shall only relate to the provision of information regarding a Customer’s {P1_Name} account.",
"spaces": 8,
"numbering": "2.22"
}
]
},
{
"content": "Conditions",
"spaces": 4,
"numbering": "4.",
"children": [
{
"content": "It is a condition of this Agreement that each party is entitled to enter into this Agreement and to perform its obligations set out herein.",
"spaces": 8,
"numbering": "2.23"
}
]
},
```
- spatie/pdf-to-text [LICENSE](https://github.com/spatie/pdf-to-text/blob/main/LICENSE.md)
- thiagoalessio/tesseract_ocr [LICENSE](https://github.com/thiagoalessio/tesseract-ocr-for-php/blob/main/MIT-LICENSE)

9
app/Console/Commands/DeployWorker.php

@ -44,14 +44,15 @@ class DeployWorker extends Command
{
$workerName = 'queue-worker-'.str_replace(' ', '-', strtolower(env('APP_NAME'))).'-'.str_replace(' ', '-', strtolower(env('APP_ENV')));
$workerFile = $workerName.'.conf';
try {
Storage::disk('supervisor')->put($workerFile, '[program:'.$workerName.']
process_name=%(program_name)s_%(process_num)02d');
Storage::disk('supervisor')->append($workerFile, 'command=php '.base_path().'/artisan queue:work');
Storage::disk('supervisor')->append($workerFile, 'command=php '.base_path().'/artisan queue:listen --queue=sd_ingest,default --tries=2 --timeout=180');
Storage::disk('supervisor')->append($workerFile, 'autostart=true
autorestart=true
user=www-data
numprocs=1
numprocs=3
redirect_stderr=true
stdout_logfile=/var/log/queue/'.$workerName.'.log');
} catch (Exception $e) {
@ -59,7 +60,9 @@ stdout_logfile=/var/log/queue/'.$workerName.'.log');
return;
}
$this->info('supervisor script installed');
try {
exec('sudo supervisorctl reread');
exec('sudo supervisorctl update');
@ -70,7 +73,7 @@ stdout_logfile=/var/log/queue/'.$workerName.'.log');
return;
}
$this->info('queue worker started');
$this->info('queue worker started');
}
}

57
app/Console/Commands/TestMachine.php
File diff suppressed because it is too large
View File

5
app/Jobs/IngestDocuments.php

@ -4,11 +4,6 @@ namespace App\Jobs;
use App\Ingest\Convertor;
use App\Ingest\DataJsonConvertor;
use App\Ingest\DocxReader;
use App\Parser\ParseXml;
use App\Parser\DocxParser\ParseDocx;
use App\Parser\HtmlParser\ParseHtml;
use App\Parser\ParseHtmlArray;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;

17
app/Parser/DocxParser/Footer.php

@ -1,17 +0,0 @@
<?php
namespace App\Parser\DocxParser;
/**
 * Handler for Word footer elements.
 *
 * Footer content is not converted into the parsed structure; the element is
 * ignored, in the same way the Header and PageBreak handlers ignore theirs.
 */
class Footer
{
    /**
     * Ignore the footer element.
     *
     * The previous implementation called dd(), which dumped the element and
     * terminated the whole request/job as soon as a document contained a footer.
     *
     * @param mixed $element Footer element handed over by the parser (unused).
     *
     * @return null Always null, so callers that test the result for truthiness skip it.
     */
    public function handle($element)
    {
        return null;
    }
}

41
app/Parser/DocxParser/Footnote.php

@ -1,41 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use Exception;
/**
 * Handler for footnote containers: delegates every child element to its own
 * handler and collects the non-empty results.
 */
class Footnote
{
    use Helper;

    /**
     * Parse all child elements of the given footnote container.
     *
     * @param mixed $section Element exposing getElements().
     *
     * @return array|null List of parsed child structures, or null when nothing was produced.
     * @throws Exception When no handler class exists for a child element.
     */
    public function handle($section)
    {
        $result = [];

        foreach ($this->getElements($section) as $element) {
            try {
                $handler = $this->getHandler($element);
            } catch (Exception $e) {
                // Keep the original exception attached so the stack trace is not lost.
                throw new Exception($e->getMessage(), 0, $e);
            }

            // This used to live in a `finally` block, which also ran when getHandler()
            // had thrown and $handler was undefined, masking the real error.
            // The handler is now invoked once and its result reused.
            $data = $handler->handle($element);
            if ($data) {
                $result[] = $data;
            }
        }

        if (count($result) > 0) {
            return $result;
        }

        return null;
    }
}

11
app/Parser/DocxParser/Header.php

@ -1,11 +0,0 @@
<?php
namespace App\Parser\DocxParser;
/**
 * Handler for Word header elements. Headers produce no parsed output.
 */
class Header
{
    /**
     * Accept a header element and produce nothing.
     *
     * @param mixed $element Header element (unused).
     *
     * @return null
     */
    public function handle($element)
    {
        return null;
    }
}

26
app/Parser/DocxParser/Link.php

@ -1,26 +0,0 @@
<?php
namespace App\Parser\DocxParser;
/**
 * Handler that turns a Word hyperlink element into an HTML anchor.
 */
class Link
{
    /**
     * Build the parsed representation of a link element.
     *
     * @param mixed $element Element exposing getText() and getLinkSrc().
     *
     * @return array ['content' => anchor HTML, 'type' => 'link']
     */
    public function handle($element)
    {
        $text = $element->getText();

        return [
            'content' => $this->buildHtmlLink($element, $text),
            'type' => 'link',
        ];
    }

    /**
     * Render the anchor tag.
     *
     * Both the target and the label come from the uploaded document, so they are
     * escaped: a quote in the URL would otherwise terminate the single-quoted
     * href attribute, and markup in the label would be injected verbatim.
     *
     * @param mixed $element Element exposing getLinkSrc().
     * @param mixed $text    Link label; cast to string before escaping.
     *
     * @return string
     */
    private function buildHtmlLink($element, $text)
    {
        $flags = ENT_QUOTES | ENT_SUBSTITUTE;
        $href = htmlspecialchars((string) $element->getLinkSrc(), $flags, 'UTF-8');
        $label = htmlspecialchars((string) $text, $flags, 'UTF-8');

        return "<a href='".$href."' target='_blank'>".$label."</a>";
    }
}

77
app/Parser/DocxParser/ListItemRun.php

@ -1,77 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use Exception;
use PhpOffice\PhpWord\Reader\Word2007\Numbering;
use PhpOffice\PhpWord\Style;
/**
 * Handler for list item runs: merges the visible text of all child elements
 * into a single list-item structure.
 */
class ListItemRun
{
    use Helper;

    /**
     * Parse a list item run.
     *
     * @param mixed $list Element exposing getElements(), getParagraphStyle(),
     *                    getDepth() and getElementIndex().
     *
     * @return array Empty array when no child yields visible text; otherwise the single
     *               merged item, with its text wrapped in a <p> tag.
     * @throws Exception Re-thrown (message only) when a child handler fails.
     */
    public function handle($list)
    {
        $items = [];
        $children = $this->getElements($list);

        if (! count($children)) {
            return $items;
        }

        foreach ($children as $position => $child) {
            try {
                $handler = $this->getHandler($child);
                $payload = $handler->handle($child);

                // Skip children that render to nothing once tags and whitespace are removed.
                $hasVisibleText = $payload
                    && isset($payload[ 'content' ])
                    && strlen(trim(strip_tags($payload[ 'content' ])));
                if (! $hasVisibleText) {
                    continue;
                }

                $styleName = $list->getParagraphStyle()->getStyleName();
                $lastKey = count($items) - 1;

                // Every child after the first is appended to the item already collected.
                if ($position !== 0 && isset($items[ $lastKey ])) {
                    $items[ $lastKey ][ 'content' ][ 'content' ] .= ' '.$payload[ 'content' ];
                    continue;
                }

                $items[] = [
                    'content' => $payload,
                    'type' => 'listItemRun',
                    'depth' => (int) $list->getDepth(),
                    'styleDepth' => $this->getStyleListDepth($styleName),
                    'styleName' => $styleName,
                    'index' => $list->getElementIndex(),
                    'children' => [],
                ];
            } catch (Exception $e) {
                throw new Exception($e->getMessage());
            }
        }

        if (count($items) === 1) {
            // Unwrap the single collected item and wrap its text in a paragraph.
            $items = reset($items);
            $items[ 'content' ][ 'content' ] = '<p>'.$items[ 'content' ][ 'content' ].'</p>';
        }

        return $items;
    }
}

11
app/Parser/DocxParser/PageBreak.php

@ -1,11 +0,0 @@
<?php
namespace App\Parser\DocxParser;
/**
 * Handler for page break elements. Page breaks produce no parsed output.
 */
class PageBreak
{
    /**
     * Accept a page break element and produce nothing.
     *
     * @param mixed $element Page break element (unused).
     *
     * @return null
     */
    public function handle($element)
    {
        return null;
    }
}

269
app/Parser/DocxParser/ParseDocx.php

@ -1,269 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use Illuminate\Support\Facades\Log;
use PhpOffice\PhpWord\IOFactory;
use function GuzzleHttp\Psr7\str;
/**
 * Entry point of the DOCX parser: loads a document with PhpWord, runs every
 * section through its handler and nests the resulting paragraphs into a
 * parent/children tree.
 */
class ParseDocx
{
    use Helper;

    /** @var int Next top-level number handed out by setTheNumbering(). */
    protected $currentNumberingIndex = 1;

    /**
     * Load a DOCX file and return its parsed paragraph tree.
     *
     * @param string $file Path of the document to load.
     *
     * @return array
     * @throws \Exception When loading or parsing fails (original exception attached as previous).
     */
    public function fromUploadedFile($file)
    {
        try {
            $docxFileLoader = IOFactory::load($file);
            Log::info('Parse docx');

            return $this->parseLoadedDocx($docxFileLoader);
        } catch (\Exception $exception) {
            // A dd() call used to sit here; it terminated the process and made this rethrow unreachable.
            throw new \Exception($exception->getMessage(), 0, $exception);
        }
    }

    /**
     * Collect the paragraphs of every section and build the nested result.
     *
     * @param mixed $docx Loaded document exposing getSections().
     *
     * @return array
     */
    private function parseLoadedDocx($docx)
    {
        // Initialised up front: it used to be undefined (and count() failed) for documents
        // that yield no paragraphs.
        $result = [];
        $styles = 0;

        foreach ($docx->getSections() as $page) {
            $handler = $this->getHandler($page);
            $paragraphs = $handler->handle($page);
            if (! $paragraphs) {
                continue;
            }

            foreach ($paragraphs as $paragraph) {
                // Keep everything except text breaks; tables are always kept.
                if ($paragraph && $paragraph[ 'type' ] !== 'textBreak' && (isset($paragraph[ 'content' ][ 'type' ]) && $paragraph[ 'content' ][ 'type' ] !== 'textBreak') || $paragraph[ 'type' ] == 'table') {
                    $result[] = $paragraph;
                    if (isset($paragraph[ 'styleName' ])) {
                        $styles++;
                    }
                }
            }
        }

        // When at least half of the paragraphs carry a style name, nest by style depth
        // instead of by list depth.
        $depthTypeType = count($result) / 2 <= $styles ? 'styleDepth' : 'depth';

        return $this->setTheNumbering($result, null, $depthTypeType);
    }

    /**
     * Nest the paragraphs and apply child numbering where a paragraph already has one.
     *
     * NOTE(review): the first branch below writes 'numbering'/'numbering_row' (and the
     * renumbered children) to the local copy $paragraph, but the value pushed to the
     * result is $paragraphs[$index], so those writes are discarded and only the
     * $currentNumberingIndex increment survives. This is preserved as-is because
     * changing it would alter the parser output; confirm whether it is intentional.
     *
     * @param array       $paragraphs      Flat or nested paragraph list.
     * @param string|null $parentNumbering Numbering prefix of the parent, if any.
     * @param string      $depthType       'depth' or 'styleDepth'.
     *
     * @return array
     */
    private function setTheNumbering($paragraphs, $parentNumbering = null, $depthType = 'depth')
    {
        $result = [];
        $paragraphs = $this->buildTheChildrens($paragraphs, $depthType);

        for ($index = 0; $index < count($paragraphs); $index++) {
            $paragraph = $paragraphs[ $index ];
            // Missing keys (e.g. handlers that return no depth or style name) are read as null/''.
            $depth = $paragraph[ $depthType ] ?? null;
            $styleName = (string) ($paragraph[ 'styleName' ] ?? '');

            if ($paragraph[ 'type' ] !== 'table' && ($depth === 0 || $parentNumbering) && strpos($styleName, 'BodyText') === false) {
                $paragraph[ 'content' ][ 'numbering' ] = ($parentNumbering) ? $parentNumbering.((int) $index + 1).'.' : $this->currentNumberingIndex.'.';
                $paragraph[ 'content' ][ 'numbering_row' ] = ($parentNumbering) ? ((int) $index + 1) : $this->currentNumberingIndex;
                if (! empty($paragraph[ 'children' ])) {
                    $paragraph[ 'children' ] = $this->setTheNumbering(
                        $paragraph[ 'children' ],
                        $paragraph[ 'content' ][ 'numbering' ],
                        $depthType
                    );
                }
                if (! $parentNumbering) {
                    $this->currentNumberingIndex++;
                }
            } elseif (isset($paragraph[ 'content' ][ 'numbering' ]) && isset($paragraph[ 'children' ]) && count($paragraph[ 'children' ])) {
                $paragraphs[ $index ] = $this->setChildrenNumbering($paragraphs[ $index ]);
            }

            $result[] = $paragraphs[ $index ];
        }

        return $result;
    }

    /**
     * Number the children of a paragraph as "<parent numbering>.<n>", recursively.
     *
     * @param array $parent Paragraph with 'content'['numbering'] and 'children'.
     *
     * @return array The parent with renumbered children.
     */
    private function setChildrenNumbering($parent)
    {
        $numbering = 1;

        for ($j = 0; $j < count($parent[ 'children' ]); $j++) {
            $children = $parent[ 'children' ][ $j ];
            if ($children[ 'type' ] == 'listItemRun' || isset($children[ 'content' ][ 'numbering' ])) {
                $parentNumber = $parent[ 'content' ][ 'numbering' ];
                // Avoid a double dot when the parent numbering already ends with one.
                $endsWithDot = substr(trim($parentNumber), -1) === '.';
                $parent[ 'children' ][ $j ][ 'content' ][ 'numbering' ] = $endsWithDot
                    ? $parentNumber.$numbering
                    : $parentNumber.'.'.$numbering;
                if (! empty($parent[ 'children' ][ $j ][ 'children' ])) {
                    $parent[ 'children' ][ $j ] = $this->setChildrenNumbering($parent[ 'children' ][ $j ]);
                }
                $numbering++;
            }
        }

        return $parent;
    }

    /**
     * Turn the flat paragraph list into a tree by attaching following paragraphs
     * to the current one while one of the nesting heuristics matches.
     *
     * The conditions are order-dependent and are kept exactly as originally written.
     *
     * @param array  $paragraphs
     * @param string $depthType 'depth' or 'styleDepth'.
     *
     * @return array
     */
    private function buildTheChildrens($paragraphs, $depthType)
    {
        $alreadyHandledIndexes = [];
        $result = [];

        for ($i = 0; $i < count($paragraphs); $i++) {
            if (in_array($i, $alreadyHandledIndexes)) {
                continue;
            }
            $j = $i + 1;
            for ($j; $j < count($paragraphs); $j++) {
                if (in_array($j, $alreadyHandledIndexes)) {
                    continue;
                }
                // Skip over an empty paragraph and look at the one after it.
                if (isset($paragraphs[ $j ][ 'content' ][ 'content' ]) && $paragraphs[ $j ][ 'content' ][ 'content' ] === '<p></p>') {
                    $alreadyHandledIndexes[] = $j;
                    $j++;
                }
                if (isset($paragraphs[ $i ][ $depthType ]) && isset($paragraphs[ $j ][ $depthType ]) && $paragraphs[ $i ][ $depthType ] !== null && $paragraphs[ $j ][ $depthType ] !== null && $paragraphs[ $i ][ $depthType ] < $paragraphs[ $j ][ $depthType ]) {
                    // Candidate is deeper than the current paragraph.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $j ][ 'styleName' ]) && $paragraphs[ $j ][ 'styleName' ] === 'ListParagraph' && $paragraphs[ $i ][ $depthType ] === null && substr(strip_tags($paragraphs[ $i ][ 'content' ][ 'content' ]), -1) === ':') {
                    // Depth-less paragraph ending in ':' followed by a list paragraph.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $j + 1 ]) && isset($paragraphs[ $j + 1 ][ 'content' ][ 'content' ]) && isset($paragraphs[ $j ]) && isset($paragraphs[ $j ][ 'content' ][ 'content' ]) && substr(strip_tags($paragraphs[ $j ][ 'content' ][ 'content' ]), -1) === ':' && (isset($paragraphs[ $j + 1 ]) && ctype_lower(substr(trim(strip_tags($paragraphs[ $j + 1 ][ 'content' ][ 'content' ])), 0, 1)) || (isset($paragraphs[ $j + 1 ]) && substr(trim(strip_tags($paragraphs[ $j + 1 ][ 'content' ][ 'content' ])), strlen(trim(strip_tags($paragraphs[ $j + 1 ][ 'content' ][ 'content' ]))) - 1) == ';'))) {
                    // Candidate ends in ':' and is followed by lower-case or ';'-terminated
                    // lines: those lines become the candidate's children first.
                    $k = $j + 1;
                    $alreadyHandledIndexes[] = $k;
                    while (isset($paragraphs[ $k ]) && substr(str_replace('and', '', trim(strip_tags(str_replace('and', '', $paragraphs[ $k ][ 'content' ][ 'content' ])))), strlen(str_replace('and', '', trim(strip_tags(str_replace('and', '', $paragraphs[ $k ][ 'content' ][ 'content' ]))))) - 1) == ';') {
                        $paragraphs[ $j ][ 'children' ][] = $paragraphs[ $k ];
                        $alreadyHandledIndexes[] = $k++;
                    }
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } elseif (isset($paragraphs[ $i ][ 'styleName' ]) && $paragraphs[ $i ][ $depthType ] !== $paragraphs[ $j ][ $depthType ] && strpos($paragraphs[ $i ][ 'styleName' ], 'Heading2') !== false && ((isset($paragraphs[ $j ][ 'depth' ]) || ($paragraphs[ $j ][ 'type' ] == 'textRun' && isset($paragraphs[ $j ][ 'content' ][ 'numbering' ])) && is_null($paragraphs[ $j ][ 'styleName' ])))) {
                    // Heading2 paragraph followed by content of a different depth.
                    $paragraphs[ $i ] = $this->handlePossibleChild($paragraphs[ $i ], $paragraphs[ $j ], $i, $depthType);
                } else {
                    // First paragraph that is not a child ends the run for $i.
                    break;
                }
                $alreadyHandledIndexes[] = $j;
            }
            $result[] = $paragraphs[ $i ];
            $alreadyHandledIndexes[] = $i;
        }

        return $result;
    }

    /**
     * Attach $child to $parent, either directly or below the parent's last child.
     *
     * @param array  $parent
     * @param array  $child
     * @param int    $i         Index of the parent in the flat list (only passed through).
     * @param string $depthType 'depth' or 'styleDepth'.
     *
     * @return array The (possibly extended) parent.
     */
    private function handlePossibleChild($parent, $child, $i, $depthType)
    {
        // Parent has an empty children list: the child can only become a direct child.
        if (isset($parent[ 'children' ]) && count($parent[ 'children' ]) === 0) {
            if ($parent[ $depthType ] < $child[ $depthType ] || $parent[ $depthType ] === null) {
                $parent[ 'children' ][] = $child;
            } elseif (strpos($parent[ 'styleName' ], 'Heading') !== false && isset($child[ 'content' ][ 'numbering' ]) && substr_count($child[ 'content' ][ 'numbering' ], '.') == 1) {
                $parent[ 'children' ][] = $child;
            }

            return $parent;
        }

        $lastParentChild = last($parent[ 'children' ]);

        // Possible to be either child or grandchild
        if ($lastParentChild[ $depthType ] && $child[ $depthType ] > $lastParentChild[ $depthType ]) {
            $lastParentChild = $this->handlePossibleChild($lastParentChild, $child, $i, $depthType);
        } else {
            if ($child[ $depthType ] === $lastParentChild[ $depthType ]) {
                $parent[ 'children' ][] = $child;

                return $parent;
            }
            if (((isset($lastParentChild[ 'styleDepth' ]) && $lastParentChild[ 'styleDepth' ] === $child[ 'depth' ])) && $lastParentChild[ 'index' ] !== $child[ 'index' ]) {
                $parent[ 'children' ][] = $child;

                return $parent;
            }
        }

        // Write the (possibly extended) last child back into the parent.
        $parent[ 'children' ][ count($parent[ 'children' ]) - 1 ] = $lastParentChild;

        return $parent;
    }
}

32
app/Parser/DocxParser/PreserveText.php

@ -1,32 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
/**
 * Handler for PreserveText elements (text containing field codes in braces).
 */
class PreserveText
{
    use Helper;

    /**
     * Convert the element into a level-2 paragraph, replacing the first
     * "{...}" field with the {REF_NUMBER} placeholder.
     *
     * @param mixed $element Element exposing getText() (string or array of parts)
     *                       and getElementIndex().
     *
     * @return array
     */
    public function handle($element)
    {
        $text = $element->getText();
        if (is_array($text)) {
            $text = implode(' ', $text);
        }

        return [
            'content' => [
                // The class used to be [^)], which let the match run across several
                // fields up to the last '}' in the text; [^}] stops at the first
                // closing brace so only the first field is replaced.
                'content' => preg_replace("/\{[^}]+\}/", '{REF_NUMBER}', (string) $text, 1),
                'type' => 'text',
            ],
            'type' => 'preserveText',
            'index' => $element->getElementIndex(),
            'children' => [],
            'styleName' => 'Level2Number',
            'styleDepth' => 1,
            'depth' => 0,
        ];
    }
}

41
app/Parser/DocxParser/Section.php

@ -1,41 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use Exception;
use PhpOffice\PhpWord\Element\Section as WordSection;
/**
 * Handler for document sections: delegates every child element to its own
 * handler and collects the non-empty results.
 */
class Section
{
    use Helper;

    /**
     * Parse all elements of a section.
     *
     * @param mixed $section Expected to be a PhpWord Section; anything else yields null.
     *
     * @return array|null List of parsed element structures, or null when nothing was produced.
     * @throws Exception When no handler class exists for a child element.
     */
    public function handle($section)
    {
        $result = [];

        if ($section instanceof WordSection) {
            foreach ($this->getElements($section) as $element) {
                try {
                    $handler = $this->getHandler($element);
                } catch (Exception $e) {
                    // Keep the original exception attached so the stack trace is not lost.
                    throw new Exception($e->getMessage(), 0, $e);
                }

                // Each element used to be handled twice (once to test, once to store);
                // the first result is now reused.
                $data = $handler->handle($element);
                if ($data) {
                    $result[] = $data;
                }
            }
        }

        if (count($result) > 0) {
            return $result;
        }

        return null;
    }
}

35
app/Parser/DocxParser/Table.php

@ -1,35 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
/**
 * Handler for tables: parses every row through the Table\Row handler.
 */
class Table
{
    use Helper;

    /**
     * Parse a table element.
     *
     * @param mixed $table Element exposing getRows() and getNestedLevel().
     *
     * @return array Table structure whose 'children' are the parsed rows.
     */
    public function handle($table)
    {
        $result = [];

        foreach ($table->getRows() as $row) {
            // Resolves to the handler in the "Table" sub-namespace that is named after
            // the row's class, e.g. \App\Parser\DocxParser\Table\Row.
            $handlerName = "\App\Parser\DocxParser\\".substr(strrchr(__CLASS__, "\\"), 1).'\\'.$this->getReflectionClass($row);
            $handler = new $handlerName;

            // Each row used to be parsed twice (once to test, once to store);
            // the first result is now reused.
            $data = $handler->handle($row);
            if ($data) {
                $result[] = $data;
            }
        }

        return [
            'content' => '',
            'children' => $result,
            'styleDepth' => $table->getNestedLevel() + 1,
            'depth' => $table->getNestedLevel() + 1,
            'type' => 'table',
        ];
    }
}

41
app/Parser/DocxParser/Table/Cell.php

@ -1,41 +0,0 @@
<?php
namespace App\Parser\DocxParser\Table;
use App\Parser\DocxParser\Traits\Helper;
use Exception;
use Illuminate\Support\Arr;
use PhpOffice\PhpWord\Element\TextBreak;
/**
 * Handler for table cells: parses every non-TextBreak child element and tags
 * the result with the cell width.
 */
class Cell
{
    use Helper;

    /**
     * Parse a table cell.
     *
     * @param mixed $cell Element exposing getElements() and getWidth().
     *
     * @return array Cell structure whose 'children' are the parsed child elements.
     * @throws Exception When no handler class exists for a child element.
     */
    public function handle($cell)
    {
        $result = [];

        // The element list is fetched once; an unused second copy was removed.
        foreach ($this->getElements($cell) as $element) {
            if ($element instanceof TextBreak) {
                continue;
            }

            try {
                $handler = $this->getHandler($element);
            } catch (Exception $e) {
                // Keep the original exception attached so the stack trace is not lost.
                throw new Exception($e->getMessage(), 0, $e);
            }

            $data = $handler->handle($element);
            // As before, a handler that returns nothing still yields an entry holding only the width.
            $data[ 'width' ] = $cell->getWidth();
            $result[] = $data;
        }

        return [
            'content' => '',
            'children' => $result,
            'depth' => null,
            'type' => 'cell',
        ];
    }
}

41
app/Parser/DocxParser/Table/Row.php

@ -1,41 +0,0 @@
<?php
namespace App\Parser\DocxParser\Table;
use App\Parser\DocxParser\Traits\Helper;
use Illuminate\Support\Arr;
/**
 * Handler for table rows: parses every cell through the Cell handler.
 */
class Row
{
    use Helper;

    /**
     * Parse a table row.
     *
     * @param mixed $row Element exposing getCells(), getHeight(), getStyle()
     *                   and getElementIndex().
     *
     * @return array Row structure whose 'children' are the parsed cells.
     */
    public function handle($row)
    {
        $parsedCells = [];

        foreach ($row->getCells() as $cell) {
            $cellHandler = new Cell();
            $parsedCells[] = $cellHandler->handle($cell);
        }

        $structure = [];
        $structure[ 'content' ] = '';
        $structure[ 'children' ] = $parsedCells;
        $structure[ 'depth' ] = null;
        $structure[ 'height' ] = $row->getHeight();
        $structure[ 'isTblHeader' ] = $row->getStyle()->isTblHeader();
        $structure[ 'index' ] = $row->getElementIndex();
        $structure[ 'type' ] = 'row';

        return $structure;
    }
}

147
app/Parser/DocxParser/Text.php

@ -1,147 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
/**
 * Handler for plain text elements: splits off a leading numbering and wraps
 * the remaining text in styled HTML.
 */
class Text
{
    use Helper;

    /**
     * Parse a text element.
     *
     * @param mixed $textElement Element exposing getText(), getFontStyle().
     *
     * @return array ['content' => HTML, 'type' => 'text'] plus 'numbering' when one was found.
     */
    public function handle($textElement)
    {
        $data = $this->getElementData($textElement);
        $data[ 'type' ] = 'text';

        return $data;
    }

    /**
     * Extract numbering and styled content from the element.
     *
     * @param mixed $textElement
     *
     * @return array
     */
    private function getElementData($textElement)
    {
        $textData = $this->getNumberingFromText((string) $textElement->getText());

        // Empty content is left untouched so no empty <span> is produced.
        if (strlen($textData[ 'content' ])) {
            $textData[ 'content' ] = $this->styleTheText($textData[ 'content' ], $textElement);
        }

        return $textData;
    }

    /**
     * Split a leading numbering (a digit or dot followed by any run of characters
     * that are not letters, spaces or parentheses) from the text.
     *
     * @param string $text
     *
     * @return array ['content' => string] plus 'numbering' when a prefix matched.
     */
    private function getNumberingFromText($text)
    {
        $data = [];
        $trimmed = trim($text);
        preg_match('/^([0-9.])([^(A-Z)(a-z) ]*)/', $trimmed, $match);

        if ($match && isset($match[ 0 ]) && $match[ 0 ] !== '.') {
            // Only the leading prefix is removed. str_replace() was used before and also
            // deleted every later occurrence of the same characters inside the sentence
            // (e.g. "1 see clause 1" lost both "1"s).
            $data[ 'content' ] = trim(substr($trimmed, strlen($match[ 0 ])));
            $data[ 'numbering' ] = $match[ 0 ];
        } else {
            $data[ 'content' ] = trim(preg_replace('/\t+/', '', $text));
        }

        return $data;
    }

    /**
     * Wrap the text in a <span> carrying the inline CSS of its font style and in
     * the HTML tags matching its bold/italic/... flags.
     *
     * @param string $textString
     * @param mixed  $textObject Element exposing getFontStyle().
     *
     * @return string
     */
    private function styleTheText($textString, $textObject)
    {
        $font = $textObject->getFontStyle();

        // NOTE(review): assumes getFontStyle() can return a non-object when the run has
        // no explicit font style; in that case the text is wrapped without styling
        // instead of failing on a method call.
        if (! is_object($font) || ! method_exists($font, 'getStyleValues')) {
            return '<span>'.$textString.'</span>';
        }

        $fontStyle = $font->getStyleValues();
        // getInlineStyles() is provided by the Helper trait; the byte-identical private
        // copy this class used to carry was removed.
        $inlineStyle = $this->getInlineStyles(array_merge($fontStyle[ 'style' ], $fontStyle[ 'basic' ]));

        return '<span'.(($inlineStyle) ? ' style="'.$inlineStyle.'"' : '').'>'
            .$this->getStyledText($textString, $fontStyle[ 'style' ])
            .'</span>';
    }

    /**
     * Wrap the text in one HTML tag per active font flag.
     *
     * @param string $text
     * @param array  $styles Map of style name => value; falsy and 'none' mean inactive.
     *
     * @return string
     */
    private function getStyledText($text, $styles)
    {
        $mappedStyle = [
            'bold' => 'strong',
            'italic' => 'i',
            'underline' => 'u',
            'strike' => 'strike',
            'super' => 'sup',
            'sub' => 'sub',
        ];

        foreach ($styles as $style => $active) {
            if (array_key_exists($style, $mappedStyle) && $active && $active !== 'none') {
                $text = $this->appendHtmlStyle($text, $mappedStyle[ $style ]);
            }
        }

        return $text;
    }

    /**
     * Wrap the text in the given HTML tag.
     *
     * @param string $text
     * @param string $styleType Tag name without brackets.
     *
     * @return string
     */
    private function appendHtmlStyle($text, $styleType)
    {
        return "<$styleType>$text</$styleType>";
    }
}

17
app/Parser/DocxParser/TextBreak.php

@ -1,17 +0,0 @@
<?php
namespace App\Parser\DocxParser;
/**
 * Handler for text break elements. Text breaks produce no parsed output.
 */
class TextBreak
{
    /**
     * Ignore the text break.
     *
     * The method returned early before an unreachable array literal describing a
     * '<br>' element; that dead code was removed and the null result made explicit.
     *
     * @param mixed $element Text break element (unused).
     *
     * @return null
     */
    public function handle($element)
    {
        return null;
    }
}

74
app/Parser/DocxParser/TextRun.php

@ -1,74 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use Exception;
/**
 * Handler for text runs: merges the output of all child elements into a
 * single paragraph structure.
 */
class TextRun
{
    use Helper;

    /**
     * Parse a text run.
     *
     * @param mixed $textRun Element exposing getElements(), getParagraphStyle(),
     *                       getDepth() and getElementIndex().
     *
     * @return array Empty array when no child yields data; otherwise the single merged
     *               paragraph, with its text wrapped in a <p> tag.
     * @throws Exception When a child handler fails (original exception attached as previous).
     */
    public function handle($textRun)
    {
        $result = [];
        $textRunElements = $this->getElements($textRun);

        if (! count($textRunElements)) {
            return $result;
        }

        foreach ($textRunElements as $index => $element) {
            try {
                $handler = $this->getHandler($element);
                $data = $handler->handle($element);
                if (! $data) {
                    continue;
                }

                $styleName = $textRun->getParagraphStyle()->getStyleName();
                $isFirst = $index === 0;
                $lastKey = count($result) - 1;

                // Every element after the first is appended to the paragraph already collected.
                if (! $isFirst && isset($result[ $lastKey ])) {
                    $result[ $lastKey ][ 'content' ][ 'content' ] .= ' '.$data[ 'content' ];
                    continue;
                }

                $depth = $textRun->getDepth();
                $result[] = [
                    // The first element used to be handled a second time here; $data is reused.
                    'content' => $data,
                    'type' => 'textRun',
                    // Kept from the original: the first element stores the raw depth, while
                    // a paragraph started by a later element stores it cast to int.
                    'depth' => $isFirst ? $depth : (int) $depth,
                    'styleDepth' => $this->getStyleListDepth($styleName),
                    'styleName' => $styleName,
                    'index' => $textRun->getElementIndex(),
                    'children' => [],
                ];
            } catch (Exception $e) {
                // A dd() call used to sit here; it terminated the process and made this rethrow unreachable.
                throw new Exception($e->getMessage(), 0, $e);
            }
        }

        if (count($result) === 1) {
            // Unwrap the single collected paragraph and wrap its text in <p>.
            $result = reset($result);
            $result[ 'content' ][ 'content' ] = '<p>'.$result[ 'content' ][ 'content' ].'</p>';
        }

        return $result;
    }
}

72
app/Parser/DocxParser/Title.php

@ -1,72 +0,0 @@
<?php
namespace App\Parser\DocxParser;
use App\Parser\DocxParser\Traits\Helper;
use PhpOffice\PhpWord\Style;
use PhpOffice\PhpWord\Element\Title as WordTitle;
/**
 * Handler for title/heading elements: renders them as <h1>..<h5> tags.
 */
class Title
{
    use Helper;

    /**
     * Parse a title element.
     *
     * @param mixed $element Expected to be a PhpWord Title; anything else yields null.
     *
     * @return array|null|mixed Title structure; null for non-title input; or the result of
     *                          the nested element's handler when the title text is not a string.
     */
    public function handle($element)
    {
        if (! $element instanceof WordTitle) {
            return null;
        }

        $title = $element->getText();
        if (! is_string($title)) {
            // The title text is itself an element: delegate to that element's handler.
            return $this->getHandler($title)->handle($title);
        }

        $style = $this->getTitleStyle($element);
        $headings = [
            'Title' => 'h1',
            'Subtitle' => 'h2',
            'Heading1' => 'h1',
            'Heading2' => 'h2',
            'Heading3' => 'h3',
            'Heading4' => 'h4',
            'Heading5' => 'h5',
        ];

        // The style lookup can come back empty when the document does not register the
        // style; the heading is then rendered without inline CSS instead of failing on
        // a method call on null.
        $inlineStyle = '';
        if (is_object($style[ 'font' ]) && method_exists($style[ 'font' ], 'getStyleValues')) {
            $fontStyle = $style[ 'font' ]->getStyleValues();
            $inlineStyle = $this->getInlineStyles(array_merge($fontStyle[ 'style' ], $fontStyle[ 'basic' ]));
        }

        // Unknown style names fall back to the smallest heading.
        $heading = array_key_exists($style[ 'heading' ], $headings) ? $headings[ $style[ 'heading' ] ] : 'h5';

        return [
            'content' => [
                'content' => '<'.$heading.(($inlineStyle) ? ' style="'.$inlineStyle.'"' : '').'>'.$title.'</'.$heading.'>',
                'type' => 'title',
            ],
            'type' => 'title',
            'depth' => null,
            'styleDepth' => $this->getStyleListDepth($element->getStyle()),
            'styleName' => $element->getStyle(),
            'index' => $element->getElementIndex(),
            'children' => [],
        ];
    }

    /**
     * Look up the registered style of the title.
     *
     * @param mixed $element Element exposing getStyle().
     *
     * @return array ['font' => style object or null, 'heading' => style name]
     */
    private function getTitleStyle($element)
    {
        $styleName = (string) $element->getStyle();

        // "HeadingN" is looked up under the name "Heading_N".
        if (strpos($styleName, 'Heading') !== false) {
            $font = Style::getStyle(str_replace('Heading', 'Heading_', $styleName));
        } else {
            $font = Style::getStyle($styleName);
        }

        return [
            'font' => $font,
            'heading' => $styleName,
        ];
    }
}

117
app/Parser/DocxParser/Traits/Helper.php

@ -1,117 +0,0 @@
<?php
namespace App\Parser\DocxParser\Traits;
use ReflectionClass;
/**
 * Shared helpers for the DOCX element handlers: handler lookup, element
 * traversal and style conversion.
 */
trait Helper
{
    /**
     * Instantiate the handler whose class name matches the element's short class name.
     *
     * @param object $element
     *
     * @return object Instance of App\Parser\DocxParser\<ShortClassName>.
     * @throws \Exception When the element cannot be reflected or no handler class exists.
     */
    public function getHandler($element)
    {
        try {
            $reflectClass = $this->getReflectionClass($element);
        } catch (\Exception $exception) {
            // Keep the original exception attached so the stack trace is not lost.
            throw new \Exception($exception->getMessage(), 0, $exception);
        }

        $handleClass = 'App\Parser\DocxParser\\'.$reflectClass;
        if (class_exists($handleClass)) {
            return new $handleClass;
        }

        throw new \Exception("Handler class $handleClass dose not exists!");
    }

    /**
     * Return the class name of the element without its namespace.
     *
     * A ReflectionException now propagates to the caller. The previous code called
     * an undefined throwException() function in its catch block, which itself failed
     * with a fatal error.
     *
     * @param object|string $element
     *
     * @return string
     * @throws \ReflectionException When the class cannot be reflected.
     */
    public function getReflectionClass($element)
    {
        $reflectClass = new ReflectionClass($element);

        return $reflectClass->getShortName();
    }

    /**
     * Get the child elements of an element
     *
     * @param mixed $element Element exposing getElements().
     *
     * @return mixed
     */
    public function getElements($element)
    {
        return $element->getElements();
    }

    /**
     * Check if an element has children
     *
     * @param mixed $element Element exposing getElements().
     *
     * @return bool
     */
    public function hasElements($element)
    {
        return (bool) count($this->getElements($element));
    }

    /**
     * Convert font style values into an inline CSS string.
     *
     * @param array $styles Map of style name => value. Falsy values and the strings
     *                      'none'/'auto' are ignored.
     *
     * @return string Concatenated CSS declarations, '' when nothing applies.
     */
    private function getInlineStyles($styles)
    {
        $styleString = '';
        $acceptedInline = [
            'dStrike' => 'text-decoration: line-through;text-decoration-style: double;',
            // Small caps / all caps were mapped to "lowercase" / "capitalize", which do
            // not render as small capitals or full upper case.
            'smallCaps' => 'font-variant: small-caps;',
            'allCaps' => 'text-transform: uppercase;',
            // Values are read with a default so that absent keys no longer raise
            // undefined-index errors while this table is built.
            'fgColor' => 'background-color:'.($styles[ 'fgColor' ] ?? '').';',
            'hidden' => 'display:none;',
            'size' => 'font-size:'.($styles[ 'size' ] ?? '').'pt;',
            'color' => 'color:#'.($styles[ 'color' ] ?? '').';',
        ];

        foreach ($styles as $style => $value) {
            // Strict comparison matters: with loose comparison boolean true equals
            // 'none', so flags such as hidden/allCaps/dStrike were always dropped.
            if (array_key_exists($style, $acceptedInline) && $value && ! in_array($value, ['none', 'auto'], true)) {
                $styleString .= $acceptedInline[ $style ];
            }
        }

        return $styleString;
    }

    /**
     * Derive a zero-based depth from the number embedded in a style name
     * (e.g. "Heading2" => 1).
     *
     * @param string|null $styleName
     *
     * @return int|null Null when the name holds no number or contains "definition".
     */
    public function getStyleListDepth($styleName)
    {
        $styleName = (string) $styleName;
        $getNumberFromStyleName = filter_var($styleName, FILTER_SANITIZE_NUMBER_FLOAT, FILTER_FLAG_ALLOW_FRACTION);

        if (is_numeric($getNumberFromStyleName) && strpos(strtolower($styleName), 'definition') === false) {
            return (int) $getNumberFromStyleName - 1;
        }

        return null;
    }
}

527
app/Parser/HtmlParser/ParseHtml.php

@ -1,527 +0,0 @@
<?php
namespace App\Parser\HtmlParser;
use DOMDocument;
use Illuminate\Support\Facades\Log;
class ParseHtml
{
/**
 * Load an HTML file and return its parsed structure.
 *
 * @param string $file Path of the HTML file.
 *
 * @return mixed Result of parseLoadedHtml().
 * @throws \Exception When the file cannot be read or is empty. Parsing errors now
 *                    propagate to the caller; they used to be dumped with dd(),
 *                    which terminated the process.
 */
public function fromUploadedFile($file)
{
    Log::info('Parse html from file:'.$file);

    $htmlString = file_get_contents($file);
    if ($htmlString === false || trim($htmlString) === '') {
        throw new \Exception('Unable to read HTML from file: '.$file);
    }

    // Collect libxml warnings internally while loading, then restore the previous
    // setting so the switch does not leak into the rest of the process.
    $previousSetting = libxml_use_internal_errors(true);
    try {
        $htmlDom = new DOMDocument();
        $htmlDom->loadHTML($htmlString);
        // NOTE(review): set after loading, as in the original; confirm whether it was
        // meant to be applied before loadHTML().
        $htmlDom->preserveWhiteSpace = false;

        return $this->parseLoadedHtml($htmlDom);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }
}
/**
 * Convert the <body> of a loaded document into the parsed response list.
 *
 * Tables and entries without a type are skipped; every remaining entry gets
 * balanced HTML in 'content' and a tag-free, single-line 'clean_content'.
 *
 * @param mixed $htmlDom Loaded DOM document.
 *
 * @return mixed Result of fixChildrenStructure().
 */
private function parseLoadedHtml($htmlDom)
{
    $body = $htmlDom->getElementsByTagName("body")[ 0 ];
    $structured = $this->buildTheParsedResponse($this->domToArray($body));

    $parsed = [];
    foreach ($structured as $node) {
        if (! isset($node[ '_type' ]) || $node[ '_type' ] === 'table') {
            continue;
        }

        $entry = $this->handleChildrens($node);
        if (! isset($entry[ 'content' ])) {
            continue;
        }

        $entry[ 'content' ] = $this->closetags($entry[ 'content' ]);
        $entry[ 'clean_content' ] = preg_replace("/(\r\n|\t|\r|\n)+/", " ", strip_tags($entry[ 'content' ]));
        $parsed[] = $entry;
    }

    return $this->fixChildrenStructure($parsed);
}
/**
 * Recursively convert a DOM node into a nested array.
 *
 * Element nodes yield '_type', '_numberOfChildren', optional '_children' and,
 * for <ol>, '_startFrom'. Text/CDATA nodes yield '_type' => '_text' and
 * '_content'. Attributes, when present, are listed under '_attributes'.
 * Nodes that produce nothing return an empty array.
 *
 * @param \DOMNode $root
 *
 * @return array
 */
private function domToArray($root)
{
    $result = [];

    if ($root->nodeType == XML_ELEMENT_NODE) {
        $result[ '_type' ] = $root->nodeName;
        if ($root->nodeName === 'ol') {
            // Ordered lists start at 1 unless a start attribute says otherwise.
            $result[ '_startFrom' ] = $root->hasAttribute('start') ? $root->getAttribute('start') : 1;
        }
        $result[ '_numberOfChildren' ] = $root->childNodes->length;

        if ($root->hasChildNodes()) {
            $children = $root->childNodes;
            for ($i = 0; $i < $children->length; $i++) {
                $child = $this->domToArray($children->item($i));
                // Don't keep text nodes with only spaces and newlines.
                if (! empty($child)) {
                    $result[ '_children' ][] = $child;
                }
            }
        }
    } elseif ($root->nodeType == XML_TEXT_NODE || $root->nodeType == XML_CDATA_SECTION_NODE) {
        $value = (string) $root->nodeValue;
        // Compared against '' instead of using empty(): empty() also treats the
        // string "0" as empty, which silently dropped text nodes containing just "0".
        if ($value !== '') {
            $cleanText = preg_replace("/(\r\n|\t|\r|\n)+/", " ", $value);
            if (str_replace(' ', '', $cleanText) !== '') {
                $result[ '_type' ] = '_text';
                $result[ '_content' ] = ltrim($cleanText);
            }
        }
    }

    if ($root->hasAttributes()) {
        foreach ($root->attributes as $attribute) {
            $result[ '_attributes' ][ $attribute->name ] = $attribute->value;
        }
    }

    return $result;
}
/**
 * Convert the children of a domToArray() node into the intermediate response
 * structure consumed by handleChildrens().
 *
 * Text children become ['_type' => '_text', 'content' => ...]. Element children
 * with non-empty parsed content carry it under 'children' (for ul/ol, together
 * with 'start' when known) or under 'content' (everything else).
 *
 * @param array $htmElementsAsArray Node as produced by domToArray().
 *
 * @return array
 */
private function buildTheParsedResponse(array $htmElementsAsArray): array
{
    $parsedResponse = [];

    // A node without children (e.g. an empty <body>) yields an empty response rather
    // than an undefined-index error and a foreach over null.
    foreach ($htmElementsAsArray[ '_children' ] ?? [] as $elementArray) {
        $data = [];

        if ($elementArray[ '_type' ] === '_text') {
            $data[ '_type' ] = $elementArray[ '_type' ];
            $data[ 'content' ] = $this->parseParagraph($elementArray);
        } elseif (isset($elementArray[ '_children' ])) {
            $parsedResponseData = $this->buildTheParsedResponse($elementArray);
            if (! empty($parsedResponseData)) {
                $data[ '_type' ] = $elementArray[ '_type' ];
                if (in_array($elementArray[ '_type' ], ['ul', 'ol'])) {
                    if (isset($elementArray[ '_startFrom' ])) {
                        $data[ 'start' ] = $elementArray[ '_startFrom' ];
                    }
                    $data[ 'children' ] = $parsedResponseData;
                } else {
                    $data[ 'content' ] = $parsedResponseData;
                }
            }
        }

        if (! empty($data)) {
            if (isset($elementArray[ '_attributes' ])) {
                $data[ '_attributes' ] = $elementArray[ '_attributes' ];
            }
            $parsedResponse[] = $data;
        }
    }

    return $parsedResponse;
}
/**
 * Strip HTML tag pairs that contain nothing but whitespace or other empty tag pairs.
 *
 * @param mixed       $str   Source string; non-strings and blank strings are returned as-is.
 * @param string|null $repto Replacement for each empty tag; anything but a string means ''.
 *
 * @return mixed
 */
private function remove_empty_tags_recursive($str, $repto = null)
{
    $nothingToDo = ! is_string($str) || trim($str) == '';
    if ($nothingToDo) {
        return $str;
    }

    $replacement = is_string($repto) ? $repto : '';

    // Pattern written by Junaid Atari; (?R) recurses so nested empty tags match too.
    $emptyTagPattern = '/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU';

    return preg_replace($emptyTagPattern, $replacement, $str);
}
/**
 * Balance the HTML tags in a fragment.
 *
 * Walks the text tag by tag while keeping a stack of open tags: unmatched
 * closing tags are dropped, tags left open are closed (in place when a parent
 * closes first, otherwise at the end), and known single tags are written in
 * self-closing form. The balanced text is finally passed through
 * remove_empty_tags_recursive().
 *
 * @param string $text HTML fragment.
 *
 * @return mixed Balanced HTML with empty tag pairs removed.
 */
private function closetags($text)
{
$tagstack = [];
$stacksize = 0;
$tagqueue = '';
$newtext = '';
// Known single-entity/self-closing tags.
$single_tags = [
'area',
'base',
'basefont',
'br',
'col',
'command',
'embed',
'frame',
'hr',
'img',
'input',
'isindex',
'link',
'meta',
'param',
'source'
];
// Tags that can be immediately nested within themselves.
$nestable_tags = ['blockquote', 'div', 'object', 'q', 'span'];
// WP bug fix for comments - in case you REALLY meant to type '< !--'.
// NOTE(review): search and replacement are identical as written here, so this call changes nothing — confirm the intended replacement string.
$text = str_replace('< !--', '< !--', $text);
// WP bug fix for LOVE <3 (and other situations with '<' before a number).
$text = preg_replace('#<([0-9]{1})#', '&lt;$1', $text);
/**
* Matches supported tags.
*
* To get the pattern as a string without the comments paste into a PHP
* REPL like `php -a`.
*
* @see https://html.spec.whatwg.org/#elements-2
* @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name
*
* @example
* ~# php -a
* php > $s = [paste copied contents of expression below including parentheses];
* php > echo $s;
*/
$tag_pattern = ('#<'. // Start with an opening bracket.
'(/?)'. // Group 1 - If it's a closing tag it'll have a leading slash.
'('. // Group 2 - Tag name.
// Custom element tags have more lenient rules than HTML tag names.
'(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)'.'|'.// Traditional tag rules approximate HTML tag names.
'(?:[\w:]+)'.')'.'(?:'.// We either immediately close the tag with its '>' and have nothing here.
'\s*'.'(/?)'. // Group 3 - "attributes" for empty tag.
'|'.// Or we must start with space characters to separate the tag name from the attributes (or whitespace).
'(\s+)'. // Group 4 - Pre-attribute whitespace.
'([^>]*)'. // Group 5 - Attributes.
')'.'>#' // End with a closing bracket.
);
// Consume the text one tag at a time: everything before the tag plus the (possibly rewritten) tag moves to $newtext.
while (preg_match($tag_pattern, $text, $regex)) {
$full_match = $regex[ 0 ];
$has_leading_slash = ! empty($regex[ 1 ]);
$tag_name = $regex[ 2 ];
$tag = strtolower($tag_name);
$is_single_tag = in_array($tag, $single_tags, true);
$pre_attribute_ws = isset($regex[ 4 ]) ? $regex[ 4 ] : '';
$attributes = trim(isset($regex[ 5 ]) ? $regex[ 5 ] : $regex[ 3 ]);
$has_self_closer = '/' === substr($attributes, -1);
$newtext .= $tagqueue;
$i = strpos($text, $full_match);
$l = strlen($full_match);
// Clear the shifter.
$tagqueue = '';
if ($has_leading_slash) { // End tag.
// If too many closing tags.
if ($stacksize <= 0) {
$tag = '';
// Or close to be safe $tag = '/' . $tag.
// If stacktop value = tag close value, then pop.
} elseif ($tagstack[ $stacksize - 1 ] === $tag) { // Found closing tag.
$tag = '</'.$tag.'>'; // Close tag.
array_pop($tagstack);
$stacksize--;
} else { // Closing tag not at top, search for it.
for ($j = $stacksize - 1; $j >= 0; $j--) {
if ($tagstack[ $j ] === $tag) {
// Add tag to tagqueue.
for ($k = $stacksize - 1; $k >= $j; $k--) {
$tagqueue .= '</'.array_pop($tagstack).'>';
$stacksize--;
}
break;
}
}
$tag = '';
}
} else { // Begin tag.
if ($has_self_closer) { // If it presents itself as a self-closing tag...
// ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such
// and immediately close it with a closing tag (the tag will encapsulate no text as a result).
if (! $is_single_tag) {
$attributes = trim(substr($attributes, 0, -1))."></$tag";
}
} elseif ($is_single_tag) { // Else if it's a known single-entity tag but it doesn't close itself, do so.
$pre_attribute_ws = ' ';
$attributes .= '/';
} else { // It's not a single-entity tag.
// If the top of the stack is the same as the tag we want to push, close previous tag.
if ($stacksize > 0 && ! in_array($tag, $nestable_tags,
true) && $tagstack[ $stacksize - 1 ] === $tag) {
$tagqueue = '</'.array_pop($tagstack).'>';
$stacksize--;
}
$stacksize = array_push($tagstack, $tag);
}
// Attributes.
if ($has_self_closer && $is_single_tag) {
// We need some space - avoid <br/> and prefer <br />.
$pre_attribute_ws = ' ';
}
$tag = '<'.$tag.$pre_attribute_ws.$attributes.'>';
// If already queuing a close tag, then put this tag on too.
if (! empty($tagqueue)) {
$tagqueue .= $tag;
$tag = '';
}
}
$newtext .= substr($text, 0, $i).$tag;
$text = substr($text, $i + $l);
}
// Clear tag queue.
$newtext .= $tagqueue;
// Add remaining text.
$newtext .= $text;
while ($x = array_pop($tagstack)) {
$newtext .= '</'.$x.'>'; // Add remaining tags to close.
}
// WP fix for the bug with HTML comments.
$newtext = str_replace('< !--', '<!--', $newtext);
// NOTE(review): search and replacement are identical as written here, so this call changes nothing.
$newtext = str_replace('< !--', '< !--', $newtext);
return $this->remove_empty_tags_recursive($newtext);
}
/**
 * Wrap the text content of a text node for the parsed response.
 *
 * @param array      $elementArray Node holding the text under '_content'.
 * @param array|null $type         Optional list of opening tags to prepend; when given,
 *                                 the combined string is run through closetags().
 * @param mixed      $number       Unused.
 *
 * @return array ['_content' => string]
 */
private function parseParagraph($elementArray, $type = null, $number = null)
{
    if ($type) {
        $content = $this->closetags(implode('', $type).$elementArray[ '_content' ]);
    } else {
        $content = $elementArray[ '_content' ];
    }

    return ['_content' => $content];
}
/**
 * Convert one parsed node (and, recursively, its descendants) into a
 * `content` / `children` / `clean_content` structure.
 *
 * - `table` nodes are not handled: the method returns null for them.
 * - `ol` / `ul` nodes produce one child per list item; items with visible text receive a
 *   running `numbering_row` once a `start` value has been seen.
 * - `div` nodes produce one child per content entry, each passed through closetags().
 * - Any other node is flattened into a single HTML string in `content`.
 *
 * @param array $data   Node with at least a `_type` key.
 * @param array $parsed Accumulator; its `content` entry is overwritten with the opening tag.
 *
 * @return array|null
 */
private function handleChildrens($data, $parsed = [])
{
    if ($data[ '_type' ] === 'table') {
        return null;
    }

    // Collapses runs of line breaks / tabs into one space after stripping the tags.
    $toCleanText = function ($html) {
        return preg_replace("/(\r\n|\t|\r|\n)+/", " ", strip_tags($html));
    };

    $parsed[ 'content' ] = '<'.$data[ '_type' ].'>';

    if (in_array($data[ '_type' ], ['ol', 'ul'])) {
        $parsed[ 'children' ] = [];
        if (isset($data[ 'start' ])) {
            $startFrom = $data[ 'start' ];
        }
        foreach ($data[ 'children' ] as $child) {
            if (isset($child[ 'start' ])) {
                $startFrom = $child[ 'start' ];
            }
            if (! isset($child[ 'content' ])) {
                $item = $this->handleChildrens($child);
                $item[ 'clean_content' ] = $toCleanText($item[ 'content' ]);
                $parsed[ 'children' ][] = $item;
                continue;
            }
            foreach ($child[ 'content' ] as $li) {
                $item = $this->handleChildrens($li);
                if (! isset($item[ 'content' ])) {
                    continue;
                }
                $item[ 'clean_content' ] = $toCleanText($item[ 'content' ]);
                // Only items with visible text consume a number.
                if (isset($startFrom) && strlen(trim($item[ 'clean_content' ])) > 0) {
                    $item[ 'numbering_row' ] = $startFrom;
                    $startFrom++;
                }
                $parsed[ 'children' ][] = $item;
            }
        }

        return $parsed;
    }

    if (isset($data[ '_type' ]) && ($data[ '_type' ] === 'div')) {
        foreach ($data[ 'content' ] as $child) {
            $item = $this->handleChildrens($child);
            if (isset($item[ 'content' ])) {
                $item[ 'clean_content' ] = $toCleanText($item[ 'content' ]);
                $item[ 'content' ] = $this->closetags($item[ 'content' ]);
            }
            $parsed[ 'children' ][] = $item;
        }

        return $parsed;
    }

    $contentChilds = count($data[ 'content' ]);
    foreach ($data[ 'content' ] as $index => $child) {
        $isText = $child[ '_type' ] === '_text';
        $fragment = $isText ? $child[ 'content' ][ '_content' ] : '<'.$child[ '_type' ].'>';

        if (isset($parsed[ 'content' ])) {
            $parsed[ 'content' ] .= $fragment;
        } else {
            $parsed[ 'content' ] = $fragment;
        }

        if (! $isText) {
            $childs = $this->handleChildrens($child, $parsed);
            if ($childs && isset($child[ 'content' ])) {
                $parsed[ 'content' ] .= $childs[ 'content' ];
            }
        }

        // Balance the tags once the last content entry has been appended.
        if ($contentChilds == $index + 1) {
            $parsed[ 'content' ] = $this->closetags($parsed[ 'content' ]);
        }
        $parsed[ 'children' ] = [];
    }

    return $parsed;
}
/**
 * Regroup a flat list of parsed nodes.
 *
 * Bare '<ol>' wrapper entries are dropped, an entry with empty content and exactly one child is
 * replaced by that child, and entries carrying a `numbering_row` absorb the entries that follow
 * them through handlePossibleChild().
 *
 * @param array $data List of nodes (indexed 0..n-1) as produced by handleChildrens().
 *
 * @return array Regrouped list.
 */
private function fixChildrenStructure($data)
{
$result = [];
$alreadyHandledIndexes = [];
for ($i = 0; $i < count($data); $i++) {
// '<ol>' wrappers are never emitted on their own; only their index is recorded.
if (isset($data[ $i ][ 'content' ]) && $data[ $i ][ 'content' ] == '<ol>') {
$alreadyHandledIndexes[] = $i;
continue;
}
// NOTE(review): $alreadyHandledIndexes stores indexes as VALUES, but array_key_exists() tests
// KEYS, so this is true whenever the list already holds more than $i entries. Switching to
// in_array() would change how many followers get merged below - confirm the intent first.
if (array_key_exists($i, $alreadyHandledIndexes)) {
continue;
}
// An empty wrapper with a single child is replaced by that child.
if(isset($data[ $i ]['content']) && $data[ $i ]['content']==='' && count($data[ $i ]['children'])==1){
$data[ $i ] = last($data[ $i ]['children']);
}
$j = $i + 1;
for ($j; $j < count($data); $j++) {
// NOTE(review): this tests $i (not $j) against the keys of the list; once one follower has
// been recorded the list holds more than $i entries, so later iterations only `continue`.
if (array_key_exists($i, $alreadyHandledIndexes)) {
continue;
}
// Stop at entries without content and at anything whose content contains 'h1'.
if (! isset($data[ $j ][ 'content' ]) || strpos($data[ $j ][ 'content' ], 'h1') !== false) {
break;
}
// Only numbered entries may absorb the entries that follow them.
if(isset($data[$i]['numbering_row'])){
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandledIndexes[] = $j;
}else {
break;
}
}
//if (isset($data[ $i ][ 'content' ]) && empty($data[ $i ][ 'content' ])) {
// $data[ $i ] = last($data[ $i ][ 'children' ]);
//}
// An array with more than one element and no 'content' key is treated as a list of nodes
// and spliced into the result; anything else is appended as a single entry.
if (is_array($data[ $i ]) && count($data[ $i ]) > 1 && ! isset($data[ $i ][ 'content' ])) {
$result = array_merge($result, $data[ $i ]);
} else {
$result[] = $data[ $i ];
}
$alreadyHandledIndexes[] = $i;
}
return $result;
}
/**
 * Try to fold $child into $parent and return the (possibly replaced) parent.
 *
 * Steps, in order:
 *  1. A parent with empty content is unwrapped: a single child replaces it, several children
 *     are regrouped through fixChildrenStructure().
 *  2. When $child is an '<ol>' wrapper, each of its children is appended to the parent's
 *     children; nested '<ol>' wrappers are first folded into the parent's last child.
 *  3. When the parent's clean text ends with a lowercase letter and the child has clean text,
 *     the child's content, children and clean text are appended to the parent's.
 *  4. A one-element array without a `content` key is replaced by its only element.
 *
 * The previous implementation called dd() (a debug dump that stops execution) whenever the
 * child content was an empty string, and read `$child['content']` without checking that it
 * exists. That debugging leftover has been removed; such a child now falls through the steps
 * above, none of which apply to it.
 *
 * @param array $parent Node that may absorb the child.
 * @param array $child  Candidate node.
 *
 * @return array
 */
private function handlePossibleChild($parent, $child = [])
{
    if (isset($parent[ 'children' ])) {
        if (empty($parent[ 'content' ]) && count($parent[ 'children' ]) === 1) {
            $parent = $parent[ 'children' ][ 0 ];
        } elseif (empty($parent[ 'content' ]) && count($parent[ 'children' ]) > 1) {
            $parent = $this->fixChildrenStructure($parent[ 'children' ]);
        }
    }

    if (isset($child[ 'content' ]) && $child[ 'content' ] == '<ol>') {
        for ($i = 0; $i < count($child[ 'children' ]); $i++) {
            $newChild = $child[ 'children' ][ $i ];
            if ($child[ 'children' ][ $i ][ 'content' ] == '<ol>') {
                // A nested list belongs to the entry that precedes it.
                $lastParentChild = last($parent[ 'children' ]);
                $newChild = $this->handlePossibleChild($lastParentChild, $child[ 'children' ][ $i ]);
            }
            $parent[ 'children' ][] = $newChild;
        }
    }

    // Parent text that stops on a lowercase letter is continued by the child text.
    if (isset($parent[ 'clean_content' ])
        && strlen($parent[ 'clean_content' ])
        && strpbrk(substr($parent[ 'clean_content' ], -1), '.,;\'"0123456789') === false
        && ctype_lower(substr($parent[ 'clean_content' ], -1))
        && isset($child[ 'clean_content' ])
        && strlen($child[ 'clean_content' ])
    ) {
        $parent[ 'content' ] .= ' '.$child[ 'content' ];
        $parent[ 'children' ] = array_merge($parent[ 'children' ], $child[ 'children' ]);
        $parent[ 'clean_content' ] .= ' '.$child[ 'clean_content' ];
    }

    if (is_array($parent) && count($parent) == 1 && ! isset($parent[ 'content' ])) {
        $parent = array_shift($parent);
    }

    return $parent;
}
}

670
app/Parser/ParseHtmlArray.php

@ -1,670 +0,0 @@
<?php
namespace App\Parser;
use Illuminate\Support\Facades\Log;
class ParseHtmlArray
{
/**
 * Load a JSON export from disk and convert it through handle().
 *
 * Before decoding, a trailing comma in front of a closing bracket on the next line is removed,
 * because json_decode() rejects trailing commas.
 *
 * @param string $filePath Path of the JSON file.
 *
 * @return array|null Result of handle(), or null when the file is missing, unreadable or does
 *                    not decode to an array (each case is logged).
 */
public function fromFile($filePath)
{
    if (! file_exists($filePath)) {
        Log::error('The given file dose not exists!');

        return null;
    }

    $fileContent = file_get_contents($filePath);
    if ($fileContent === false) {
        Log::error('The given file could not be read: '.$filePath);

        return null;
    }

    // The search/replace literals intentionally span two lines (a line break sits inside them).
    $fileContent = str_replace('},
]', "}
]", $fileContent);

    $decoded = json_decode($fileContent, true);
    if (! is_array($decoded)) {
        // Previously the null from a failed decode was handed straight to handle().
        Log::error('The given file does not contain a valid JSON array: '.json_last_error_msg());

        return null;
    }

    return $this->handle($decoded);
}
/**
 * Turn the decoded document array into the nested paragraph structure.
 *
 * Every top-level entry is flattened with handleTestHtml(); the concatenated paragraphs are
 * then grouped by buildTheStructure().
 *
 * @param array $docxAsHtmlArray Decoded document, one entry per top-level section.
 *
 * @return array
 */
public function handle($docxAsHtmlArray)
{
    $paragraphs = [];

    foreach ($docxAsHtmlArray as $section) {
        $paragraphs = array_merge($paragraphs, $this->handleTestHtml($section));
    }

    return $this->buildTheStructure($paragraphs);
}
/**
 * Group a flat list of paragraphs into parent entries with nested `children`.
 *
 * For every paragraph that looks like a heading (starts with <b>, has exactly one <b> or a
 * leading number together with visible text, or contains fewer than two short words) the
 * following paragraphs are examined one by one and either attached to it through
 * handlePossibleChild() or taken as the start of the next group (break). The decision is a
 * long chain of heuristics based on detected numbering, bold markers, word counts, trailing
 * punctuation and a list of known heading texts.
 *
 * @param array $data List of ['content' => string] entries, indexed 0..n-1.
 *
 * @return array The entries that still have visible text, with `numbering` / `children` added
 *               where they were detected.
 */
private function buildTheStructure($data)
{
$response = [];
$alreadyHandled = [];
$numbers = [];
for ($i = 0; $i < count($data); $i++) {
// NOTE(review): $alreadyHandled stores indexes as VALUES while array_key_exists() tests KEYS;
// this is true whenever the list already holds more than $i entries. Confirm before changing.
if (array_key_exists($i, $alreadyHandled)) {
continue;
}
$parent = $data[ $i ];
// Detect a leading number in the first 5 characters of the cleaned content
// (placeholders starting with '{' are cut off and ')' is treated like '.').
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '',
substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '', preg_replace('/\)/', '.',
preg_replace("/\{.+/", "", html_entity_decode($data[ $i ][ 'content' ]))))), 0, 5)),
$parentNumbering);
// The first accepted number must be below 5; later ones must not be smaller than the last one.
if ($parentNumbering && count($numbers) == 0 && last($parentNumbering) < 5) {
$numbers[] = $parentNumbering[ 0 ];
$data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.');
} elseif ($parentNumbering && count($numbers) > 0 && $parentNumbering[ 0 ] >= last($numbers)) {
$numbers[] = $parentNumbering[ 0 ];
$data[ $i ][ 'numbering' ] = rtrim($parentNumbering[ 0 ], '.');
}
//check if string starts with bold
//check if number of bolds equals to 1
//check if not empty html and contains words
if ((strpos($parent[ 'content' ], "<b>") === 0 || (substr_count($parent[ 'content' ],
"<b>") == 1 || $parentNumbering) && strlen(trim(strip_tags($parent[ 'content' ]))) > 0) || (str_word_count(preg_replace('/[A-Za-z]{4,}/',
'', strip_tags($data[ $i ][ 'content' ]))) < 2)) {
$childNumbers = [];
$j = $i + 1;
//check if data exists
if (isset($data[ $j ]) && strlen($data[ $j ][ 'content' ])) {
for ($j; $j < count($data); $j++) {
// NOTE(review): in single quotes this is the six characters \u00a0, not a
// non-breaking space - confirm which one the input actually contains.
if ($data[ $j ][ 'content' ] == '\u00a0') {
$alreadyHandled[] = $j;
}
// NOTE(review): same key-vs-value mismatch as in the outer loop.
if (array_key_exists($j, $alreadyHandled)) {
continue;
}
$child = $data[ $j ];
// Detect a leading number in the first 5 characters of the candidate's plain text.
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/',
substr(trim(urldecode(str_replace(['<b>', '</b>'], '',
strip_tags($data[ $j ][ 'content' ])))), 0, 5), $childNumbering);
if ($childNumbering && ! preg_match("/[a-z]/i", rtrim(trim($childNumbering[ 0 ])))) {
if ($childNumbering && count($childNumbers) == 0 && trim($childNumbering[ 0 ]) < 5) {
$childNumbers[] = trim($childNumbering[ 0 ]);
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.');
} elseif ($childNumbering && count($childNumbers) > 0 && trim($childNumbering[ 0 ]) >= last($childNumbers)) {
$childNumbers[] = trim($childNumbering[ 0 ]);
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.');
} elseif ($childNumbering && trim($childNumbering[ 0 ]) < 100) {
$childNumbers[] = trim($childNumbering[ 0 ]);
$data[ $j ][ 'numbering' ] = rtrim(trim($childNumbering[ 0 ]), '.');
}
}
if (empty(trim($data[ $i ][ 'content' ])) && isset($data[ $j ][ 'numbering' ])) {
break;
}
// Known heading texts that start a new group.
// NOTE(review): array_change_key_case() only changes array KEYS; this is a list with
// integer keys, so the VALUES keep their case (e.g. 'Products and/or Services') although
// they are compared against upper-cased text below. The list is also rebuilt on every
// iteration of the inner loop.
$breakPoints = array_change_key_case([
'TERMS OF THE {P1_Pros}',
'TERMS AND CONDITIONS',
'BACKGROUND',
'OPERATIVE PROVISIONS',
'Products and/or Services',
'PAYMENT',
'GRANT OF LICENCE',
'TERM OF LICENCE AGREEMENT',
'ROYALTY',
'PAYMENT',
'PERFORMANCE TARGETS',
'STATIONERY',
'QUALITY CONTROL',
'THE DISTRIBUTOR\'S OBLIGATIONS',
'NON SOLICITATION',
'SALE OF BUSINESS',
'TERMINATION OF AGREEMENT',
'CONDITIONS FOLLOWING TERMINATION',
'RESTRAINT',
'TIME OF ESSENCE AND NOTICES',
'INTERPRETATION',
'ARBITRATION',
'DOMICILIUM AND REGISTERED OFFICE',
'USE OF TRADE MARKS, TRADE NAME, GOODWILL AND KNOW-HOW',
'GENERAL',
'DESCRIPTION OF {P2_NAME} INFORMATION',
'PAYMENT OF FEES',
'SUPPLIER\'S STATUS',
'SUPPLIER\’S OBLIGATIONS',
'DEFINITIONS AND INTERPRETATION',
'DEFINITIONS',
'CONFIDENTIALITY',
'TERMINATION',
'RESTRICTIVE COVENANTS AND INTELLECTUAL PROPERTY',
'DETAILS AND IDENTITY OF CONSULTANT',
'ANTI-BRIBERY',
'ASSIGNMENT SCHEDULE',
'SCHEDULE 1',
'{P1_NAME}\'S LIABILITY',
'DURATION OF AGREEMENT AND SUPPLY',
'SUPPLY OF HARDWARE',
'SUPPLY OF SOFTWARE AND DOCUMENTATION',
'SUPPLY OF SUPPORT SERVICES',
'INTELLECTUAL PROPERTY RIGHTS',
'THE CONTRACT',
'{P1_NAME}\U2019S LIABILITY',
'UPDATES',
'TERMS OF THE {P1_NAME} PRODUCTS.',
'CUSTOMER RESPONSIBILITIES',
'EXHIBIT A',
'EXHIBIT A-1',
'EXHIBIT A-2',
'WARRANTIES',
'EXIT, TERMINATION AND SUSPENSION',
'EXHIBIT B',
'EXHIBIT B-1',
'EXHIBIT B-2',
'COUNTERPARTS',
'LICENSE GRANT',
'INDEMNIFICATION BY CUSTOMER',
'TERMS OF THE {P1_NAME} PRODUCTS',
'TERMS OF CLOUD SERVICE',
'INDEMNIFICATION BY CUSTOMER',
'TERMINATION',
'TERMS OF THE {P1_PROS}',
'SUPPORT',
'SUB CONTRACTING AND THIRD PARTY RECOMMENDATIONS',
'LICENCE AND ACCESS TO SOFTWARE AND HARDWARE',
'DECLARATION OF NON-LIAISON AND ANTI-CORRUPTION COMMITMENT',
'{P1_NAME}\'S DUTIES'
], CASE_UPPER);
//$breakPoints = [];
// A candidate whose single bold part is a known heading starts a new group.
if ($this->paragraphBrake($data[ $j ], $breakPoints)) {
break;
}
// A fully bold candidate that is followed by a capitalised paragraph, or that carries the
// number right after the parent's, starts a new group.
if (substr(trim(str_replace(array_merge([')'], $childNumbering), '', $data[ $j ][ 'content' ])),
0, 3) == '<b>' && str_word_count(strip_tags(str_replace(array_merge([')'],
$childNumbering), '',
$data[ $j ][ 'content' ]))) == str_word_count($this->getTextBetweenTags(str_replace(array_merge([')',],
$childNumbering), '', $data[ $j ][ 'content' ]),
'b')) && (isset($data[ $j + 1 ]) && ((ctype_upper(substr($data[ $j + 1 ][ 'content' ],
0,
1)) || (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ] == 1))))) {
break;
}
// An unnumbered parent that already has children stops at an all-uppercase candidate.
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace(' ',
'', $data[ $j ][ 'content' ])) && str_word_count($data[ $j ][ 'content' ]) >= 1) {
break;
}
// Same check, ignoring bold tags, the detected number, ')' and '.'.
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ctype_upper(str_replace([
'<b>',
'</b>',
last($childNumbering),
last($childNumbering),
')',
'.'
], '', trim(str_replace(' ', '',
$data[ $j ][ 'content' ])))) && str_word_count($data[ $j ][ 'content' ]) >= 1) {
break;
}
//if(isset($data[$j]['numbering']) && isset($data[$i]['numbering']) && )
if (isset($data[ $i ][ 'children' ]) && isset($data[ $i ][ 'numbering' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && ($data[ $j ][ 'numbering' ] - last($data[ $i ][ 'children' ])[ 'numbering' ] !== 1 && $data[ $i ][ 'numbering' ] < $data[ $j ][ 'numbering' ]) && ! in_array(substr(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ]),
strlen(strip_tags(last($data[ $i ][ 'children' ])[ 'content' ])) - 1),
[':', '-']) && ! strpos($data[ $j ][ 'numbering' ], '.')) {
break;
}
// The parent itself is a known heading.
if (in_array(strtoupper(trim(str_replace([
'<b>',
'</b>',
last($parentNumbering),
last($parentNumbering),
')',
'.'
], '', strip_tags($data[ $i ][ 'content' ])))), $breakPoints)) {
if ((! isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && (substr($data[ $i ][ 'content' ],
0,
3) != '<b>') || (str_word_count(strip_tags($data[ $i ][ 'content' ])) != str_word_count($this->getTextBetweenTags($data[ $i ][ 'content' ],
'b'))))) {
if (! in_array($data[ $i ][ 'content' ], $breakPoints)) {
break;
}
}
}
// The candidate is a known heading.
if (in_array(strtoupper(trim(str_replace([
'<b>',
'</b>',
last($childNumbering),
last($childNumbering),
')',
'.'
], '', strip_tags($data[ $j ][ 'content' ])))), $breakPoints)) {
break;
}
// From here on the candidate is either attached to the parent or ends the group.
if (in_array(substr(strip_tags($data[ $j ][ 'content' ]),
strlen(strip_tags($data[ $j ][ 'content' ])) - 1), [':', '-'])) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ctype_lower(substr(last($data[ $i ][ 'children' ])[ 'content' ],
strlen(last($data[ $i ][ 'children' ])[ 'content' ]) - 1)) && ctype_lower(substr(trim($data[ $j ][ 'content' ]),
0, 1))) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '',
strip_tags($data[ $j ][ 'content' ]))) < 3 && strlen(strip_tags($data[ $j ][ 'content' ])) && ! isset($data[ $j ][ 'numbering' ]) && ctype_upper(substr($data[ $j ][ 'content' ],
0, 1)) && str_word_count($data[ $j ][ 'content' ]) < 10) {
if (isset($data[ $i ][ 'children' ]) && ! in_array(substr(trim(last($data[ $i ][ 'children' ])[ 'content' ]),
strlen(trim(last($data[ $i ][ 'children' ])[ 'content' ])) - 1),
['!', '.', '?', '_', '}'])) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} else {
break;
}
//dd($data[$i]);
} elseif (str_word_count(preg_replace('/[A-Za-z]{4,}/', '',
strip_tags($data[ $i ][ 'content' ]))) < 2 && strlen(strip_tags($data[ $i ][ 'content' ]))) {
if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ]
[ 'content' ]) < 6) {
break;
}
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ((str_word_count($data[ $j ]
[ 'content' ]) < 6) || (substr_count($data[ $j ][ 'content' ],
'<b>') == 1 && substr_count(last($data[ $i ][ 'children' ])[ 'content' ],
'<b>') == 0 && ! isset(last($data[ $i ][ 'children' ])[ 'numbering' ]))) && ctype_upper((substr($data[ $j ][ 'content' ],
0, 1)))) {
break;
}
if (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ]) && $data[ $j ][ 'numbering' ] + 1 == $data[ $i ][ 'numbering' ] && str_word_count($data[ $j ][ 'content' ]) < 6) {
break;
}
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && ! isset($data[ $i ][ 'numbering' ]) && ! isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) {
break;
}
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (! in_array(trim(strtolower(strip_tags($data[ $j ][ 'content' ]))),
['definitions']) && ! ctype_space($data[ $j ][ 'content' ]) && strlen(trim(strip_tags($data[ $j ][ 'content' ]))) && ! isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ])) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (isset($data[ $i ][ 'numbering' ]) && isset($data[ $j ][ 'numbering' ])) {
// Both entries are numbered: decide from the distance between the numbers, the
// nesting depth (count of '.') and the bold markers.
if (is_numeric($data[ $j ][ 'numbering' ]) && is_numeric($data[ $i ][ 'numbering' ]) && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) == 1 && str_word_count($data[ $j ][ 'content' ]) < str_word_count($data[ $i ][ 'content' ])) {
break;
}
if (is_numeric($data[ $j ][ 'numbering' ]) && abs($data[ $j ][ 'numbering' ] - $data[ $i ][ 'numbering' ]) === 1 && (isset($data[ $i ][ 'children' ]) && (! (isset(last($data[ $i ][ 'children' ])[ 'numbering' ])) || (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs(last($data[ $i ][ 'children' ])[ 'numbering' ] - $data[ $j ][ 'numbering' ]) !== 1))) && str_word_count($data[ $j ][ 'content' ]) < 8) {
break;
}
if (substr_count($data[ $j ][ 'numbering' ], '.') > substr_count($data[ $i ][ 'numbering' ],
'.') && ((float) $data[ $j ][ 'numbering' ] - (float) $data[ $i ][ 'numbering' ]) < 1) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (((float) $data[ $j ][ 'numbering' ] > (float) $data[ $i ][ 'numbering' ] && substr_count($data[ $j ][ 'content' ],
'<b>') == 0 && substr_count($data[ $i ][ 'content' ],
'<b>') == 1) || (substr_count($data[ $i ][ 'content' ],
"<b>") == 1 && (substr_count($data[ $j ][ 'content' ],
'<b>') == 0 || substr_count($data[ $j ][ 'content' ], '<b>')) > 1)) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (substr_count($data[ $i ][ 'content' ],
'<b>') == 1 && str_word_count($data[ $j ][ 'content' ]) > 6 && isset($data[ $j ][ 'numbering' ])) {
// NOTE(review): dd() is not defined in this file; it looks like a leftover debug dump
// (the dump-and-die helper) tied to one specific sentence - confirm and remove.
if (strpos($data[ $j ][ 'content' ],
'Networking infrastructure (hardware, firmware, software an') !== false) {
dd('aa');
}
if (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ])) {
$lastParentChild = last($data[ $i ][ 'children' ]);
if (isset($lastParentChild[ 'numbering' ]) && abs($lastParentChild[ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && (substr_count($data[ $j ][ 'content' ],
'<b>') == 1)) {
break;
}
}
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) === 1 && str_word_count($data[ $j ][ 'content' ]) >= 6) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ]) && isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && abs((float) $data[ $j ][ 'numbering' ] - (float) last($data[ $i ][ 'children' ])[ 'numbering' ]) == (float) 1) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (isset($data[ $i ][ 'numbering' ]) && abs($data[ $i ][ 'numbering' ] - $data[ $j ][ 'numbering' ]) == 0 && str_word_count($data[ $j ][ 'content' ]) >= 6) {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} else {
break;
}
} elseif (isset($data[ $i ][ 'numbering' ]) && ! isset($data[ $j ][ 'numbering' ]) && str_word_count($data[ $j ][ 'content' ]) > 6) {
if (substr_count($data[ $j ][ 'content' ],
"<b>") == 1 && strpos(strtolower($data[ $i ][ 'content' ]),
'definition') === false) {
break;
}
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} elseif (empty($data[ $j ][ 'content' ]) && (isset($data[ $j + 1 ]) && isset($data[ $j - 1 ]) && isset($data[ $i ][ 'children' ]))) {
// NOTE(review): this branch requires empty($data[$j]['content']) while the condition
// below requires ! empty(...) of the same value, so the dd() branch cannot be reached
// and this elseif always ends in `break`.
if (isset(last($data[ $i ][ 'children' ])[ 'numbering' ]) && strlen(last($data[ $i ][ 'children' ])[ 'numbering' ]) == strlen(preg_replace('/[^0-9\.)]/',
'', substr(trim(preg_replace('/ +/', ' ', preg_replace('/[^A-Za-z0-9 .]/', ' ',
urldecode(strip_tags($data[ $j + 1 ][ 'content' ]))))), 0,
5))) && ! empty($data[ $j ][ 'content' ])) {
dd('Here', $data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
} else {
break;
}
} elseif (isset($data[ $i ][ 'children' ]) && count($data[ $i ][ 'children' ]) && isset($data[ $j ][ 'numbering' ])) {
$lastParentChild = last($data[ $i ][ 'children' ]);
// NOTE(review): $child was copied from $data[$j] before its 'numbering' was assigned,
// so isset($child['numbering']) looks like it can never be true here - confirm. The
// dd('111') below also appears to be a leftover debug dump.
if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && substr_count($lastParentChild[ 'numbering' ],
'.') > substr_count($data[ $j ][ 'numbering' ], '.')) {
dd('111');
} else {
$data[ $i ] = $this->handlePossibleChild($data[ $i ], $data[ $j ]);
$alreadyHandled[] = $j;
}
} else {
break;
}
//if(strpos($data[$i]['content'],'<b>2. TERMS OF THE {P1_Pros}.</b>')!==false || strpos($data[$j]['content'],'<b>2. TERMS OF THE {P1_Pros}.</b>')!==false){
// dd($data[$i],$data[$j]);
//}
}
}
}
// Only entries that still have visible text are emitted.
if (strlen(trim(strip_tags($data[ $i ][ 'content' ])))) {
$response[] = $data[ $i ];
//if ($data[ $i ][ 'content' ] == "Duration of Agreement and Supply") {
// dd(121,$data[$i],$i);
//}
//if($i > 73){
// dd($i,$data[$i],$response);
//}
}
$alreadyHandled[] = $i;
}
return $response;
}
/**
 * Attach $child to $parent (or to one of the parent's existing children) and return the parent.
 *
 * Rules, in order:
 *  - a parent without content is replaced by a child that has content;
 *  - a child without content is ignored;
 *  - a parent without children simply receives the child;
 *  - when the last existing child ends with ':' and the new child starts with a lowercase
 *    letter (or with a digit and has more than five words), the new child is pushed one level
 *    deeper, unless its number directly follows the number of that last child;
 *  - a child whose numbering string is longer than the last child's is nested below it, unless
 *    the two numbers differ by exactly 1 or have the same count of '.';
 *  - otherwise the child text is appended to the last child's text when that text does not end
 *    with terminating punctuation, or the child is added as a sibling / grandchild.
 *
 * Compared with the previous implementation, a dd() debug dump that stopped execution for one
 * hard-coded sentence was removed, and checks that were always true at that point were dropped.
 *
 * @param array $parent Entry with `content` and optional `children` / `numbering`.
 * @param array $child  Entry with `content` and optional `numbering`.
 *
 * @return array
 */
private function handlePossibleChild($parent, $child)
{
    if (empty($parent[ 'content' ]) && ! empty($child[ 'content' ])) {
        return $child;
    }
    if (empty($child[ 'content' ])) {
        return $parent;
    }

    // First child: every branch of the former if/elseif chain appended it.
    if (! isset($parent[ 'children' ]) || count($parent[ 'children' ]) == 0) {
        $parent[ 'children' ] = [$child];

        return $parent;
    }

    $lastIndex = count($parent[ 'children' ]) - 1;
    $lastParentChild = last($parent[ 'children' ]);
    $childFirstChar = substr($child[ 'content' ], 0, 1);

    // The last child introduces an enumeration (ends with ':').
    if ($lastParentChild
        && substr($lastParentChild[ 'content' ], -1) === ':'
        && (ctype_lower($childFirstChar)
            || (ctype_digit($childFirstChar) && str_word_count($child[ 'content' ]) > 5))
    ) {
        $lastParentChild = $this->handlePossibleChild($lastParentChild, $child);
        if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && $child[ 'numbering' ] - 1 == $lastParentChild[ 'numbering' ]) {
            $parent[ 'children' ][] = $child;
        } else {
            $parent[ 'children' ][ $lastIndex ] = $lastParentChild;
        }

        return $parent;
    }

    // The child's number is longer than the last child's: possible sub-item.
    if (isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && strlen($child[ 'numbering' ]) > strlen($lastParentChild[ 'numbering' ])) {
        if ($child[ 'numbering' ] && is_numeric($child[ 'numbering' ]) && abs($child[ 'numbering' ] - $lastParentChild[ 'numbering' ]) === 1) {
            $parent[ 'children' ][] = $child;

            return $parent;
        }
        if (substr_count($lastParentChild[ 'numbering' ], '.') == substr_count($child[ 'numbering' ], '.')) {
            $parent[ 'children' ][] = $child;

            return $parent;
        }
        $parent[ 'children' ][ $lastIndex ] = $this->handlePossibleChild($lastParentChild, $child);

        return $parent;
    }

    // Last character of the last child's text once every 'and' / 'or' has been removed.
    $lastChildTail = substr(trim(str_replace(['and', 'or'], '', $lastParentChild[ 'content' ])), -1);
    $lastChildIsOpen = ! in_array($lastChildTail, ['!', '.', '?', ';', '_', ':'], true);
    $childStart = substr(trim($child[ 'content' ]), 0, 1);

    if ($lastChildIsOpen && (ctype_lower($childStart) || (ctype_upper($childStart) && ! isset($child[ 'numbering' ])))) {
        // The last child's sentence is unfinished: continue it with the new text.
        $lastParentChild[ 'content' ] .= ' '.$child[ 'content' ];
        $parent[ 'children' ][ $lastIndex ] = $lastParentChild;

        return $parent;
    }

    $parentTail = substr(trim($parent[ 'content' ]), -1);
    $lastChildEnd = substr(trim($lastParentChild[ 'content' ]), -1);

    if (! in_array($parentTail, ['!', '.', '?', ';'], true) && ctype_lower($lastChildEnd) && ctype_lower($childStart)) {
        $parent[ 'children' ][] = $child;
    } elseif ($lastChildIsOpen && isset($lastParentChild[ 'numbering' ]) && isset($child[ 'numbering' ]) && $lastParentChild[ 'numbering' ] > $child[ 'numbering' ]) {
        // A smaller number after an unfinished item restarts counting one level deeper.
        $lastParentChild[ 'children' ][] = $child;
        $parent[ 'children' ][ $lastIndex ] = $lastParentChild;
    } else {
        $parent[ 'children' ][] = $child;
    }

    return $parent;
}
/**
 * Flatten one section of the decoded document into a list of paragraph entries.
 *
 * An item that has exactly one element which is itself an array is unwrapped recursively, and
 * the result of that recursion is returned immediately. Every other item is converted with
 * buildParagraphs(); results with more than one paragraph are merged into the collected list
 * (as long as the list has no `content` key), any other non-empty result replaces the list.
 *
 * @param array $array Section of the decoded document.
 *
 * @return array
 */
public function handleTestHtml($array)
{
    $collected = [];

    foreach ($array as $item) {
        if (count($item) == 1 && is_array(last($item))) {
            return $this->handleTestHtml($item);
        }

        $html = $this->buildParagraphs($item);

        if (count($html) > 1 && ! isset($collected[ 'content' ])) {
            $collected = array_merge($collected, $html);
            continue;
        }

        if ($html) {
            $collected = $html;
        }
    }

    return $collected;
}
/**
 * Convert a (possibly nested) list of HTML strings into a flat list of paragraph entries.
 *
 * Nested arrays are flattened recursively. Each non-blank string is normalised: whitespace
 * runs are collapsed to one space, <font> tags are dropped, elements that contain nothing but
 * whitespace / <br /> / space entities are removed, '<b> </b>' is removed, and HTML entities
 * are decoded. Strings that end up empty are skipped.
 *
 * The previous implementation kept an `$alreadyHandled` list that was never written to; that
 * dead bookkeeping is gone and count() is evaluated once.
 *
 * @param array $paragraphs List indexed 0..n-1 of HTML strings and/or nested lists.
 *
 * @return array List of ['content' => string] entries.
 */
private function buildParagraphs($paragraphs)
{
    $result = [];
    $total = count($paragraphs);

    for ($i = 0; $i < $total; $i++) {
        $paragraph = $paragraphs[ $i ];

        if (is_array($paragraph)) {
            $result = array_merge($result, $this->buildParagraphs($paragraph));
            continue;
        }

        if (! strlen($paragraph) || ctype_space($paragraph)) {
            continue;
        }

        $singleSpaced = preg_replace('/\s+/S', " ", $paragraph);
        $withoutFont = preg_replace('/(<font[^>]*>)|(<\/font>)/', '', $singleSpaced);
        $withoutEmptyTags = preg_replace('/<([^>\s]+)[^>]*>(?:\s*(?:<br \/>|&nbsp;|&thinsp;|&ensp;|&emsp;|&#8201;|&#8194;|&#8195;)\s*)*<\/\1>/',
            '', $withoutFont);
        $cleanHtml = trim(str_replace('<b> </b>', '', $withoutEmptyTags));

        if (! empty($cleanHtml)) {
            $result[] = ['content' => html_entity_decode($cleanHtml, ENT_COMPAT | ENT_HTML401, 'UTF-8')];
        }
    }

    return $result;
}
/**
 * Return the text captured between the opening and the closing $tagname tag of $string.
 *
 * <u> and </u> are stripped from the input first. The pattern is greedy, so when the tag
 * occurs several times the text of the last pair is captured. Returns '' when nothing matches.
 *
 * @param string $string  HTML fragment.
 * @param string $tagname Tag name; it is interpolated into the pattern without escaping.
 *
 * @return string
 */
private function getTextBetweenTags($string, $tagname)
{
    $withoutUnderline = str_replace(['<u>', '</u>'], '', $string);

    preg_match("/<$tagname ?.*>(.*)<\/$tagname>/", $withoutUnderline, $matches);

    if (! $matches) {
        return '';
    }

    // The pattern has a single capture group, so it is always the last element.
    return $matches[ 1 ];
}
/**
 * Tell whether a paragraph starts a new section because its single bold part is a known heading.
 *
 * When the paragraph carries a `numbering`, every '.', ')' and every occurrence of that
 * numbering is removed from the content first. The check applies only to content holding
 * exactly one '</b>': the text in front of it, without '<b>' and surrounding whitespace, must
 * be one of $breakPoints.
 *
 * The previous implementation also ran two preg_replace() calls whose results were discarded;
 * they had no effect and were removed. The lookup is now strict (string identity).
 *
 * @param array $paragraph   Entry with `content` and optional `numbering`.
 * @param array $breakPoints Known heading texts.
 *
 * @return bool
 */
private function paragraphBrake($paragraph, array $breakPoints)
{
    $content = $paragraph[ 'content' ];

    if (isset($paragraph[ 'numbering' ])) {
        $content = str_replace(['.', ')', $paragraph[ 'numbering' ]], '', $content);
    }

    if (substr_count($content, '</b>') !== 1) {
        return false;
    }

    $beforeClosingBold = explode('</b>', $content)[ 0 ];
    $heading = trim(str_replace('<b>', '', trim($beforeClosingBold)));

    return in_array($heading, $breakPoints, true);
}
}

406
app/Parser/ParseXml.php

@ -1,406 +0,0 @@
<?php
namespace App\Parser;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use SimpleXMLElement;
class ParseXml
{
/**
 * Font id that marks a row as a title: handleElements() compares each node's `font`
 * attribute with it. Null until it has been determined (the constructor sets null).
 *
 * @var int|null
 */
private $titleFontThreshold;
/**
 * Initialised to null by the constructor.
 * NOTE(review): presumably a font threshold for header / footer rows - confirm where it is set.
 *
 * @var int|null
 */
private $headerFontFooterThreshold;
/**
 * Start with both font thresholds unset (null).
 */
public function __construct()
{
    $this->titleFontThreshold = null;
    $this->headerFontFooterThreshold = null;
}
/**
 * Parse an XML file and build the nested structure from its <text> nodes.
 *
 * A string argument is treated as a path on the `contracts` storage disk; the method waits
 * until that file exists before reading it. Any other argument is read with
 * file_get_contents().
 *
 * Previously a failed load left the file content undefined and an unparsable document made
 * simplexml_load_string() return false, both ending in a fatal "member function on bool"
 * error. Those cases are now logged and answered with an empty array.
 *
 * @param mixed $xmlFile Path on the `contracts` disk, or anything file_get_contents() accepts.
 *
 * @return mixed Result of buildChildStructure(), or an empty array when the XML could not be
 *               loaded or parsed.
 */
public function handle($xmlFile)
{
    $file = null;

    if (is_string($xmlFile)) {
        try {
            $storageDisk = Storage::disk('contracts');
            // NOTE(review): this polls without an upper bound; if the file is never written the
            // call never returns. Consider a timeout once the expected write delay is known.
            while (! $storageDisk->exists($xmlFile)) {
                //Sleep if file not yet written
                sleep(1);
            }
            $file = $storageDisk->get($xmlFile);
        } catch (\Exception $exception) {
            Log::error('Failed to load the xml file '.$exception->getMessage());
        }
    } else {
        $file = file_get_contents($xmlFile);
    }

    if (! is_string($file) || $file === '') {
        Log::error('Failed to load the xml file: no content available');

        return [];
    }

    $xml = simplexml_load_string($file);
    if ($xml === false) {
        Log::error('Failed to parse the xml file');

        return [];
    }

    return $this->buildChildStructure($this->handleElements($xml->xpath('//text')));
}
/**
 * Turn the list of <text> nodes into rows, gluing together nodes that belong to the same
 * logical line or sentence.
 *
 * For every SimpleXMLElement the following nodes are appended to its content while the merge
 * condition holds (it looks at the `top` / `height` attributes, at the first character of the
 * next node and at the last character of the text collected so far). Each resulting row holds
 * the content, the detected numbering and the position / font attributes of the node.
 *
 * @param array|mixed $element List of nodes; anything else is cast to an array.
 *
 * @return array List of rows with the keys type, content, numbering, top, height, left, font
 *               and children.
 */
private function handleElements($element)
{
if (is_array($element)) {
$elements = $element;
} else {
$elements = (array) $element;
}
//dd(!in_array(trim(last(explode(' ', strip_tags('modify or make additions to the {P1_Name} Software, except to the extent permitted by law; or')))),['and','or']),trim(last(explode(' ', strip_tags('modify or make additions to the {P1_Name} Software, except to the extent permitted by law; or')))));
// NOTE(review): setTitleThreshold() is defined outside this view; presumably it fills
// $this->titleFontThreshold, which is used for the row type below - confirm.
$this->setTitleThreshold($elements);
$numberOfNodes = count($elements);
$rows = [];
for ($i = 0; $i < $numberOfNodes; $i++) {
$current = $elements[ $i ];
// Never modified below, so every row is created with an empty `children` list.
$listContent = [];
if ($current instanceof SimpleXMLElement) {
$content = $this->getNodeContent($current);
//if(strpos($content,'Provided that the Customer has continued to pay ')!==false){
// dd(($i + 1 <= $numberOfNodes && isset($elements[ $i + 1 ]) && (((int) $elements[ $i + 1 ][ 'top' ] === (int) $current[ 'top' ]) || (int) $elements[ $i + 1 ][ 'top' ] <= ((int) $current[ 'top' ] + (int) $current[ 'height' ] + 3)) && (int) $current[ 'top' ] <= (int) $elements[ $i + 1 ][ 'top' ])
// || (isset($elements[ $i + 1 ]) && ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1))), substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1))));
//}
// Initialised once per row and not reset inside the merge loop below.
$parentNumbering = [];
// Merge loop: keep appending the next node while it continues the current text.
while ($i + 1 <= $numberOfNodes && isset($elements[ $i + 1 ]) &&
(((((((int) $elements[ $i + 1 ][ 'top' ] === (int) $current[ 'top' ]) || (int) $elements[ $i + 1 ][ 'top' ] <= ((int) $current[ 'top' ] + (int) $current[ 'height' ] + 3)) && (int) $current[ 'top' ] <= (int) $elements[ $i + 1 ][ 'top' ])
|| (ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0,1)))
|| (! in_array(substr(trim(strip_tags($this->getNodeContent($elements[ $i + 1 ]))),0, 1), [',']))
|| (ctype_lower(substr(trim(strip_tags($content)),strlen(trim(strip_tags($content))) - 1))))
&& ! in_array(substr(trim(str_replace(['and','or'], '', $content)), strlen(trim(str_replace(['and', 'or'], '', $content))) - 1),['!', '.', '?', ';', '_', ':', ')'])
&& ! preg_match('/^.*?\-[^\d]*(\d+)[^\d]*\-.*$/',$content)
&& (substr(trim($this->getNodeContent($elements[ $i + 1 ])), 0,strlen('<b>')) !== '<b>'
&& ctype_lower((substr(trim(strip_tags($content)),strlen(trim(strip_tags($content))) - 1)))))
|| ((int) $elements[ $i ][ 'top' ] === (int) $elements[ $i + 1 ][ 'top' ]))
|| (isset($elements[ $i + 1 ]) && trim(strip_tags($this->getNodeContent($elements[ $i+1])))=='[')
) {
//if($parentNumbering){
// dd($parentNumbering,$content);
//}
// First try a number made of two consecutive numeric groups, taken from the first
// 5 characters of the cleaned text collected so far.
preg_match('/^([-+]?\d*\.?\d+)(?:[-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/',
preg_replace('/[^0-9\.)]/', '', substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '',
preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($content))))),
0, 5)), $childNumbering);
// Otherwise fall back to a single numeric group.
if (! $childNumbering) {
preg_match('/^([-+]?\d*\.?\d+)(?:[eE]([-+]?\d+))?/', preg_replace('/[^0-9\.)]/', '',
substr(trim(preg_replace('/[^A-Za-z0-9.)]/', '',
preg_replace('/\)/', '.', preg_replace("/\{.+/", "", html_entity_decode($content))))),
0, 5)), $parentNumbering);
}
//if($childNumbering && strpos($childNumbering[0],"2.1.5")!==false){
// dd(11,$content,$elements[$i],$i,$i+1);
//}
// Append the next node and adopt its vertical position for the following comparison.
$nextElement = $elements[ $i + 1 ];
$nextElementContent = $this->getNodeContent($nextElement);
$content .= ' '.$nextElementContent;
$current[ 'top' ] = $nextElement[ 'top' ];
$current[ 'height' ] = $nextElement[ 'height' ];
if (count($parentNumbering)) {
// A single-group number ends the row right after this node; the number is removed
// from every place it occurs in the content.
$current[ 'row_numbering' ] = $parentNumbering[ 0 ];
$content = str_replace($current[ 'row_numbering' ], '', $content);
$i++;
break;
} elseif ($childNumbering) {
$current[ 'row_numbering' ] = $childNumbering[ 0 ];
$content = str_replace($current[ 'row_numbering' ], '', $content);
if (strlen(trim(strip_tags($content))) && ! in_array(substr(trim(strip_tags($content)),
strlen(trim(strip_tags($content))) - 1),
['.', ':', '!', '?','[',',']) && !ctype_lower(substr(trim(strip_tags($content)),
strlen(trim(strip_tags($content)))-1)) && (!ctype_lower(substr(trim(strip_tags($this->getNodeContent($elements[$i+1]))),
0, 1)) || !in_array(substr(trim(strip_tags($this->getNodeContent($elements[$i+1]))), 0, 1),
['[', '{']))) {
$i++;
break;
}
}
// A numbered row whose current node consists of digits only is closed here.
if( ! empty($current[ 'row_numbering' ]) && ctype_digit(trim(preg_replace("/[^0-9a-zA-Z]/",
"", strip_tags($this->getNodeContent($elements[$i])))))){
$i++;
break;
}
//$current[ 'font' ] = $nextElement[ 'font' ];
$i++;
continue;
}
$data = $this->extractNumbering($content);
// NOTE(review): the (int) cast keeps only the leading integer part of row_numbering, so a
// value such as "2.1" becomes 2 - confirm that this is intended.
$content = [
'type' => (int) $current[ 'font' ] === $this->titleFontThreshold ? 'title' : null,
'content' => $data[ 'content' ],
'numbering' => (! empty($current[ 'row_numbering' ])) ? (int)$current[ 'row_numbering' ] : $data[ 'numbering' ],
'top' => (int) $current[ 'top' ],
'height' => (int) $current[ 'height' ],
'left' => (int) $current[ 'left' ],
'font' => (int) $current[ 'font' ],
'children' => $listContent
];
$rows[] = $content;
}
}
return $rows;
}
/**
 * Returns the inner markup of the first <text> element found in the node's
 * serialised XML, with every run of whitespace collapsed to a single space.
 *
 * @param $node Object exposing asXML() (presumably a SimpleXMLElement - confirm against callers)
 *
 * @return string|string[]|null Empty string when no <text>...</text> pair is found
 */
private function getNodeContent($node)
{
    $firstText = '';

    // "." does not cross line breaks here, so only a <text> element that is
    // serialised on a single line can be captured.
    $hits = preg_match_all("/<text.*?>(.*?)<\/text>/", $node->asXML(), $captures);
    if ($hits && $captures[ 1 ]) {
        $firstText = $captures[ 1 ][ 0 ];
    }

    return preg_replace('!\s+!', ' ', $firstText);
}
/**
 * Splits a leading numbering token off a row's text and wraps the remaining
 * text in a paragraph tag.
 *
 * Two patterns are tried in order and the first one that matches wins:
 *  - alphanumeric groups each closed by "." or ")" and followed by a space or a letter;
 *  - a run of digits/dots that ends in a digit.
 *
 * NOTE(review): the numbering is taken from the LAST capture group (assuming
 * last() is the framework helper returning the final array element). For the
 * first pattern that group is the single character after the numbering, so
 * the length check below can never pass for it; for the second pattern it is
 * the match minus its final digit. This looks unintended - confirm before
 * relying on the 'numbering' value.
 *
 * @param $content Row text, possibly starting with a numbering token
 *
 * @return array ['content' => '<p>...</p>', 'numbering' => string]; 'numbering' is '' when nothing usable was found
 */
private function extractNumbering($content)
{
    $patterns = [
        '/^(([a-zA-Z0-9]+[.\)])+)([ ]|[a-z]|[A-Z])/',
        '/^(([\d\.]+)\d)/',
    ];

    $numbering = '';
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $content, $groups)) {
            $numbering = trim(last($groups));
            break;
        }
    }

    // Tokens of one character or less are treated as "no numbering".
    $usable = strlen($numbering) > 1;

    // Every occurrence of the token is removed from the text, not only the leading one.
    $text = $usable ? str_replace($numbering, '', $content) : $content;

    return [
        'content' => '<p>'.trim($text).'</p>',
        'numbering' => $usable ? $numbering : '',
    ];
}
/**
 * Build the structure as required by the editor and the gamification module.
 *
 * Converts the flat list of rows into a tree. Each row that has not already
 * been attached to an earlier row becomes a top-level entry; the rows that
 * follow it are attached beneath it through handlePossibleChild() for as long
 * as they sit further to the right ('left') or are untyped rows following a
 * title. Rows without a type whose 'top' is below 100 or above 1150 are tagged
 * as header / footer and are left out of the result.
 *
 * Unlike the previous implementation, the final row is also considered as a
 * top-level entry, so a trailing sibling (or a single-row input) is no longer
 * dropped.
 *
 * @param $elements Rows carrying 'type', 'content', 'top', 'left' and 'children'
 *
 * @return array Top-level rows with their descendants nested under 'children'
 */
private function buildChildStructure($elements)
{
    $handled = []; // index => true for rows already emitted or attached
    $build = [];
    $total = count($elements);

    // Tags a row that has no type yet as header/footer, based only on its vertical position.
    $tagByPosition = static function ($row) {
        if (! isset($row['type'])) {
            if ($row['top'] < 100) {
                $row['type'] = 'header';
            } elseif ($row['top'] > 1150) {
                $row['type'] = 'footer';
            }
        }

        return $row;
    };

    // True for rows that belong to the page header or footer.
    $isPageFurniture = static function ($row) {
        return isset($row['type']) && in_array($row['type'], ['footer', 'header'], true);
    };

    // 0 1   2     3   4     5   6
    // 1 1.1 1.1.1 1.2 1.2.1 1.3 1.3.1 2 3 4 4.1 4.2 5 6
    for ($i = 0; $i < $total; $i++) {
        $elements[$i] = $tagByPosition($elements[$i]);

        if (isset($handled[$i]) || $isPageFurniture($elements[$i])) {
            continue;
        }

        for ($j = $i + 1; $j < $total; $j++) {
            $elements[$j] = $tagByPosition($elements[$j]);

            if (isset($handled[$j]) || $isPageFurniture($elements[$j])) {
                continue;
            }

            $parentType = $elements[$i]['type'] ?? null;
            $candidateType = $elements[$j]['type'] ?? null;

            // A title on a different line closes the current group, unless the
            // current row's own text consists of digits only.
            $currentIsDigitsOnly = ctype_digit(trim(preg_replace(
                "/[^0-9a-zA-Z]/",
                "",
                strip_tags($elements[$i]['content'])
            )));
            if ($candidateType === 'title'
                && $elements[$i]['top'] !== $elements[$j]['top']
                && ! $currentIsDigitsOnly
            ) {
                break;
            }

            $isIndentedFurther = $elements[$i]['left'] < $elements[$j]['left'];
            $isBodyUnderTitle = $parentType === 'title' && is_null($candidateType);
            if (! $isIndentedFurther && ! $isBodyUnderTitle) {
                break;
            }

            $elements[$i] = $this->handlePossibleChild($elements[$i], $elements[$j]);
            $handled[$j] = true;
        }

        $build[] = $elements[$i];
        $handled[$i] = true;
    }

    return $build;
}
/**
 * Attaches $child at the right depth beneath $parent and returns the updated parent.
 *
 * Example of the intended nesting, by numbering:
 *   1
 *     1.1
 *       1.1.1
 *   2
 *
 * Rules, based on the horizontal offset ('left'):
 *  - the parent has no children yet: the child becomes its first child;
 *  - the child sits further right than the parent's last child: it is handed
 *    down recursively to that last child;
 *  - otherwise the child is appended to this parent's children. This covers a
 *    child level with the last child, a child level with a title parent, and
 *    a child whose indent is smaller than the last child's. The previous
 *    implementation silently discarded the child in that last situation.
 *
 * @param $parent Row with 'left' and 'children'
 * @param $child  Row with 'left'
 *
 * @return mixed The parent row including the newly attached child
 */
protected function handlePossibleChild($parent, $child)
{
    if (count($parent['children']) === 0) {
        $parent['children'][] = $child;

        return $parent;
    }

    $lastParentChild = last($parent['children']);

    // Deeper than the last child: it is that child's descendant, not ours.
    if ($child['left'] > $lastParentChild['left']) {
        $parent['children'][count($parent['children']) - 1] = $this->handlePossibleChild($lastParentChild, $child);

        return $parent;
    }

    // Same depth as (or shallower than) the last child: keep it as a direct child
    // so that no row is lost.
    $parent['children'][] = $child;

    return $parent;
}
/**
 * Sets the title font threshold, unless it has already been set.
 *
 * Walks the elements in order and stores, in $this->titleFontThreshold, the
 * integer 'font' value of the first element that has a successor and that
 * either has index 0 or has a smaller 'font' value than the element after it.
 * Nothing is stored when there are fewer than two elements.
 *
 * NOTE(review): the previous version also tested isset($current->b), but no
 * $current variable exists in this method, so that check was always false and
 * has been removed. It may have been meant to detect bold text on the element
 * - confirm the intent before reintroducing it.
 *
 * @param $elements
 */
protected function setTitleThreshold($elements)
{
    $total = count($elements);

    foreach ($elements as $index => $element) {
        // Once a threshold exists no later element can change it.
        if (isset($this->titleFontThreshold)) {
            break;
        }

        // The last element has no successor to compare against.
        if ($index + 1 >= $total) {
            continue;
        }

        $nextElement = $elements[ $index + 1 ];
        if ($index == 0 || (! is_null($nextElement) && (int) $element[ 'font' ] < (int) $nextElement[ 'font' ])) {
            $this->titleFontThreshold = (int) $element[ 'font' ];
        }
    }
}
/**
 * Sets the header and footer font threshold, unless it has already been set.
 *
 * Stores, in $this->headerFontFooterThreshold, the 'font' of the first element
 * that has no 'type' set and whose predecessor has a greater 'top' value.
 *
 * @param $elements
 */
protected function setHeaderFooterThreshold($elements)
{
    foreach ($elements as $position => $current) {
        // Skip the last element (no successor) and everything after a threshold was found.
        if (! isset($elements[ $position + 1 ]) || isset($this->headerFontFooterThreshold)) {
            continue;
        }

        $following = $elements[ $position + 1 ];
        if (isset($following[ 'type' ])) {
            continue;
        }

        // The successor sits higher on the page than the current element.
        if ($current[ 'top' ] > $following[ 'top' ]) {
            $this->headerFontFooterThreshold = $following[ 'font' ];
        }
    }
}
}

35
database/migrations/2019_08_19_000000_create_failed_jobs_table.php

@ -1,35 +0,0 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
/**
 * Migration that creates the failed_jobs table and drops it again on rollback.
 */
class CreateFailedJobsTable extends Migration
{
    /**
     * Create the failed_jobs table.
     *
     * @return void
     */
    public function up()
    {
        $defineColumns = function (Blueprint $failedJobs) {
            $failedJobs->bigIncrements('id');
            $failedJobs->text('connection');
            $failedJobs->text('queue');
            $failedJobs->longText('payload');
            $failedJobs->longText('exception');
            $failedJobs->timestamp('failed_at')->useCurrent();
        };

        Schema::create('failed_jobs', $defineColumns);
    }

    /**
     * Drop the failed_jobs table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('failed_jobs');
    }
}

16
database/seeds/DatabaseSeeder.php

@ -1,16 +0,0 @@
<?php
use Illuminate\Database\Seeder;
/**
* Root database seeder. Currently a no-op: run() contains no active statements.
*/
class DatabaseSeeder extends Seeder
{
/**
* Seed the application's database.
*
* No seeders are registered; the only line in the body is a disabled example call.
*
* @return void
*/
public function run()
{
// Example (disabled): $this->call(UsersTableSeeder::class);
}
}

21
package.json

@ -1,21 +0,0 @@
{
"private": true,
"scripts": {
"dev": "npm run development",
"development": "cross-env NODE_ENV=development node_modules/webpack/bin/webpack.js --progress --hide-modules --config=node_modules/laravel-mix/setup/webpack.config.js",
"watch": "npm run development -- --watch",
"watch-poll": "npm run watch -- --watch-poll",
"hot": "cross-env NODE_ENV=development node_modules/webpack-dev-server/bin/webpack-dev-server.js --inline --hot --config=node_modules/laravel-mix/setup/webpack.config.js",
"prod": "npm run production",
"production": "cross-env NODE_ENV=production node_modules/webpack/bin/webpack.js --no-progress --hide-modules --config=node_modules/laravel-mix/setup/webpack.config.js"
},
"devDependencies": {
"axios": "^0.19",
"cross-env": "^5.1",
"laravel-mix": "^5.0.1",
"lodash": "^4.17.13",
"resolve-url-loader": "^2.3.1",
"sass": "^1.15.2",
"sass-loader": "^8.0.0"
}
}

1
resources/js/app.js

@ -1 +0,0 @@
// Application entry point: loads ./bootstrap purely for its side effects (the return value is unused).
require('./bootstrap');

28
resources/js/bootstrap.js

@ -1,28 +0,0 @@
/**
 * Load lodash and the axios HTTP library. axios is used to issue requests to
 * the Laravel back-end; it sends the CSRF token as a header based on the value
 * of the "XSRF" token cookie.
 */
const lodash = require('lodash');
const axios = require('axios');

// Mark every request made through the shared axios instance as an XMLHttpRequest.
axios.defaults.headers.common['X-Requested-With'] = 'XMLHttpRequest';

// Expose both libraries as browser globals.
window._ = lodash;
window.axios = axios;

/**
 * Echo exposes an API for subscribing to channels and listening for events
 * that are broadcast by Laravel. It is disabled here; uncomment the lines
 * below to enable it.
 */
// import Echo from 'laravel-echo';
// window.Pusher = require('pusher-js');
// window.Echo = new Echo({
// broadcaster: 'pusher',
// key: process.env.MIX_PUSHER_APP_KEY,
// cluster: process.env.MIX_PUSHER_APP_CLUSTER,
// forceTLS: true
// });

19
resources/lang/en/auth.php

@ -1,19 +0,0 @@
<?php
return [
/*
|--------------------------------------------------------------------------
| Authentication Language Lines
|--------------------------------------------------------------------------
|
| The following language lines are used during authentication for various
| messages that we need to display to the user. You are free to modify
| these language lines according to your application's requirements.
|
*/
'failed' => 'These credentials do not match our records.',
'throttle' => 'Too many login attempts. Please try again in :seconds seconds.',
];

19
resources/lang/en/pagination.php

@ -1,19 +0,0 @@
<?php
return [
/*
|--------------------------------------------------------------------------
| Pagination Language Lines
|--------------------------------------------------------------------------
|
| The following language lines are used by the paginator library to build
| the simple pagination links. You are free to change them to anything
| you want to customize your views to better match your application.
|
*/
'previous' => '&laquo; Previous',
'next' => 'Next &raquo;',
];

22
resources/lang/en/passwords.php

@ -1,22 +0,0 @@
<?php
return [
/*
|--------------------------------------------------------------------------
| Password Reset Language Lines
|--------------------------------------------------------------------------
|
| The following language lines are the default lines which match reasons
| that are given by the password broker for a password update attempt
| has failed, such as for an invalid token or invalid new password.
|
*/
'reset' => 'Your password has been reset!',
'sent' => 'We have e-mailed your password reset link!',
'throttled' => 'Please wait before retrying.',
'token' => 'This password reset token is invalid.',
'user' => "We can't find a user with that e-mail address.",
];

151
resources/lang/en/validation.php

@ -1,151 +0,0 @@
<?php
return [
/*
|--------------------------------------------------------------------------
| Validation Language Lines
|--------------------------------------------------------------------------
|
| The following language lines contain the default error messages used by
| the validator class. Some of these rules have multiple versions such
| as the size rules. Feel free to tweak each of these messages here.
|
*/
'accepted' => 'The :attribute must be accepted.',
'active_url' => 'The :attribute is not a valid URL.',
'after' => 'The :attribute must be a date after :date.',
'after_or_equal' => 'The :attribute must be a date after or equal to :date.',
'alpha' => 'The :attribute may only contain letters.',
'alpha_dash' => 'The :attribute may only contain letters, numbers, dashes and underscores.',
'alpha_num' => 'The :attribute may only contain letters and numbers.',
'array' => 'The :attribute must be an array.',
'before' => 'The :attribute must be a date before :date.',
'before_or_equal' => 'The :attribute must be a date before or equal to :date.',
'between' => [
'numeric' => 'The :attribute must be between :min and :max.',
'file' => 'The :attribute must be between :min and :max kilobytes.',
'string' => 'The :attribute must be between :min and :max characters.',
'array' => 'The :attribute must have between :min and :max items.',
],
'boolean' => 'The :attribute field must be true or false.',
'confirmed' => 'The :attribute confirmation does not match.',
'date' => 'The :attribute is not a valid date.',
'date_equals' => 'The :attribute must be a date equal to :date.',
'date_format' => 'The :attribute does not match the format :format.',
'different' => 'The :attribute and :other must be different.',
'digits' => 'The :attribute must be :digits digits.',
'digits_between' => 'The :attribute must be between :min and :max digits.',
'dimensions' => 'The :attribute has invalid image dimensions.',
'distinct' => 'The :attribute field has a duplicate value.',
'email' => 'The :attribute must be a valid email address.',
'ends_with' => 'The :attribute must end with one of the following: :values.',
'exists' => 'The selected :attribute is invalid.',
'file' => 'The :attribute must be a file.',
'filled' => 'The :attribute field must have a value.',
'gt' => [
'numeric' => 'The :attribute must be greater than :value.',
'file' => 'The :attribute must be greater than :value kilobytes.',
'string' => 'The :attribute must be greater than :value characters.',
'array' => 'The :attribute must have more than :value items.',
],
'gte' => [
'numeric' => 'The :attribute must be greater than or equal :value.',
'file' => 'The :attribute must be greater than or equal :value kilobytes.',
'string' => 'The :attribute must be greater than or equal :value characters.',
'array' => 'The :attribute must have :value items or more.',
],
'image' => 'The :attribute must be an image.',
'in' => 'The selected :attribute is invalid.',
'in_array' => 'The :attribute field does not exist in :other.',
'integer' => 'The :attribute must be an integer.',
'ip' => 'The :attribute must be a valid IP address.',
'ipv4' => 'The :attribute must be a valid IPv4 address.',
'ipv6' => 'The :attribute must be a valid IPv6 address.',
'json' => 'The :attribute must be a valid JSON string.',
'lt' => [
'numeric' => 'The :attribute must be less than :value.',
'file' => 'The :attribute must be less than :value kilobytes.',
'string' => 'The :attribute must be less than :value characters.',
'array' => 'The :attribute must have less than :value items.',
],
'lte' => [
'numeric' => 'The :attribute must be less than or equal :value.',
'file' => 'The :attribute must be less than or equal :value kilobytes.',
'string' => 'The :attribute must be less than or equal :value characters.',
'array' => 'The :attribute must not have more than :value items.',
],
'max' => [
'numeric' => 'The :attribute may not be greater than :max.',
'file' => 'The :attribute may not be greater than :max kilobytes.',
'string' => 'The :attribute may not be greater than :max characters.',
'array' => 'The :attribute may not have more than :max items.',
],
'mimes' => 'The :attribute must be a file of type: :values.',
'mimetypes' => 'The :attribute must be a file of type: :values.',
'min' => [
'numeric' => 'The :attribute must be at least :min.',
'file' => 'The :attribute must be at least :min kilobytes.',
'string' => 'The :attribute must be at least :min characters.',
'array' => 'The :attribute must have at least :min items.',
],
'not_in' => 'The selected :attribute is invalid.',
'not_regex' => 'The :attribute format is invalid.',
'numeric' => 'The :attribute must be a number.',
'password' => 'The password is incorrect.',
'present' => 'The :attribute field must be present.',
'regex' => 'The :attribute format is invalid.',
'required' => 'The :attribute field is required.',
'required_if' => 'The :attribute field is required when :other is :value.',
'required_unless' => 'The :attribute field is required unless :other is in :values.',
'required_with' => 'The :attribute field is required when :values is present.',
'required_with_all' => 'The :attribute field is required when :values are present.',
'required_without' => 'The :attribute field is required when :values is not present.',
'required_without_all' => 'The :attribute field is required when none of :values are present.',
'same' => 'The :attribute and :other must match.',
'size' => [
'numeric' => 'The :attribute must be :size.',
'file' => 'The :attribute must be :size kilobytes.',
'string' => 'The :attribute must be :size characters.',
'array' => 'The :attribute must contain :size items.',
],
'starts_with' => 'The :attribute must start with one of the following: :values.',
'string' => 'The :attribute must be a string.',
'timezone' => 'The :attribute must be a valid zone.',
'unique' => 'The :attribute has already been taken.',
'uploaded' => 'The :attribute failed to upload.',
'url' => 'The :attribute format is invalid.',
'uuid' => 'The :attribute must be a valid UUID.',
/*
|--------------------------------------------------------------------------
| Custom Validation Language Lines
|--------------------------------------------------------------------------
|
| Here you may specify custom validation messages for attributes using the
| convention "attribute.rule" to name the lines. This makes it quick to
| specify a specific custom language line for a given attribute rule.
|
*/
'custom' => [
'attribute-name' => [
'rule-name' => 'custom-message',
],
],
/*
|--------------------------------------------------------------------------
| Custom Validation Attributes
|--------------------------------------------------------------------------
|
| The following language lines are used to swap our attribute placeholder
| with something more reader friendly such as "E-Mail Address" instead
| of "email". This simply helps us make our message more expressive.
|
*/
'attributes' => [],
];

1
resources/sass/app.scss

@ -1 +0,0 @@
//

5
resources/views/errors/401.blade.php

@ -1,5 +0,0 @@
{{-- 401 error page: fills the title, code and message sections of the errors.minimal layout. --}}
@extends('errors.minimal')
@section('title', __('Unauthorized'))
@section('code', '401')
@section('message', __('Unauthorized'))

5
resources/views/errors/403.blade.php

@ -1,5 +0,0 @@
{{-- 403 error page: shows the exception message when there is one, otherwise "Forbidden". --}}
@extends('errors.minimal')
@section('title', __('Forbidden'))
@section('code', '403')
@section('message', __($exception->getMessage() ?: 'Forbidden'))

4
resources/views/errors/404.blade.php

@ -1,4 +0,0 @@
{{-- 404 error page: fills the title, code and message sections of the errors.minimal layout. --}}
@extends('errors.minimal')
@section('title', __('Not Found'))
@section('code', '404')
@section('message', __('The page you are looking for might have been removed had its name changed or is temporarily unavailable.'))

4
resources/views/errors/405.blade.php

@ -1,4 +0,0 @@
{{-- 405 error page: fills the errors.minimal layout; the message is the generic "page might have been removed" wording. --}}
@extends('errors.minimal')
@section('title', __('405 Error'))
@section('code', '405')
@section('message', __('The page you are looking for might have been removed had its name changed or is temporarily unavailable.'))

5
resources/views/errors/419.blade.php

@ -1,5 +0,0 @@
{{-- 419 error page: fills the title, code and message sections of the errors.minimal layout. --}}
@extends('errors.minimal')
@section('title', __('Page Expired'))
@section('code', '419')
@section('message', __('Page Expired'))

6
resources/views/errors/429.blade.php

@ -1,6 +0,0 @@
{{-- 429 error page: fills the title, code and message sections of the errors.minimal layout. --}}
@extends('errors.minimal')
@section('title', __('Too Many Requests'))
@section('code', '429')
@section('message', __('Too Many Requests'))

5
resources/views/errors/500.blade.php

@ -1,5 +0,0 @@
{{-- 500 error page: fills the title, code and message sections of the errors.minimal layout. --}}
@extends('errors.minimal')
@section('title', __('Server Error'))
@section('code', '500')
@section('message', __('Server Error'))

5
resources/views/errors/503.blade.php

@ -1,5 +0,0 @@
{{-- 503 error page: shows the exception message when there is one, otherwise "Service Unavailable". --}}
@extends('errors.minimal')
@section('title', __('Service Unavailable'))
@section('code', '503')
@section('message', __($exception->getMessage() ?: 'Service Unavailable'))

126
resources/views/errors/minimal.blade.php

@ -1,126 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>@yield('title')</title>
<!-- Fonts -->
<link rel="dns-prefetch" href="//fonts.gstatic.com">
<link href="https://fonts.googleapis.com/css?family=Josefin+Sans:400,700" rel="stylesheet">
<style>
* {
-webkit-box-sizing: border-box;
box-sizing: border-box;
}
body {
padding: 0;
margin: 0;
}
#container {
position: relative;
height: 100vh;
background-color: #f3f3f3;
}
#container .container {
position: absolute;
left: 50%;
top: 50%;
-webkit-transform: translate(-50%, -50%);
-ms-transform: translate(-50%, -50%);
transform: translate(-50%, -50%);
}
.container {
max-width: 460px;
width: 100%;
text-align: center;
line-height: 1.4;
}
.container .code {
height: 158px;
line-height: 153px;
}
.container .code h1 {
font-family: 'Josefin Sans', sans-serif;
color: #222;
font-size: 220px;
letter-spacing: 10px;
margin: 0;
font-weight: 700;
text-shadow: 2px 2px 0 #c9c9c9, -2px -2px 0 #c9c9c9;
}
.container .code h1 > span {
text-shadow: 2px 2px 0 #198fd7, -2px -2px 0 #198fd7, 0 0 8px #198fe7;
}
.container p {
font-family: 'Josefin Sans', sans-serif;
color: #484848;
padding-top: 10px;
font-size: 16px;
font-weight: 400;
margin-top: 0;
margin-bottom: 15px;
}
.container a {
font-family: 'Josefin Sans', sans-serif;
font-size: 14px;
text-decoration: none;
text-transform: uppercase;
background: transparent;
color: #484848;
border: 2px solid #484848;
display: inline-block;
padding: 10px 25px;
font-weight: 700;
-webkit-transition: 0.2s all;
transition: 0.2s all;
}
.container a:hover {
color: #198fd7;
border-color: #198fe7;
}
@media only screen and (max-width: 480px) {
.container .code {
height: 122px;
line-height: 122px;
}
.container .code h1 {
font-size: 122px;
}
}
</style>
</head>
<body>
<?php
// Split the 'code' section supplied by the child view into single characters
// and wrap the second character in a <span>, which the stylesheet above
// targets with ".container .code h1 > span".
$codeAsArray = str_split(app()->view->getSections()[ 'code' ]);
$codeAsArray[ 1 ] = sprintf('<span>%s</span>', $codeAsArray[ 1 ]);
$styledCode = implode('', $codeAsArray);
?>
<div id="container">
<div class="container">
<div class="code">
<h1>{!! $styledCode !!}</h1>
</div>
<p>@yield('message')</p>
<a href="https://contrai.io">home page</a>
</div>
</div>
</body>
</html>

18
routes/api.php

@ -1,19 +1 @@
<?php
use Illuminate\Http\Request;
/*
|--------------------------------------------------------------------------
| API Routes
|--------------------------------------------------------------------------
|
| Here is where you can register API routes for your application. These
| routes are loaded by the RouteServiceProvider within a group which
| is assigned the "api" middleware group. Enjoy building your API!
|
*/
// GET /user, behind the auth:api middleware: responds with the user resolved from the request.
$currentUser = function (Request $request) {
    return $request->user();
};

Route::middleware('auth:api')->get('/user', $currentUser);

15
routes/channels.php

@ -1,16 +1 @@
<?php
/*
|--------------------------------------------------------------------------
| Broadcast Channels
|--------------------------------------------------------------------------
|
| Here you may register all of the event broadcasting channels that your
| application supports. The given channel authorization callbacks are
| used to check if an authenticated user can listen to the channel.
|
*/
// Channel App.User.{id}: authorised only when the user's id equals the {id} in the channel name.
Broadcast::channel('App.User.{id}', function ($user, $id) {
    $ownId = (int) $user->id;
    $requestedId = (int) $id;

    return $ownId === $requestedId;
});

38
tests/Feature/ProcessDocxDocumentTest.php

@ -1,38 +0,0 @@
<?php
namespace Tests\Feature;
use App\Ingest\DocxReader;
use App\Ingest\DocxWriter;
use App\Jobs\RecreateDocument;
use Illuminate\Support\Facades\Storage;
use Tests\TestCase;
/**
 * Smoke tests for the DOCX ingest pipeline. Neither test makes an assertion;
 * each one only checks that the calls complete without throwing.
 */
class ProcessDocxDocumentTest extends TestCase
{
    /**
     * Reads a DOCX fixture from the local disk and writes the result back out
     * as a new document.
     *
     * Other fixtures previously tried here: contracts/x.docx, contracts/y.docx,
     * contracts/z.docx.
     *
     * @test
     */
    public function it_reads_docx_documents_content()
    {
        $disk = Storage::disk('local');

        $parsed = (new DocxReader($disk, 'contracts/with-bookmarks.docx'))->execute();

        (new DocxWriter($disk, 'contracts/test-write.docx'))->execute($parsed);
    }

    /**
     * Decodes a JSON fixture from the local disk and runs the RecreateDocument
     * job on it.
     *
     * Another fixture previously tried here: contracts/x.json.
     *
     * @test
     */
    public function it_recreates_original_document_from_json()
    {
        $payload = json_decode(Storage::disk('local')->get('contracts/a.json'), true);

        (new RecreateDocument('test123', $payload))->handle();
    }
}

15
webpack.mix.js

@ -1,15 +0,0 @@
const mix = require('laravel-mix');

/*
 |--------------------------------------------------------------------------
 | Mix Asset Management
 |--------------------------------------------------------------------------
 |
 | Two build steps are registered: the application's JavaScript entry point
 | is bundled into public/js, and its Sass entry point is compiled into
 | public/css.
 |
 */
mix.js('resources/js/app.js', 'public/js');
mix.sass('resources/sass/app.scss', 'public/css');

6173
yarn.lock
File diff suppressed because it is too large
View File

Loading…
Cancel
Save