Repo for the search and displace ingest module, which takes odf, docx and pdf documents and transforms them into .md files to be used with search and displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

117 lines
2.4 KiB

  1. <?php
  2. namespace App\Ingest;
  3. use Symfony\Component\Process\Exception\ProcessFailedException;
  4. use Symfony\Component\Process\Process;
  /**
   * Converts a DOCX document into another office format and hands the
   * resulting PDF over to PDFConvertor for further processing.
   *
   * NOTE(review): $this->storage, $this->path, $this->directoryPath and
   * deleteOriginalDocument() are not defined in this class; presumably they
   * are inherited from AbstractConvertor - confirm against the parent.
   */
  class DocxConvertor extends AbstractConvertor
  {
      /**
       * Converts the document to PDF, verifies the PDF exists in storage and
       * then runs PDFConvertor on it.
       *
       * @throws \Exception When the converted file is missing from storage or
       *                    the underlying conversion fails.
       */
      public function execute()
      {
          $this->convertToPdfWithLibreOffice();

          // convertToFormat() repoints $this->path at the converted file, so
          // this checks the PDF, not the original DOCX.
          if ( ! $this->storage->exists($this->path)) {
              throw new \Exception('Failed to convert to PDF: ' . $this->path);
          }

          $convertor = new PDFConvertor($this->storage, $this->path);
          $convertor->execute();
      }

      /**
       * Converts the document to PDF by invoking the `unoconv` command line tool.
       *
       * HOME is handed to the child process through the Process environment.
       * Running `export HOME=...` as a separate process cannot work: `export`
       * is a shell builtin rather than an executable, and a child process can
       * never alter the environment of a sibling process started afterwards.
       *
       * NOTE(review): unlike convertToFormat(), this method does not update
       * $this->path after deleting the original document - confirm whether
       * callers are expected to do so.
       *
       * @throws ProcessFailedException When unoconv exits unsuccessfully.
       */
      protected function convertToPDF()
      {
          $home = env('USER_HOME_PATH');

          // Only override HOME when a usable value is configured; passing null
          // makes the child process inherit the current environment unchanged.
          $environment = (is_string($home) && $home !== '') ? ['HOME' => $home] : null;

          $process = new Process(
              [
                  'unoconv',
                  '-f',
                  'pdf',
                  // Disabled: explicit connection string to a listener on a port in 2002-2007.
                  // '-c=socket,host=localhost,port=' . (2000 + rand(2, 7)) . ';urp;StarOffice.ComponentContext',
                  $this->storage->path($this->path),
              ],
              null,
              $environment
          );

          // Fail fast: give the conversion at most 10 seconds.
          $process->setTimeout(10);
          $process->run();

          if (!$process->isSuccessful()) {
              throw new ProcessFailedException($process);
          }

          $this->deleteOriginalDocument();
      }

      /**
       * Converts the document to PDF through the Office wrapper.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToPdfWithLibreOffice()
      {
          $this->convertToFormat('pdf');
      }

      /**
       * Converts the document to ODT through the Office wrapper.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToODT()
      {
          $this->convertToFormat('odt');
      }

      /**
       * Converts the document to RTF through the Office wrapper.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToRTF()
      {
          $this->convertToFormat('rtf');
      }

      /**
       * Converts the document to DOC through the Office wrapper.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToDOC()
      {
          $this->convertToFormat('doc');
      }

      /**
       * Converts the document to TXT through the Office wrapper.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToTXT()
      {
          $this->convertToFormat('txt');
      }

      /**
       * Converts the current document to the given format via Office::run(),
       * deletes the original document and repoints $this->path at the result.
       *
       * @param string $format Target file extension, e.g. 'pdf' or 'odt'.
       *
       * @throws \Exception When Office::run() reports a failure.
       */
      protected function convertToFormat($format)
      {
          $office = new Office();

          $success = $office->run(
              $format,
              $this->storage->path($this->path),
              $this->storage->path($this->directoryPath)
          );

          if (! $success) {
              throw new \Exception('Failed when converting from DOCX to ' . strtoupper($format) . ' for file: ' . $this->path);
          }

          // Delete first, while $this->path still refers to the source document.
          $this->deleteOriginalDocument();

          // NOTE(review): assumes Office::run() writes its output as
          // "document.<format>" inside the target directory - confirm.
          $this->path = "$this->directoryPath/document.$format";
      }
  }