Repository for the search-and-displace ingest module, which takes ODF, DOCX and PDF documents and transforms them into Markdown (.md) files for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
2.3 KiB

  1. <?php
  2. namespace App\Ingest;
  3. use Symfony\Component\Process\Exception\ProcessFailedException;
  4. use Symfony\Component\Process\Process;
  /**
   * Convertor for DOCX and ODT documents.
   *
   * The ingest entry point (execute) turns the stored document into HTML.
   * Additional helpers convert the same document to ODT, RTF, DOC, TXT
   * (through the Office wrapper) or to PDF (through the `unoconv` binary).
   */
  class DocxAndOdtConvertor extends AbstractConvertor
  {
      /**
       * Convert the current document to HTML and verify that the result exists.
       *
       * After a successful conversion $this->path points at the generated
       * HTML file; the storage is then checked for that file.
       *
       * @throws \Exception When the conversion fails or the converted file
       *                    cannot be found in storage.
       */
      public function execute()
      {
          $this->convertToHTMLWithLibreOffice();

          if (! $this->storage->exists($this->path)) {
              throw new \Exception('Failed to convert to HTML: ' . $this->path);
          }
      }

      /**
       * Convert the current document to PDF using `unoconv`, then delete the
       * original document.
       *
       * HOME is handed to the child process through the Process environment.
       * Running `export HOME=...` as a separate array-form Process cannot work:
       * no shell is involved, so the string is treated as an executable name,
       * and even a successful export would only affect that short-lived child
       * rather than the `unoconv` process started afterwards.
       *
       * NOTE(review): unlike convertToFormat(), this method does not update
       * $this->path after the conversion - confirm that callers expect this.
       *
       * @throws ProcessFailedException When `unoconv` exits unsuccessfully.
       */
      protected function convertToPDF()
      {
          $home = env('USER_HOME_PATH');

          // Only override HOME when it is actually configured; passing null
          // leaves the environment handling to the Process defaults.
          $environment = ($home === null || $home === '')
              ? null
              : ['HOME' => (string) $home];

          $process = new Process(
              [
                  'unoconv',
                  '-f',
                  'pdf',
                  // '-c=socket,host=localhost,port=' . (2000 + rand(2, 7)) . ';urp;StarOffice.ComponentContext',
                  $this->storage->path($this->path),
              ],
              null,
              $environment
          );

          // Abort the conversion if it takes longer than 10 seconds.
          $process->setTimeout(10);
          $process->run();

          if (! $process->isSuccessful()) {
              throw new ProcessFailedException($process);
          }

          $this->deleteOriginalDocument();
      }

      /**
       * Convert the current document to HTML.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToHTMLWithLibreOffice()
      {
          $this->convertToFormat('html');
      }

      /**
       * Convert the current document to ODT.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToODT()
      {
          $this->convertToFormat('odt');
      }

      /**
       * Convert the current document to RTF.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToRTF()
      {
          $this->convertToFormat('rtf');
      }

      /**
       * Convert the current document to DOC.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToDOC()
      {
          $this->convertToFormat('doc');
      }

      /**
       * Convert the current document to TXT.
       *
       * @throws \Exception When the conversion fails.
       */
      public function convertToTXT()
      {
          $this->convertToFormat('txt');
      }

      /**
       * Convert the current document to the given format through the Office
       * wrapper, delete the original document and point $this->path at the
       * converted file ("<directoryPath>/document.<format>").
       *
       * @param string $format Target format / file extension, e.g. 'html'.
       *
       * @throws \Exception When the Office run does not report success.
       */
      protected function convertToFormat($format)
      {
          $office = new Office();

          $success = $office->run(
              $format,
              $this->storage->path($this->path),
              $this->storage->path($this->directoryPath)
          );

          if (! $success) {
              throw new \Exception('Failed when converting from ' . $this->type . ' to ' . strtoupper($format) . ' for file: ' . $this->path);
          }

          // The original is removed only after a successful conversion.
          $this->deleteOriginalDocument();
          $this->path = "$this->directoryPath/document.$format";
      }
  }