You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
774 B
31 lines
774 B
import pdftotext,json,sys, getopt
|
|
|
|
def main(argv):
    """Extract the text of a PDF file and write it to a UTF-8 text file.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-h``, ``-i``/``--ifile`` <inputfile> and
            ``-o``/``--ofile`` <outputfile>.

    Raises:
        SystemExit: With status 2 (after printing the usage line) when the
            options cannot be parsed or when the input or output path is
            missing; with no status after ``-h`` prints the usage line.
    """
    usage = 'parse-pdf.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    # Both paths are required; without this check a missing option would
    # surface as an opaque FileNotFoundError from open('').
    if not inputfile or not outputfile:
        print(usage)
        sys.exit(2)

    # Load your PDF
    with open(inputfile, "rb") as file:
        pdf = pdftotext.PDF(file)
        # Build the full text before touching the output path, so a failed
        # extraction does not leave a truncated output file behind.
        text = "\n\n".join(pdf)

    # The context manager closes the output file even if the write fails.
    with open(outputfile, 'w', encoding='UTF-8') as out_file:
        out_file.write(text)
|
|
|
|
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    cli_args = sys.argv[1:]  # drop the program name
    main(cli_args)
|
|
|
|
|
|
|