You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

23 lines
484 B
Python

# import libraries
from PIL import Image
import pytesseract
import time
# OCR every sequentially numbered page image into its own PDF.
# Reads bounding_box/input<i>.jpg for i = 1, 2, 3, ... and writes the PDF
# produced by Tesseract to ocred/page<i>.pdf, stopping at the first index
# for which no input image exists.
i = 1
while True:
    # Only the open is guarded: a missing input file is the expected
    # end-of-input signal, whereas OCR or output failures should surface
    # with a real traceback instead of being reported as "done".
    try:
        img = Image.open("bounding_box/input%i.jpg" % i)
    except FileNotFoundError:
        print("All pages must be ready!")
        break

    # The context manager releases the image's file handle once the page
    # has been processed, so handles do not accumulate across pages.
    with img:
        print(img)
        pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension="pdf")

    # Fixed one-second pause per page, kept from the original script.
    time.sleep(1)

    # The OCR result is already a bytes object, so it is written as-is;
    # `with` guarantees the output file is closed even if the write fails.
    with open("ocred/page%i.pdf" % i, "wb") as out_file:
        out_file.write(pdf)

    i += 1
    print(i)