You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
478 B
Python
23 lines
478 B
Python
5 years ago
|
# import libraries
|
||
|
from PIL import Image
|
||
|
import pytesseract
|
||
|
import time
|
||
|
|
||
|
# Convert sequentially numbered page images (cropped/page1.jpg,
# cropped/page2.jpg, ...) into one PDF per page under ocred/, using
# Tesseract OCR with the English language model. Processing stops at the
# first page number for which no image file exists.
i = 1

while True:
    try:
        # Only the open is guarded: a missing file is the normal
        # end-of-input signal. OCR or write failures are allowed to
        # propagate instead of being misreported as "all pages done".
        img = Image.open("cropped/page%i.jpg" % i)
    except FileNotFoundError:
        print("All pages must be ready!")
        break

    # The context manager releases the image's file handle once OCR is done.
    with img:
        print(img)
        pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf')

    # NOTE(review): the reason for this one-second pause is not evident from
    # the script; it is preserved from the original behavior.
    time.sleep(1)

    # image_to_pdf_or_hocr returns bytes for the PDF extension, so the
    # result can be written directly; `with` closes the file even if the
    # write fails.
    with open("ocred/page%i.pdf" % i, "wb") as file:
        file.write(pdf)

    i += 1
    # Progress output: this is the number of the NEXT page to be processed.
    print(i)
|