master
Pedro Sá Couto 5 years ago
parent f66a18bfb9
commit 27c24672c9

BIN
.DS_Store vendored

Binary file not shown.

@ -33,7 +33,7 @@ def pdftopil():
return pil_images
def save_images(pil_images):
d = 1
d = 0
for image in pil_images:
image.save(("split/input%d"%d) + ".jpg")
d += 1

@ -14,7 +14,7 @@ while True:
print("cropping even")
# left, up, right, bottom
border = (0, 0, 68, 0)
border = (0, 0, 65, 0)
finalpage = ImageOps.crop(page, border)
finalpage.save('cropped/page%i.jpg'%i)
@ -23,7 +23,7 @@ while True:
print("cropping odd")
# left, up, right, bottom
border = (68, 0, 0, 0)
border = (65, 0, 0, 0)
finalpage = ImageOps.crop(page, border)
finalpage.save('cropped/page%i.jpg'%i)

@ -0,0 +1,5 @@
cd split
pwd
rm page0.jpg
rm -ltr | tail -1
rm .DS_Store

BIN
scans/.DS_Store vendored

Binary file not shown.

@ -7,7 +7,7 @@ i = 1
while True:
try:
img = Image.open("cropped/page%i.jpg"%i)
img = Image.open("bounding_box/input%i.jpg"%i)
print(img)
pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf')
time.sleep(1)

@ -1,12 +1,13 @@
mkdir split
mkdir rotated
mkdir ocred
mkdir bounding_box
mkdir cropped
./merge_scans.sh
mkdir split
python3 burstpdf.py
mkdir rotated
python3 rotation.py
mkdir bounding_box
python3 bounding_box.py
python3 mirror_crop.py
mkdir cropped
# python3 mirror_crop.py
mkdir ocred
python3 tesseract_ocr.py
./remove.sh
./merge_files.sh

Loading…
Cancel
Save