diff --git a/.DS_Store b/.DS_Store index d760ca6..a1888eb 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/burstpdf.py b/burstpdf.py index b7a22a9..32e0e9c 100755 --- a/burstpdf.py +++ b/burstpdf.py @@ -33,7 +33,7 @@ def pdftopil(): return pil_images def save_images(pil_images): - d = 1 + d = 0 for image in pil_images: image.save(("split/input%d"%d) + ".jpg") d += 1 diff --git a/mirror_crop.py b/mirror_crop.py index 6400a77..db56cc1 100644 --- a/mirror_crop.py +++ b/mirror_crop.py @@ -14,7 +14,7 @@ while True: print("cropping even") # left, up, right, bottom - border = (0, 0, 68, 0) + border = (0, 0, 65, 0) finalpage = ImageOps.crop(page, border) finalpage.save('cropped/page%i.jpg'%i) @@ -23,7 +23,7 @@ while True: print("cropping odd") # left, up, right, bottom - border = (68, 0, 0, 0) + border = (65, 0, 0, 0) finalpage = ImageOps.crop(page, border) finalpage.save('cropped/page%i.jpg'%i) diff --git a/remove.sh b/remove.sh new file mode 100755 index 0000000..1fb9b97 --- /dev/null +++ b/remove.sh @@ -0,0 +1,5 @@ +cd split +pwd +rm page0.jpg +rm -ltr | tail -1 +rm .DS_Store diff --git a/scans/.DS_Store b/scans/.DS_Store index 3ffcfeb..5008ddf 100644 Binary files a/scans/.DS_Store and b/scans/.DS_Store differ diff --git a/tesseract_ocr.py b/tesseract_ocr.py index 2e91780..1e3f641 100755 --- a/tesseract_ocr.py +++ b/tesseract_ocr.py @@ -7,7 +7,7 @@ i = 1 while True: try: - img = Image.open("cropped/page%i.jpg"%i) + img = Image.open("bounding_box/input%i.jpg"%i) print(img) pdf = pytesseract.image_to_pdf_or_hocr(img, lang="eng", extension='pdf') time.sleep(1) diff --git a/workshop_stream.sh b/workshop_stream.sh index 557800c..caa6ccc 100755 --- a/workshop_stream.sh +++ b/workshop_stream.sh @@ -1,12 +1,13 @@ -mkdir split -mkdir rotated -mkdir ocred -mkdir bounding_box -mkdir cropped ./merge_scans.sh +mkdir split python3 burstpdf.py +mkdir rotated python3 rotation.py +mkdir bounding_box python3 bounding_box.py -python3 mirror_crop.py +mkdir cropped +# python3 mirror_crop.py +mkdir ocred python3 tesseract_ocr.py +./remove.sh ./merge_files.sh