From 5588614b446266196aef6d31500a594bb4720f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20S=C3=A1=20Couto?= Date: Thu, 6 Feb 2020 17:05:05 +0100 Subject: [PATCH] working --- .DS_Store | Bin 10244 -> 10244 bytes bounding_box.py | 2 +- burstpdf.py | 43 --------------------------------------- change_res.sh | 7 +++++++ delete_and_start_over.sh | 6 ++++++ merge_scans.sh | 7 ------- mirror_crop.py | 0 readme.md | 18 ++++++++++------ remove.sh | 3 +-- rename_scans.sh | 7 +++++++ rotation.py | 0 scans/.DS_Store | Bin 6148 -> 0 bytes workshop_stream.sh | 8 ++++---- 13 files changed, 38 insertions(+), 63 deletions(-) mode change 100644 => 100755 bounding_box.py delete mode 100755 burstpdf.py create mode 100755 change_res.sh create mode 100755 delete_and_start_over.sh delete mode 100755 merge_scans.sh mode change 100644 => 100755 mirror_crop.py mode change 100644 => 100755 readme.md mode change 100644 => 100755 remove.sh create mode 100755 rename_scans.sh mode change 100644 => 100755 rotation.py delete mode 100644 scans/.DS_Store mode change 100644 => 100755 workshop_stream.sh diff --git a/.DS_Store b/.DS_Store index a1888eb8b48f447874e158c419e57ec4664061aa..b9d3dc62cc5d3a71bc82d7ead8963d92826be1c5 100644 GIT binary patch delta 946 zcmZn(XbG6$gHU^hRjG6Mqx&tw4s2?fub{N$vZ{3M`&00RT_Y9Oug9}Ivjs3M-E z^5O!hqEnNLL>sgutE+FDKyaaBWG&xG8{s_L5Bx>=K@ z#N=6L!%Zp+F3QWv&r1giFiz$XaMb5zNM^`jNMy)kNM}f8h-WAQlEn;qK%9Z)Np#;dzXD9>86=5}L%j9{2I-C0hSs5p9 z6t<9O1u9MkTbzPwCDVDJl`|#_h{#R;BmAG6AFL)1XhrVi8A8g^-auJ9q_9Gk&l47( z{933AMNgivy(c@+jC`;&K~6`rkJ%PzB68@XsirU*Is(mN1x8CUID(6F^HW@)$%q4@VzQ-#tvX0W0Wdl< l8A`B(KRi4(7fbwQ-OR4=i)FIE$QqQ89g4A`NFX*C7yxnj0384T delta 1166 zcmeHF&ubGw6n;~iHZe_ilg-Z-Z6Fi^4YEOs^+&Kxq!L6DOera9>uxfMOE){q?xrldy%o_tSyvU)62;GG=-9ArWYq3B?i_QCbG)1LaDw3V`T1aIjTdC8uwFJuRV${5 zN{pmh#cMog-X@#r0@1Pt-cayn-okgRoKk0yXJDY!nYf0Ft4Fn^n$DWMM};oyWdmdF zdf8BlnNrEbR(>E~xzQD~a3!sk2ovZCW^_v-nz@V(lf@OuFeI&vleL?wBx5zBC5dH` zN-M9h9Xoq<`Et5cP_%O7;&=8e>^LIx(O$+wq?Z8Ld9x1 
zEW#f_d4jyP@?UcIJE6;@1pi-_;8pygX#Z9>6+%>H_FT(ZRk4^ diff --git a/bounding_box.py b/bounding_box.py old mode 100644 new mode 100755 index 0f4c481..ff55ba7 --- a/bounding_box.py +++ b/bounding_box.py @@ -29,6 +29,6 @@ while True: d+=1 except: - logging.exception("message") + # logging.exception("message") print("All pages must be ready!") break diff --git a/burstpdf.py b/burstpdf.py deleted file mode 100755 index 32e0e9c..0000000 --- a/burstpdf.py +++ /dev/null @@ -1,43 +0,0 @@ -#Based in the code in https://iq.opengenus.org/pdf_to_image_in_python/ - -import pdf2image -from PIL import Image -import time - -#DECLARE CONSTANTS -PDF_PATH = ("scans/out.pdf") -DPI = 72 -FIRST_PAGE = None -LAST_PAGE = None -FORMAT = 'jpg' -THREAD_COUNT = 1 -USERPWD = None -USE_CROPBOX = False -STRICT = False - -def pdftopil(): - #This method reads a pdf and converts it into a sequence of images - #PDF_PATH sets the path to the PDF file - #dpi parameter assists in adjusting the resolution of the image - #first_page parameter allows you to set a first page to be processed by pdftoppm - #last_page parameter allows you to set a last page to be processed by pdftoppm - #fmt parameter allows to set the format of pdftoppm conversion (PpmImageFile, TIFF) - #thread_count parameter allows you to set how many thread will be used for conversion. 
- #userpw parameter allows you to set a password to unlock the converted PDF - #use_cropbox parameter allows you to use the crop box instead of the media box when converting - #strict parameter allows you to catch pdftoppm syntax error with a custom type PDFSyntaxError - - start_time = time.time() - pil_images = pdf2image.convert_from_path(PDF_PATH, dpi=DPI, first_page=FIRST_PAGE, last_page=LAST_PAGE, fmt=FORMAT, thread_count=THREAD_COUNT, userpw=USERPWD, use_cropbox=USE_CROPBOX, strict=STRICT) - print ("Time taken : " + str(time.time() - start_time)) - return pil_images - -def save_images(pil_images): - d = 0 - for image in pil_images: - image.save(("split/input%d"%d) + ".jpg") - d += 1 - -if __name__ == "__main__": - pil_images = pdftopil() - save_images(pil_images) diff --git a/change_res.sh b/change_res.sh new file mode 100755 index 0000000..0b1dd0e --- /dev/null +++ b/change_res.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +i=0 +for img in `ls scans/*.jpg`; do + convert $img -density 72 split/input$i.jpg + i=$((i+1)); +done diff --git a/delete_and_start_over.sh b/delete_and_start_over.sh new file mode 100755 index 0000000..6eda002 --- /dev/null +++ b/delete_and_start_over.sh @@ -0,0 +1,6 @@ +rm -R scans split rotated bounding_box ocred +mkdir -p scans +mv out.pdf $(date +%F-%H:%M).pdf && touch out.pdf +sleep 2 +cp *.pdf ~/Desktop +rm *.pdf diff --git a/merge_scans.sh b/merge_scans.sh deleted file mode 100755 index 21211f9..0000000 --- a/merge_scans.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -#line 3 means here -# cd "$(dirname "$0")" - -cd scans -pwd -convert *.jpg out.pdf diff --git a/mirror_crop.py b/mirror_crop.py old mode 100644 new mode 100755 diff --git a/readme.md b/readme.md old mode 100644 new mode 100755 index a417292..ea9add1 --- a/readme.md +++ b/readme.md @@ -55,7 +55,7 @@ sudo pip3 install pdf2image Pillow opencv-python pytesseract

Make all the files executable.

```bash -sudo chmod 777 merge_scans.sh workshop_stream.sh marge_files.sh +sudo chmod 777 merge_scans.sh workshop_stream.sh rename_scans.sh change_res.sh delete_and_start_over.sh ```

In case you want to skip any of the scripts just comment out in the shell code, workshop_stream.sh.

@@ -116,15 +116,15 @@ mkdir bounding_box mkdir cropped ``` ###Merge the files in the directory scans -

All the scans will be appended to one pdf called out.pdf

+

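All the scans in the scans directory will be renamed sequentially (input0.jpg, input1.jpg, ...) so they can be iterated over later.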

```bash -./merge_scans.sh +./rename_scans.sh ``` ###Burst the pdf in scans -

Burst this pdf, renaming all the files so they can be iterated later.

+

Change the resolution of the scans (density set to 72) so that they are lighter to process. The converted images are written to the split directory.

```bash -python3 burstpdf.py +./change_res.sh ``` ###Rotate the pdfs @@ -140,7 +140,7 @@ python3 bounding_box.py ``` ###Crop the mirror -

The pages are now cropped, but the mirror is still visible in the middle. I commented it out because if the cameras are positioned correctly there is no need for this step.

+

The pages are now cropped, but the mirror may still be visible at the edge. This happens if the cameras are not adjusted properly. I commented this step out because it is not needed when the cameras are positioned correctly.

```bash python3 mirror_crop.py ``` @@ -156,6 +156,12 @@ python3 tesseract_ocr.py ```bash ./merge_files.sh ``` + +##START OVER +

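To start over, run delete_and_start_over.sh. Warning: it deletes the scans, split, rotated, bounding_box and ocred directories (an empty scans directory is recreated), renames the current out.pdf with a timestamp, and moves the PDFs to ~/Desktop.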

+```bash +./delete_and_start_over.sh +```

## License The package is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). diff --git a/remove.sh b/remove.sh old mode 100644 new mode 100755 index 8c11f7b..7363bcb --- a/remove.sh +++ b/remove.sh @@ -1,5 +1,4 @@ cd split pwd -rm page0.jpg +rm input0.jpg rm `ls *.jpg | tail -n 1` -rm .DS_Store diff --git a/rename_scans.sh b/rename_scans.sh new file mode 100755 index 0000000..d509cc3 --- /dev/null +++ b/rename_scans.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +i=0 +for img in `ls scans/*.jpg`; do + mv $img scans/input$i.jpg + i=$((i+1)); +done diff --git a/rotation.py b/rotation.py old mode 100644 new mode 100755 diff --git a/scans/.DS_Store b/scans/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0