Still need to fix the mirror margins
parent
b078f1c75b
commit
ae1a91eef7
@ -1,67 +0,0 @@
|
||||
import cv2
|
||||
import time
|
||||
import logging
|
||||
|
||||
# A row/column counts as page content once its brightest pixel exceeds this.
THRESHOLD = 25


def content_span(profile, threshold):
    """Return ``(start, end)`` bounds of the first bright run in *profile*.

    *profile* is a sequence of per-row (or per-column) maximum pixel values.

    ``start`` is the index of the first value strictly greater than
    *threshold*; ``end`` is the index of the first value strictly less than
    *threshold* found after ``start``.  A value equal to *threshold* neither
    opens nor closes the run.

    When no value exceeds the threshold the full range ``(0, len(profile))``
    is returned, so the caller keeps the image uncropped instead of failing.
    """
    start = 0
    end = len(profile)
    started = False
    for index, value in enumerate(profile):
        if not started:
            if value > threshold:
                start = index
                started = True
        elif value < threshold:
            end = index
            break
    return start, end


def crop_pages():
    """Crop ``input1.jpg``, ``input2.jpg``, ... into ``page1.jpg``, ...

    Pages are processed in increasing order until an input image cannot be
    read, which is treated as the normal end of the run.  Any other failure
    is logged with its traceback and also stops the loop.
    """
    d = 1
    while True:
        try:
            time.sleep(1)

            source = 'input%d.jpg' % d
            page = 'page%d.jpg' % d

            print("Value of d is:", d, "\n", "Page name:", source)

            # Grayscale copy, used only to locate the useful region.
            img = cv2.imread(source, 0)
            if img is None:
                # cv2.imread yields None when the file is missing or
                # unreadable: there are no more pages to process.
                print("All pages must be ready!")
                break

            # Brightest pixel of every row (axis 1) and every column (axis 0).
            h_start, h_end = content_span(img.max(1), THRESHOLD)
            v_start, v_end = content_span(img.max(0), THRESHOLD)

            # Load the color image and keep only the useful area of it.
            img2 = cv2.imread(source)[h_start:h_end, v_start:v_end, :]

            # Write the cropped image.
            cv2.imwrite(page, img2)

            d += 1
            print("Value of d is:", d)

        except Exception:
            logging.exception("message")
            print("All pages must be ready!")
            break


if __name__ == "__main__":
    crop_pages()
|
@ -0,0 +1,34 @@
|
||||
import cv2
|
||||
import logging
|
||||
|
||||
def largest_region(image):
    """Return the part of *image* inside its largest external contour.

    The image is converted to grayscale and binarized with Otsu's threshold;
    the bounding rectangle of the contour with the greatest area is then
    sliced out of the original color image.  If no contour is found the
    image is returned unchanged.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]

    # findContours returns a 2- or 3-element tuple depending on the OpenCV
    # version; the contour list is first in the former, second in the latter.
    found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]
    if len(cnts) == 0:
        return image

    x, y, w, h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
    return image[y:y + h, x:x + w]


def crop_pages():
    """Crop ``rotated/input<N>.jpg`` into ``cropped/page<N>.jpg`` for N = 1, 2, ...

    Processing stops at the first input image that cannot be read, which is
    treated as the normal end of the run.  Any other failure is logged with
    its traceback and also stops the loop.
    """
    d = 1
    while True:
        try:
            output = 'cropped/page%d.jpg' % d

            image = cv2.imread('rotated/input%d.jpg' % d)
            if image is None:
                # cv2.imread yields None when the file is missing or
                # unreadable: there are no more pages to process.
                print("All pages must be ready!")
                break

            cv2.imwrite(output, largest_region(image))
            # NOTE(review): the former cv2.waitKey() call was dropped. No
            # window is ever opened here, and the call is believed to raise
            # in headless OpenCV builds -- confirm nothing relied on it.

            d += 1

        except Exception:
            logging.exception("message")
            print("All pages must be ready!")
            break


if __name__ == "__main__":
    crop_pages()
|
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Merge every JPEG in ./scans into a single PDF, scans/out.pdf, with the
# `convert` command (presumably ImageMagick's -- confirm it is installed).
#
# `scans` is resolved relative to the caller's working directory. Uncomment
# the next line to resolve it relative to this script's location instead.
# cd "$(dirname "$0")"

# Stop here if the directory is missing, so convert never runs elsewhere.
cd scans || { echo "$0: cannot enter ./scans" >&2; exit 1; }
pwd
# NOTE: the glob expands in lexical order, so e.g. 10.jpg sorts before 2.jpg.
convert *.jpg out.pdf
|
@ -1,8 +1,10 @@
|
||||
# Create the working directories used by the steps below; -p keeps a re-run
# from failing when they already exist.
mkdir -p split rotated ocred cropped

# Pipeline steps, run in order.
./merge_scans.sh
python3 burstpdf.py
python3 rotation.py
# NOTE(review): the hunk header for this file announces 10 lines but 11 are
# shown, so one of them is probably the removed side of the diff -- likely the
# crop.py call, since this commit appears to delete that script. Confirm.
python3 crop.py
python3 mask_crop.py
python3 tesseract_ocr.py
./merge_files.sh
|
||||
|
Loading…
Reference in New Issue