commit 56d96f35a735d61e88838c58bae7889c12249902
Author: Pedro Sá Couto
Date:   Fri Jun 19 22:48:01 2020 +0200

    working on local machine

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..6142d3d
Binary files /dev/null and b/.DS_Store differ
diff --git a/jstor.sh b/jstor.sh
new file mode 100755
index 0000000..6ab792b
--- /dev/null
+++ b/jstor.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Batch-process JSTOR papers: up to five times, take the most recently
+# modified folder in jstorpaper/, stamp the logo overlays onto its PDF,
+# move the folder to ready/ and OCR the result in place.
+# Requires bash (brace expansion, arrays, ${name// /_}).
+
+BASE=/Users/PSC/Desktop/JSTOR
+
+# Replace spaces with underscores in every name in the current directory.
+underscore_names() {
+  local name
+  for name in *; do
+    # Names without spaces are skipped: mv onto itself only prints an error.
+    [ "$name" = "${name// /_}" ] || mv -- "$name" "${name// /_}"
+  done
+}
+
+for i in {1..5}
+do
+  cd "$BASE/jstorpaper/" || exit 1
+  underscore_names
+
+  paper_dir=$(ls -td -- "$BASE"/jstorpaper/* 2>/dev/null | head -n 1)
+  # Stop early when fewer than five papers are waiting.
+  [ -n "$paper_dir" ] || break
+  cd "$paper_dir" || exit 1
+  underscore_names
+
+  cp "$paper_dir"/*.pdf "$BASE/overlay"
+  cd "$BASE/overlay" || exit 1
+  mv "$BASE"/overlay/*.pdf target.pdf
+  mkdir -p split
+  python3 burstpdf.py
+  python3 overlaylogo_cover.py
+  python3 overlaylogo_page.py
+  rm target.pdf
+
+  # List pages in numeric order; a glob sorts page10 before page2.
+  pages=()
+  n=1
+  while [ -f "split/page$n.png" ]; do
+    pages+=("split/page$n.png")
+    n=$((n + 1))
+  done
+  convert "${pages[@]}" -quality 100 name.pdf
+
+  # Overwrite the paper's original PDF with the stamped one.
+  pdfs=("$paper_dir"/*.pdf)
+  mv name.pdf "${pdfs[0]}"
+  rm -r split
+  mv "$paper_dir" "$BASE/ready"
+  cd "$BASE/ready" || exit 1
+
+  # OCR the newest PDF in place (input and output are the same file).
+  newest_pdf=$(ls -td -- "$BASE"/ready/*/*.pdf | head -n 1)
+  ocrmypdf "$newest_pdf" "$newest_pdf"
+done
diff --git a/jstorpaper/.DS_Store b/jstorpaper/.DS_Store
new file mode 100644
index 0000000..f0c4c1a
Binary files /dev/null and b/jstorpaper/.DS_Store differ
diff --git a/overlay/.DS_Store b/overlay/.DS_Store
new file mode 100644
index 0000000..75424c5
Binary files /dev/null and b/overlay/.DS_Store differ
diff --git a/overlay/burstpdf.py b/overlay/burstpdf.py
new file mode 100644
index 0000000..bab1715
--- /dev/null
+++ b/overlay/burstpdf.py
@@ -0,0 +1,67 @@
+"""Burst target.pdf into one PNG per page: split/page1.png, split/page2.png, ..."""
+#Based on the code in https://iq.opengenus.org/pdf_to_image_in_python/
+
+import os
+import time
+
+import pdf2image
+from PIL import Image
+
+#DECLARE CONSTANTS
+PDF_PATH = "target.pdf"
+DPI = 200
+FIRST_PAGE = None
+LAST_PAGE = None
+FORMAT = 'png'
+THREAD_COUNT = 1
+USERPWD = None
+USE_CROPBOX = False
+STRICT = False
+
+
+def pdftopil():
+    """Read PDF_PATH and convert it into a sequence of images.
+
+    Every option is taken from the module-level constants:
+
+    - DPI sets the resolution of the images
+    - FIRST_PAGE / LAST_PAGE set the first / last page processed by pdftoppm
+    - FORMAT sets the format of the pdftoppm conversion
+    - THREAD_COUNT sets how many threads are used for the conversion
+    - USERPWD sets a password to unlock the PDF
+    - USE_CROPBOX uses the crop box instead of the media box
+    - STRICT raises PDFSyntaxError on pdftoppm syntax errors
+
+    Prints the elapsed conversion time and returns the images produced by
+    pdf2image.convert_from_path.
+    """
+    start_time = time.time()
+    pil_images = pdf2image.convert_from_path(
+        PDF_PATH,
+        dpi=DPI,
+        first_page=FIRST_PAGE,
+        last_page=LAST_PAGE,
+        fmt=FORMAT,
+        thread_count=THREAD_COUNT,
+        userpw=USERPWD,
+        use_cropbox=USE_CROPBOX,
+        strict=STRICT,
+    )
+    print("Time taken : " + str(time.time() - start_time))
+    return pil_images
+
+
+def save_images(pil_images, output_dir="split"):
+    """Save each image as pageN.png inside output_dir, numbering from 1.
+
+    The directory is created when missing, so the script also works when
+    it is run without a prior mkdir.
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    for number, image in enumerate(pil_images, start=1):
+        image.save(os.path.join(output_dir, "page%d.png" % number))
+
+
+if __name__ == "__main__":
+    pil_images = pdftopil()
+    save_images(pil_images)
diff --git a/overlay/cover.png b/overlay/cover.png
new file mode 100644
index 0000000..46faa1a
Binary files /dev/null and b/overlay/cover.png differ
diff --git a/overlay/overlaylogo_cover.py b/overlay/overlaylogo_cover.py
new file mode 100644
index 0000000..90da298
--- /dev/null
+++ b/overlay/overlaylogo_cover.py
@@ -0,0 +1,20 @@
+"""Stamp cover.png across the bottom of the first page (split/page1.png)."""
+from PIL import Image
+
+background = Image.open("split/page1.png")
+
+#rescaling the logo to the page width, keeping its aspect ratio
+basewidth, baseheight = background.size
+finalcover = Image.open("cover.png")
+wpercent = basewidth / float(finalcover.size[0])
+hsize = int(float(finalcover.size[1]) * wpercent)
+#Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
+finalcover = finalcover.resize((basewidth, hsize), Image.LANCZOS)
+finalcover.save("cover_rescale.png")
+
+foreground = Image.open("cover_rescale.png")
+foregroundheight = foreground.size[1]
+
+#paste flush with the bottom edge, masked by the logo converted to RGBA
+background.paste(foreground, (0, baseheight - foregroundheight), foreground.convert('RGBA'))
+background.save("split/page1.png")
diff --git a/overlay/overlaylogo_page.py b/overlay/overlaylogo_page.py
new file mode 100644
index 0000000..c6c6412
--- /dev/null
+++ b/overlay/overlaylogo_page.py
@@ -0,0 +1,34 @@
+"""Stamp pages.png across the bottom of every page image from page 2 on."""
+from PIL import Image
+
+finalpage = Image.open("pages.png")
+#overlay scaled to the page width; built once, from the first page found
+foreground = None
+mask = None
+
+i = 2
+
+while True:
+    try:
+        background = Image.open("split/page%i.png"%i)
+    except FileNotFoundError:
+        #ran past the last page; any other error is a real failure
+        print("DID MY JOB!")
+        break
+
+    #each page is placed by its own height, not by the height of page 2
+    basewidth, baseheight = background.size
+
+    if foreground is None:
+        #rescaling the logo to the page width, keeping its aspect ratio
+        wpercent = basewidth / float(finalpage.size[0])
+        hsize = int(float(finalpage.size[1]) * wpercent)
+        #Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
+        finalpage.resize((basewidth, hsize), Image.LANCZOS).save("page_rescale.png")
+        foreground = Image.open("page_rescale.png")
+        mask = foreground.convert('RGBA')
+
+    background.paste(foreground, (0, baseheight - foreground.size[1]), mask)
+    background.save("split/page%i.png"%i)
+
+    i += 1
diff --git a/overlay/pages.png b/overlay/pages.png
new file mode 100644
index 0000000..0ef8e7e
Binary files /dev/null and b/overlay/pages.png differ
diff --git a/ready/.DS_Store b/ready/.DS_Store
new file mode 100644
index 0000000..68909fa
Binary files /dev/null and b/ready/.DS_Store differ