working on local machine
commit
56d96f35a7
@ -0,0 +1,23 @@
|
|||||||
|
# Batch pipeline (bash or zsh): for up to five folders in $PAPER_DIR, newest
# first, take the PDF inside the folder, burst it into page images, stamp the
# cover/page logos on them, rebuild the PDF in place, move the folder to
# $READY_DIR and run OCR on the rebuilt PDF.
#
# Each folder is expected to contain exactly one PDF; if there are several,
# only the first one listed by `ls` is processed.

JSTOR_DIR=/Users/PSC/Desktop/JSTOR
PAPER_DIR="$JSTOR_DIR/jstorpaper"
OVERLAY_DIR="$JSTOR_DIR/overlay"
READY_DIR="$JSTOR_DIR/ready"

# Replace spaces with underscores in every entry of the current directory.
underscore_names() {
    for name in *; do
        [ -e "$name" ] || continue
        new_name="${name// /_}"
        # Skip names without spaces so mv does not complain about identical paths.
        [ "$name" = "$new_name" ] || mv -- "$name" "$new_name"
    done
}

for i in {1..5}
do
    cd "$PAPER_DIR" || exit 1
    underscore_names

    # Newest folder first; the trailing slash restricts the glob to directories.
    var2=$(ls -td -- "$PAPER_DIR"/*/ 2>/dev/null | head -n 1)
    var2=${var2%/}
    if [ -z "$var2" ]; then
        # Without this guard an empty result would make `cd` fall back to $HOME
        # and the rename loop would run there.
        echo "No more paper folders in $PAPER_DIR." >&2
        break
    fi

    cd "$var2" || exit 1
    underscore_names

    src_pdf=$(ls -- "$var2"/*.pdf 2>/dev/null | head -n 1)
    if [ -z "$src_pdf" ]; then
        echo "No PDF found in $var2; stopping." >&2
        break
    fi

    cd "$OVERLAY_DIR" || exit 1
    cp "$src_pdf" target.pdf || exit 1

    # Start from an empty split/ so pages left over from an earlier failed run
    # cannot leak into this document.
    rm -rf split
    mkdir -p split

    python3 burstpdf.py || { echo "burstpdf.py failed for $src_pdf" >&2; exit 1; }
    python3 overlaylogo_cover.py || { echo "overlaylogo_cover.py failed for $src_pdf" >&2; exit 1; }
    # A failure here is reported but not fatal: page 1 is already stamped.
    python3 overlaylogo_page.py || echo "overlaylogo_page.py reported an error for $src_pdf; continuing." >&2
    rm -f target.pdf

    # Collect the pages in numeric order. A glob would sort them as text
    # (page1, page10, page11, ..., page2) and scramble documents with 10+ pages.
    pages=()
    n=1
    while [ -f "split/page$n.png" ]; do
        pages+=("split/page$n.png")
        n=$((n + 1))
    done
    convert "${pages[@]}" -quality 100 name.pdf || { echo "convert failed for $src_pdf" >&2; exit 1; }

    # Overwrite the original PDF with the stamped version.
    mv name.pdf "$src_pdf" || exit 1
    rm -r split

    mkdir -p "$READY_DIR"
    mv "$var2" "$READY_DIR"/ || exit 1
    cd "$READY_DIR" || exit 1

    # OCR the rebuilt PDF in place, addressed by its known new location rather
    # than by guessing the most recently modified file.
    ready_pdf="$READY_DIR/$(basename "$var2")/$(basename "$src_pdf")"
    ocrmypdf "$ready_pdf" "$ready_pdf"
done
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,43 @@
|
|||||||
|
# Based on the code at https://iq.opengenus.org/pdf_to_image_in_python/
|
||||||
|
|
||||||
|
import pdf2image
|
||||||
|
from PIL import Image
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Conversion settings handed to pdf2image.convert_from_path() by pdftopil().

# Path of the PDF to burst into page images.
PDF_PATH = "target.pdf"

# Resolution of the rendered images.
DPI = 200

# First and last page to be processed by pdftoppm.
FIRST_PAGE = None
LAST_PAGE = None

# Output format of the pdftoppm conversion.
FORMAT = 'png'

# How many threads are used for the conversion.
THREAD_COUNT = 1

# Password used to unlock the PDF.
USERPWD = None

# Use the crop box instead of the media box when converting.
USE_CROPBOX = False

# Report pdftoppm syntax errors with the custom PDFSyntaxError type.
STRICT = False
|
||||||
|
|
||||||
|
def pdftopil():
    """Read the PDF at PDF_PATH and convert it into a sequence of images.

    Every option comes from the module-level constants:

    * DPI -- resolution of the rendered images
    * FIRST_PAGE / LAST_PAGE -- first and last page processed by pdftoppm
    * FORMAT -- output format of the pdftoppm conversion
    * THREAD_COUNT -- number of threads used for the conversion
    * USERPWD -- password used to unlock the PDF
    * USE_CROPBOX -- use the crop box instead of the media box
    * STRICT -- report pdftoppm syntax errors as PDFSyntaxError

    The elapsed conversion time is printed before returning.

    Returns:
        Whatever pdf2image.convert_from_path() returns for these options.
    """
    options = {
        "dpi": DPI,
        "first_page": FIRST_PAGE,
        "last_page": LAST_PAGE,
        "fmt": FORMAT,
        "thread_count": THREAD_COUNT,
        "userpw": USERPWD,
        "use_cropbox": USE_CROPBOX,
        "strict": STRICT,
    }

    started_at = time.time()
    pages = pdf2image.convert_from_path(PDF_PATH, **options)
    elapsed = time.time() - started_at
    print("Time taken : " + str(elapsed))
    return pages
|
||||||
|
|
||||||
|
def save_images(pil_images):
    """Save each image as split/page<N>.png, numbering from 1 in iteration order.

    Args:
        pil_images: iterable of objects exposing a ``save(path)`` method.
    """
    for page_number, page in enumerate(pil_images, start=1):
        page.save("split/page%d" % page_number + ".png")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Convert the target PDF to images, then write them out one file per page.
    save_images(pdftopil())
|
Binary file not shown.
After Width: | Height: | Size: 97 KiB |
@ -0,0 +1,18 @@
|
|||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
# Stamp the cover logo ("cover.png") along the bottom edge of the first page
# image, overwriting split/page1.png with the result.

background = Image.open("split/page1.png")
basewidth, baseheight = background.size

# Rescale the logo to the full page width, keeping its aspect ratio.
finalcover = Image.open("cover.png")
wpercent = basewidth / float(finalcover.size[0])
hsize = int(float(finalcover.size[1]) * wpercent)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
foreground = finalcover.resize((basewidth, hsize), Image.LANCZOS)
# The rescaled logo is still written to disk, as before.
foreground.save("cover_rescale.png")

foregroundheight = foreground.size[1]

# The RGBA copy serves as the paste mask so transparent logo pixels leave the
# page untouched; the logo's bottom edge is aligned with the page's bottom edge.
background.paste(foreground, (0, baseheight - foregroundheight), foreground.convert('RGBA'))
background.save("split/page1.png")
|
@ -0,0 +1,30 @@
|
|||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
# Stamp the page logo ("pages.png") along the bottom edge of every page image
# from split/page2.png onwards, overwriting each page file with the result.
# Numbering starts at 2, so split/page1.png is left alone by this script.

finalpage = Image.open("pages.png")

# Rescaled logo and its RGBA paste mask, cached per page width so that pages
# of different sizes each get a logo spanning their own full width.
rescaled_by_width = {}

i = 2

while True:
    try:
        background = Image.open("split/page%i.png" % i)
    except FileNotFoundError:
        # Ran past the last page (immediately, for a single-page document).
        # Only the missing-file case ends the loop; any other error surfaces
        # instead of being reported as success.
        print("DID MY JOB!")
        break

    basewidth, baseheight = background.size

    if basewidth not in rescaled_by_width:
        # Rescale the logo to the page width, keeping its aspect ratio.
        wpercent = basewidth / float(finalpage.size[0])
        hsize = int(float(finalpage.size[1]) * wpercent)
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name.
        rescaled = finalpage.resize((basewidth, hsize), Image.LANCZOS)
        if not rescaled_by_width:
            # The first rescaled logo is still written to disk, as before.
            rescaled.save("page_rescale.png")
        rescaled_by_width[basewidth] = (rescaled, rescaled.convert('RGBA'))

    foreground, mask = rescaled_by_width[basewidth]
    foregroundheight = foreground.size[1]

    # Align the logo's bottom edge with this page's bottom edge; the RGBA mask
    # keeps transparent logo pixels from covering the page.
    background.paste(foreground, (0, baseheight - foregroundheight), mask)
    background.save("split/page%i.png" % i)

    i += 1
|
Binary file not shown.
After Width: | Height: | Size: 72 KiB |
Binary file not shown.
Loading…
Reference in New Issue