working on local machine
commit
56d96f35a7
@ -0,0 +1,23 @@
|
||||
# Process the five most recently modified entries of the jstorpaper folder.
# For each one: copy its PDF into the overlay folder, burst it into page
# images, run the two overlay scripts, rebuild the PDF from the page images,
# put the rebuilt PDF back in place of the original, move the paper folder to
# "ready" and OCR the PDF in place.
jstor_root=/Users/PSC/Desktop/JSTOR
paper_root="$jstor_root/jstorpaper"
overlay_dir="$jstor_root/overlay"
ready_root="$jstor_root/ready"

# Replace spaces with underscores in the names of everything in the current
# directory. Names without a space are skipped so mv is never asked to move a
# file onto itself.
underscore_names() {
    local name
    for name in *; do
        case "$name" in
            *" "*) mv -- "$name" "${name// /_}" ;;
        esac
    done
}

for i in {1..5}
do
    # Stop rather than carry on in the wrong directory: later steps delete
    # and overwrite files relative to the current directory.
    cd "$paper_root" || exit 1
    underscore_names

    # The newest entry (by modification time) is the next paper to process.
    paper_dir=$(ls -td -- "$paper_root"/* 2>/dev/null | head -n 1)
    if [ ! -d "$paper_dir" ]; then
        echo "Nothing to process in $paper_root" >&2
        break
    fi
    cd "$paper_dir" || exit 1
    underscore_names

    # The rest of the pipeline works on a single target.pdf, so the paper
    # folder must hold exactly one PDF.
    pdfs=("$paper_dir"/*.pdf)
    if [ "${#pdfs[@]}" -ne 1 ] || [ ! -f "${pdfs[0]}" ]; then
        echo "Expected exactly one PDF in $paper_dir" >&2
        exit 1
    fi
    paper_pdf=${pdfs[0]}

    cd "$overlay_dir" || exit 1
    cp -- "$paper_pdf" target.pdf || exit 1
    mkdir -p split
    python3 burstpdf.py
    python3 overlaylogo_cover.py
    python3 overlaylogo_page.py
    rm target.pdf

    # Collect the pages in numeric order. A glob would be expanded lexically
    # (page1, page10, page11, ..., page2) and scramble any PDF with more than
    # nine pages.
    pages=()
    page=1
    while [ -f "split/page$page.png" ]; do
        pages+=("split/page$page.png")
        page=$((page + 1))
    done
    if [ "${#pages[@]}" -eq 0 ]; then
        echo "No page images were produced for $paper_pdf" >&2
        rm -r split
        exit 1
    fi
    convert "${pages[@]}" -quality 100 name.pdf || exit 1

    # Replace the original PDF with the stamped one, then clean up.
    mv name.pdf "$paper_pdf"
    rm -r split

    mv "$paper_dir" "$ready_root" || exit 1
    cd "$ready_root" || exit 1

    # OCR the PDF that was just moved, writing the result over the input.
    ready_pdf="$ready_root/$(basename "$paper_dir")/$(basename "$paper_pdf")"
    ocrmypdf "$ready_pdf" "$ready_pdf"
done
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,43 @@
|
||||
# Based on the code at https://iq.opengenus.org/pdf_to_image_in_python/
|
||||
|
||||
import pdf2image
|
||||
from PIL import Image
|
||||
import time
|
||||
|
||||
# Conversion settings. pdftopil() forwards every one of these to
# pdf2image.convert_from_path.

# Input file and rendering resolution.
PDF_PATH = "target.pdf"
DPI = 200

# Page range handed to the converter (first_page / last_page).
FIRST_PAGE = None
LAST_PAGE = None

# Output format and number of conversion threads.
FORMAT = 'png'
THREAD_COUNT = 1

# Password for the PDF (passed as userpw).
USERPWD = None

# Use the crop box instead of the media box when converting.
USE_CROPBOX = False

# Whether pdftoppm syntax errors are raised as PDFSyntaxError.
STRICT = False
|
||||
|
||||
def pdftopil():
    """Convert the PDF at PDF_PATH into a sequence of page images.

    Every conversion setting comes from the module-level constants:

    * DPI -- resolution of the rendered images
    * FIRST_PAGE / LAST_PAGE -- first and last page processed by pdftoppm
    * FORMAT -- output format of the pdftoppm conversion
    * THREAD_COUNT -- number of threads used for the conversion
    * USERPWD -- password used to unlock the PDF
    * USE_CROPBOX -- use the crop box instead of the media box
    * STRICT -- surface pdftoppm syntax errors as PDFSyntaxError

    The elapsed time of the conversion is printed to stdout.

    Returns:
        The images produced by pdf2image.convert_from_path.
    """
    options = {
        "dpi": DPI,
        "first_page": FIRST_PAGE,
        "last_page": LAST_PAGE,
        "fmt": FORMAT,
        "thread_count": THREAD_COUNT,
        "userpw": USERPWD,
        "use_cropbox": USE_CROPBOX,
        "strict": STRICT,
    }

    started = time.time()
    pages = pdf2image.convert_from_path(PDF_PATH, **options)
    elapsed = time.time() - started
    print("Time taken : " + str(elapsed))
    return pages
|
||||
|
||||
def save_images(pil_images, output_dir="split"):
    """Save each image as ``<output_dir>/page<N>.png``, numbering from 1.

    Args:
        pil_images: Iterable of objects exposing ``save(path)``; in this
            script these are the page images returned by pdftopil().
        output_dir: Directory that receives the page files. Defaults to
            "split", the location the overlay scripts read from. It is
            created if it does not exist yet.
    """
    import os

    os.makedirs(output_dir, exist_ok=True)
    for page_number, image in enumerate(pil_images, start=1):
        # Keep the forward slash so the default path stays "split/pageN.png".
        image.save("%s/page%d.png" % (output_dir, page_number))
|
||||
|
||||
# Script entry point: burst the PDF into page images and write them to disk.
if __name__ == "__main__":
    save_images(pdftopil())
|
Binary file not shown.
After Width: | Height: | Size: 97 KiB |
@ -0,0 +1,18 @@
|
||||
from PIL import Image
|
||||
|
||||
# Paste the cover banner (cover.png), scaled to the width of the first page,
# along the bottom edge of split/page1.png and overwrite that page image.
background = Image.open("split/page1.png")
basewidth, baseheight = background.size

# Scale the banner to the page width, keeping its aspect ratio.
finalcover = Image.open("cover.png")
wpercent = basewidth / float(finalcover.size[0])
hsize = int(float(finalcover.size[1]) * wpercent)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
foreground = finalcover.resize((basewidth, hsize), Image.LANCZOS)

# The rescaled banner is also kept on disk.
foreground.save("cover_rescale.png")

# Anchor the banner to the bottom-left corner of the page. The banner's own
# RGBA conversion is passed as the paste mask.
foregroundheight = foreground.size[1]
background.paste(
    foreground,
    (0, baseheight - foregroundheight),
    foreground.convert('RGBA'),
)
background.save("split/page1.png")
|
@ -0,0 +1,30 @@
|
||||
from PIL import Image
|
||||
|
||||
# Paste the page banner (pages.png) along the bottom edge of every page image
# from split/page2.png onwards, overwriting each page in place. The banner is
# scaled once, to the width of the first page found, and the scaled copy is
# also written to page_rescale.png.
foreground = None
mask = None
i = 2

while True:
    page_path = "split/page%i.png" % i

    # A missing file means there are no more pages. Only that case ends the
    # loop; any other failure is allowed to surface as an error.
    try:
        background = Image.open(page_path)
    except FileNotFoundError:
        print("DID MY JOB!")
        break

    if foreground is None:
        # Scale the banner to the page width, keeping its aspect ratio.
        basewidth = background.size[0]
        finalpage = Image.open("pages.png")
        wpercent = basewidth / float(finalpage.size[0])
        hsize = int(float(finalpage.size[1]) * wpercent)
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name.
        foreground = finalpage.resize((basewidth, hsize), Image.LANCZOS)
        foreground.save("page_rescale.png")
        # The banner's own RGBA conversion is used as the paste mask.
        mask = foreground.convert('RGBA')

    # Anchor the banner to the bottom-left corner of this page.
    baseheight = background.size[1]
    foregroundheight = foreground.size[1]
    background.paste(foreground, (0, baseheight - foregroundheight), mask)
    background.save(page_path)

    i += 1
|
Binary file not shown.
After Width: | Height: | Size: 72 KiB |
Binary file not shown.
Loading…
Reference in New Issue