# Based on the code at https://iq.opengenus.org/pdf_to_image_in_python/
import os
import time

import pdf2image
from PIL import Image

# Conversion settings, passed straight through to pdf2image.convert_from_path.
PDF_PATH = "target.pdf"   # path of the PDF file to convert
DPI = 200                 # resolution of the rendered images
FIRST_PAGE = None         # first page to process (None leaves it unset)
LAST_PAGE = None          # last page to process (None leaves it unset)
FORMAT = 'png'            # image format requested from the converter
THREAD_COUNT = 1          # number of threads used for the conversion
USERPWD = None            # password used to unlock the PDF, if any
USE_CROPBOX = False       # use the crop box instead of the media box
STRICT = False            # report converter syntax errors as PDFSyntaxError


def pdftopil():
    """Convert the PDF at PDF_PATH into a sequence of PIL images.

    Every conversion option is taken from the module-level constants above.
    The elapsed conversion time is printed to stdout.

    Returns:
        The images produced by ``pdf2image.convert_from_path``, one per
        converted page.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments during the conversion.
    start_time = time.perf_counter()
    pil_images = pdf2image.convert_from_path(
        PDF_PATH,
        dpi=DPI,
        first_page=FIRST_PAGE,
        last_page=LAST_PAGE,
        fmt=FORMAT,
        thread_count=THREAD_COUNT,
        userpw=USERPWD,
        use_cropbox=USE_CROPBOX,
        strict=STRICT,
    )
    print("Time taken : " + str(time.perf_counter() - start_time))
    return pil_images


def save_images(pil_images, output_dir="split"):
    """Save each image as ``<output_dir>/page<N>.png``, numbering from 1.

    Args:
        pil_images: Iterable of objects exposing a PIL-style ``save(path)``.
        output_dir: Directory that receives the files. It is created when
            missing. Defaults to ``"split"``.
    """
    # Saving into a directory that does not exist raises FileNotFoundError,
    # so make sure it is there before writing the first page.
    os.makedirs(output_dir, exist_ok=True)
    for page_number, image in enumerate(pil_images, start=1):
        image.save(os.path.join(output_dir, "page%d.png" % page_number))


if __name__ == "__main__":
    pil_images = pdftopil()
    save_images(pil_images)