import io
import os

import PyPDF2
from wand.color import Color
from wand.image import Image


def pdf_page_to_png(src_pdf, pagenum=0, resolution=72):
    """
    Render one page of a PDF as a flattened wand image in JPEG format.

    The function name is kept for backward compatibility; the image it
    returns is tagged as JPEG, not PNG.

    :param PyPDF2.PdfFileReader src_pdf: PDF from which to take the page.
    :param int pagenum: Zero-based index of the page to take.
    :param int resolution: Rendering resolution in DPI.
    :returns: A ``wand.image.Image`` with a white background, the alpha
        channel removed and its format set to JPEG. The caller owns the
        image and should close it (e.g. by using it in a ``with`` block).
    """
    # Copy the requested page into a single-page, in-memory PDF so that
    # only that page is handed to the rasterizer.
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    img = Image(file=pdf_bytes, resolution=resolution)
    # Flatten transparency onto white; otherwise transparent regions may
    # come out black once the image is written as JPEG.
    img.background_color = Color('white')
    img.alpha_channel = 'remove'
    # Image.convert() returns a *new* image and leaves the receiver
    # untouched, so calling it and discarding the result did nothing.
    # Setting the format attribute changes this image in place.
    img.format = 'jpeg'
    return img


def get_cover(file_path, filename):
    """
    Write a JPEG thumbnail of the first page of a PDF to ``app/cover/``.

    :param str file_path: Path of the PDF file to read.
    :param str filename: Base name for the generated cover; the image is
        saved as ``app/cover/<filename>_cover.jpeg``.
    :returns: The cover's file name (``<filename>_cover.jpeg``, without the
        directory), or ``None`` when the PDF cannot be opened or decrypted.
    """
    print(file_path)
    cover_name = filename + "_cover.jpeg"

    try:
        pdf_file = open(file_path, "rb")
    except Exception:
        # Best effort: any failure to open the file means "no cover".
        print("couln't open PDF")
        return None

    # PdfFileReader reads from the stream lazily, so the file has to stay
    # open until the page has been rendered; the with-block closes it on
    # every exit path.
    with pdf_file:
        try:
            src_pdf = PyPDF2.PdfFileReader(pdf_file)
        except Exception:
            print("couln't open PDF")
            return None

        if src_pdf.isEncrypted:
            # Many PDFs are "encrypted" with an empty user password.
            try:
                decrypted = src_pdf.decrypt('')
            except Exception:
                decrypted = False
            # decrypt() reports a wrong password through a falsy return
            # value rather than an exception, so check it explicitly.
            if not decrypted:
                print("couln't decrypt")
                return None

        # Render the first page, then shrink it before saving. The image
        # wraps native ImageMagick resources, hence the with-block.
        with pdf_page_to_png(src_pdf, pagenum=0, resolution=130) as img:
            # Empty crop geometry, resize geometry "250" (in ImageMagick
            # geometry syntax a bare number is the target width).
            img.transform("", "250")
            img.save(filename="app/cover/" + cover_name)

    return cover_name


# ---
# epub
# Reference links collected for EPUB metadata / cover extraction:
# https://ebooks.stackexchange.com/questions/6517/command-line-extraction-of-metadata-title-author-from-epub-file
# https://hackage.haskell.org/package/epub-tools
# http://stackoverflow.com/questions/9751475/extract-cover-image-from-chm-and-epub-files