Add PyPDF2 to vendor

9 years ago · 829e926770
parent 2b2d485730
commit 829e926770
9 changed files with 5956 additions and 0 deletions
--- a/vendor/PyPDF2/__init__.py
+++ b/vendor/PyPDF2/__init__.py
@ -0,0 +1,5 @@
+from .pdf import PdfFileReader, PdfFileWriter
+from .merger import PdfFileMerger
+from .pagerange import PageRange, parse_filename_page_ranges
+from ._version import __version__
+__all__ = ["pdf", "PdfFileMerger"]
--- a/vendor/PyPDF2/_version.py
+++ b/vendor/PyPDF2/_version.py
@ -0,0 +1 @@
+__version__ = '1.26.0'
--- a/vendor/PyPDF2/filters.py
+++ b/vendor/PyPDF2/filters.py
@ -0,0 +1,362 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+Implementation of stream filters for PDF.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+from .utils import PdfReadError, ord_, chr_
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+else:
+    from io import StringIO
+    import struct
+
+# Provide module-level decompress(data) / compress(data) helpers.  The
+# standard zlib module is preferred; if it cannot be imported, equivalent
+# helpers are built on the .NET System.IO.Compression classes.
+try:
+    import zlib
+
+    def decompress(data):
+        """Return ``data`` inflated with ``zlib.decompress``."""
+        return zlib.decompress(data)
+
+    def compress(data):
+        """Return ``data`` deflated with ``zlib.compress``."""
+        return zlib.compress(data)
+
+except ImportError:
+    # Unable to import zlib.  Attempt to use the System.IO.Compression
+    # library from the .NET framework. (IronPython only)
+    import System
+    from System import IO, Collections, Array
+
+    def _string_to_bytearr(buf):
+        """Copy ``buf`` into a new .NET ``System.Byte`` array, one ``ord()`` per item."""
+        retval = Array.CreateInstance(System.Byte, len(buf))
+        for i in range(len(buf)):
+            retval[i] = ord(buf[i])
+        return retval
+
+    def _bytearr_to_string(bytes):
+        """Build a string with one ``chr()`` per element of the .NET array."""
+        # NOTE: the parameter name shadows the ``bytes`` builtin inside this helper.
+        retval = ""
+        for i in range(bytes.Length):
+            retval += chr(bytes[i])
+        return retval
+
+    def _read_bytes(stream):
+        """Read ``stream`` to exhaustion in 2048-byte chunks and return the bytes read."""
+        ms = IO.MemoryStream()
+        buf = Array.CreateInstance(System.Byte, 2048)
+        while True:
+            bytes = stream.Read(buf, 0, buf.Length)
+            # Read() returning 0 is treated as end of stream.
+            if bytes == 0:
+                break
+            else:
+                ms.Write(buf, 0, bytes)
+        retval = ms.ToArray()
+        ms.Close()
+        return retval
+
+    def decompress(data):
+        """Inflate ``data`` through a .NET ``DeflateStream`` and return a string."""
+        # NOTE(review): this feeds the data straight to DeflateStream; confirm
+        # that zlib-wrapped PDF streams decode correctly on this path.
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        ms.Write(bytes, 0, bytes.Length)
+        ms.Position = 0  # fseek 0
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
+        bytes = _read_bytes(gz)
+        retval = _bytearr_to_string(bytes)
+        gz.Close()
+        return retval
+
+    def compress(data):
+        """Deflate ``data`` through a .NET ``DeflateStream`` and return a string."""
+        bytes = _string_to_bytearr(data)
+        ms = IO.MemoryStream()
+        # The third argument (True) is passed so ``ms`` can still be read
+        # after ``gz`` has been closed below.
+        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
+        gz.Write(bytes, 0, bytes.Length)
+        gz.Close()
+        ms.Position = 0 # fseek 0
+        bytes = ms.ToArray()
+        retval = _bytearr_to_string(bytes)
+        ms.Close()
+        return retval
+
+
+class FlateDecode(object):
+    """
+    Codec for the PDF ``/FlateDecode`` stream filter.
+
+    Decoding inflates the data and, when ``/Predictor`` is 10-15, undoes
+    PNG row prediction for the None (0), Sub (1) and Up (2) row filters.
+    """
+
+    @staticmethod
+    def decode(data, decodeParms):
+        """
+        Inflate ``data`` and reverse any PNG predictor named in ``decodeParms``.
+
+        :param data: compressed stream contents.
+        :param decodeParms: the stream's ``/DecodeParms`` entry, or a false
+            value.  Objects without a ``get`` method are treated as
+            "no predictor".
+        :return: the inflated data; when a PNG predictor was applied, the
+            text accumulated from the un-predicted rows.
+        :raises PdfReadError: if the predictor is neither 1 nor 10-15, if a
+            row uses an unsupported PNG filter, or if the inflated data is
+            not a whole number of rows.
+        """
+        data = decompress(data)
+        predictor = 1
+        if decodeParms:
+            try:
+                predictor = decodeParms.get("/Predictor", 1)
+            except AttributeError:
+                pass    # usually an array with a null object was read
+
+        # predictor 1 == no predictor
+        if predictor == 1:
+            return data
+        if not (10 <= predictor <= 15):
+            # unsupported predictor
+            raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
+
+        # The PDF specification gives /Columns a default of 1, so a missing
+        # entry is not an error.
+        columns = decodeParms.get("/Columns", 1)
+        # Each row is one PNG filter-type byte followed by ``columns`` bytes.
+        rowlength = columns + 1
+        # Validate with an exception rather than ``assert``: the data comes
+        # from the file being read and asserts vanish under ``python -O``.
+        if len(data) % rowlength != 0:
+            raise PdfReadError(
+                "FlateDecode data length %d is not a multiple of the row length %d"
+                % (len(data), rowlength))
+
+        output = StringIO()
+        prev_rowdata = (0,) * rowlength
+        # PNG prediction can vary from row to row
+        for start in range(0, len(data), rowlength):
+            rowdata = [ord_(x) for x in data[start:start + rowlength]]
+            filterByte = rowdata[0]
+            if filterByte == 0:
+                pass
+            elif filterByte == 1:
+                # Sub: add the byte to the left.  rowdata[0] is the filter
+                # byte, so rowdata[1] has no left neighbour and is kept.
+                for i in range(2, rowlength):
+                    rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
+            elif filterByte == 2:
+                # Up: add the byte at the same position in the previous row.
+                for i in range(1, rowlength):
+                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
+            else:
+                # unsupported PNG filter
+                raise PdfReadError("Unsupported PNG filter %r" % filterByte)
+            prev_rowdata = rowdata
+            output.write(''.join([chr(x) for x in rowdata[1:]]))
+        return output.getvalue()
+
+    @staticmethod
+    def encode(data):
+        """Return ``data`` compressed with the module-level ``compress`` helper."""
+        return compress(data)
+
+
+class ASCIIHexDecode(object):
+    """Decoder for the PDF ``/ASCIIHexDecode`` stream filter."""
+
+    @staticmethod
+    def decode(data, decodeParms=None):
+        """
+        Convert pairs of hexadecimal digits into characters.
+
+        Whitespace is skipped and decoding stops at the ``>`` end-of-data
+        marker.  If the data ends without a marker, the end of the data is
+        treated as the marker.  A single trailing digit is completed with
+        ``0``, as the PDF specification requires.
+
+        :param data: hex-encoded text (``str``, or ``bytes`` on Python 3).
+        :param decodeParms: accepted for interface compatibility; unused.
+        :return: a string with one character per decoded byte.
+        :raises ValueError: if a non-hexadecimal character is encountered.
+        """
+        # On Python 3 a bytes object yields ints when iterated; latin-1 maps
+        # each byte to the code point of the same value, so the text logic
+        # below applies unchanged.  (On Python 2 ``bytes is str``.)
+        if isinstance(data, bytes) and not isinstance(data, str):
+            data = data.decode("latin-1")
+
+        decoded = []
+        pair = ""
+        for c in data:
+            if c == ">":
+                break
+            if c.isspace():
+                continue
+            pair += c
+            if len(pair) == 2:
+                decoded.append(chr(int(pair, base=16)))
+                pair = ""
+        if pair:
+            # Odd number of digits: behave as if a 0 followed the last one.
+            decoded.append(chr(int(pair + "0", base=16)))
+        return "".join(decoded)
+
+
+class LZWDecode(object):
+    """Taken from:
+    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
+    """
+    class decoder(object):
+        """Stateful LZW decoder over one encoded buffer."""
+
+        def __init__(self, data):
+            """
+            :param data: the LZW-encoded input; items may be one-character
+                strings or ints (as produced by indexing ``bytes`` on
+                Python 3).
+            """
+            self.STOP = 257
+            self.CLEARDICT = 256
+            self.data = data
+            self.bytepos = 0
+            self.bitpos = 0
+            # Entries 0-255 are the single characters; 256 and 257 are the
+            # control codes, so learned sequences start at index 258.
+            self.dict = [""] * 4096
+            for i in range(256):
+                self.dict[i] = chr(i)
+            self.resetDict()
+
+        def resetDict(self):
+            """Forget learned sequences and return to 9-bit codes."""
+            self.dictlen = 258
+            self.bitspercode = 9
+
+        def nextCode(self):
+            """
+            Read the next ``self.bitspercode``-bit code, most significant
+            bit first.
+
+            :return: the code value, or -1 if the data ran out first.
+            """
+            fillbits = self.bitspercode
+            value = 0
+            while fillbits > 0:
+                if self.bytepos >= len(self.data):
+                    return -1
+                nextbits = self.data[self.bytepos]
+                # Indexing bytes on Python 3 already yields an int; ord()
+                # on an int would raise TypeError.
+                if not isinstance(nextbits, int):
+                    nextbits = ord(nextbits)
+                bitsfromhere = 8 - self.bitpos
+                if bitsfromhere > fillbits:
+                    bitsfromhere = fillbits
+                value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
+                           (0xff >> (8-bitsfromhere))) <<
+                          (fillbits-bitsfromhere))
+                fillbits -= bitsfromhere
+                self.bitpos += bitsfromhere
+                if self.bitpos >= 8:
+                    self.bitpos = 0
+                    self.bytepos = self.bytepos + 1
+            return value
+
+        def decode(self):
+            """ algorithm derived from:
+            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
+            and the PDFReference
+
+            :return: the decoded data as a string.
+            :raises PdfReadError: if the data ends before the stop code.
+            """
+            cW = self.CLEARDICT
+            # Collect pieces and join once instead of repeated ``+=``.
+            chunks = []
+            while True:
+                pW = cW
+                cW = self.nextCode()
+                if cW == -1:
+                    raise PdfReadError("Missed the stop code in LZWDecode!")
+                if cW == self.STOP:
+                    break
+                elif cW == self.CLEARDICT:
+                    self.resetDict()
+                elif pW == self.CLEARDICT:
+                    # First code after a clear: emit it, learn nothing yet.
+                    chunks.append(self.dict[cW])
+                else:
+                    if cW < self.dictlen:
+                        chunks.append(self.dict[cW])
+                        p = self.dict[pW] + self.dict[cW][0]
+                    else:
+                        # Code not in the table yet: it must be the previous
+                        # sequence plus that sequence's first character.
+                        p = self.dict[pW] + self.dict[pW][0]
+                        chunks.append(p)
+                    self.dict[self.dictlen] = p
+                    self.dictlen += 1
+                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
+                        self.bitspercode < 12):
+                        self.bitspercode += 1
+            return "".join(chunks)
+
+    @staticmethod
+    def decode(data,decodeParams=None):
+        """
+        Decode LZW-compressed ``data``.
+
+        :param data: the encoded stream contents.
+        :param decodeParams: accepted for interface compatibility; unused.
+        :return: the decoded data as a string.
+        """
+        return LZWDecode.decoder(data).decode()
+
+
+class ASCII85Decode(object):
+    """Decoder for the PDF ``/ASCII85Decode`` stream filter."""
+
+    @staticmethod
+    def decode(data, decodeParms=None):
+        """
+        Decode ASCII base-85 ``data``.
+
+        An optional leading ``<~`` marker is skipped, whitespace is ignored,
+        ``z`` expands to four zero bytes and ``~`` (``~>``) ends the data.
+
+        :param data: the encoded stream contents.
+        :param decodeParms: accepted for interface compatibility; unused.
+        :return: a ``str`` on Python 2, ``bytes`` on Python 3.
+        """
+        if version_info < ( 3, 0 ):
+            retval = ""
+            group = []
+            x = 0
+            hitEod = False
+            # remove all whitespace from data
+            data = [y for y in data if not (y in ' \n\r\t')]
+            while not hitEod:
+                c = data[x]
+                if len(retval) == 0 and c == "<" and data[x+1] == "~":
+                    x += 2
+                    continue
+                elif c == 'z':
+                    assert len(group) == 0
+                    retval += '\x00\x00\x00\x00'
+                    x += 1
+                    continue
+                elif c == "~" and data[x+1] == ">":
+                    if len(group) != 0:
+                        # cannot have a final group of just 1 char
+                        assert len(group) > 1
+                        cnt = len(group) - 1
+                        group += [ 85, 85, 85 ]
+                        # hitEod doubles as the number of bytes to keep
+                        # from the padded final group.
+                        hitEod = cnt
+                    else:
+                        break
+                else:
+                    c = ord(c) - 33
+                    assert c >= 0 and c < 85
+                    group += [ c ]
+                if len(group) >= 5:
+                    b = group[0] * (85**4) + \
+                        group[1] * (85**3) + \
+                        group[2] * (85**2) + \
+                        group[3] * 85 + \
+                        group[4]
+                    # 2**32 - 1 (four 0xFF bytes) is itself a valid group.
+                    assert b <= (2**32 - 1)
+                    c4 = chr((b >> 0) % 256)
+                    c3 = chr((b >> 8) % 256)
+                    c2 = chr((b >> 16) % 256)
+                    c1 = chr(b >> 24)
+                    retval += (c1 + c2 + c3 + c4)
+                    if hitEod:
+                        retval = retval[:-4+hitEod]
+                    group = []
+                x += 1
+            return retval
+        else:
+            if isinstance(data, str):
+                data = data.encode('ascii')
+            # Skip the optional "<~" start marker.  '<' lies inside the
+            # '!'..'u' digit range, so without this it would be read as a
+            # digit and the '~' that follows would end decoding at once.
+            if data[:2] == b'<~':
+                data = data[2:]
+            n = b = 0
+            out = bytearray()
+            for c in data:
+                if ord('!') <= c and c <= ord('u'):
+                    n += 1
+                    b = b*85+(c-33)
+                    if n == 5:
+                        out += struct.pack(b'>L',b)
+                        n = b = 0
+                elif c == ord('z'):
+                    assert n == 0
+                    out += b'\0\0\0\0'
+                elif c == ord('~'):
+                    if n:
+                        # Pad the short final group with the highest digit
+                        # and keep only the bytes it really encodes.
+                        for _ in range(5-n):
+                            b = b*85+84
+                        out += struct.pack(b'>L',b)[:n-1]
+                    break
+            return bytes(out)
+
+
+def decodeStreamData(stream):
+    """
+    Apply every filter listed in ``stream["/Filter"]`` to ``stream._data``.
+
+    :param stream: a stream object offering ``get()`` and a ``_data``
+        attribute.
+    :return: the decoded data; ``stream._data`` unchanged when it is empty
+        or no filter is listed.
+    :raises NotImplementedError: for an unknown filter, or a ``/Crypt``
+        filter whose parameters contain ``/Name`` or ``/Type``.
+    """
+    from .generic import NameObject
+    filters = stream.get("/Filter", ())
+    if len(filters) and not isinstance(filters[0], NameObject):
+        # we have a single filter instance
+        filters = (filters,)
+    data = stream._data
+    # If there is not data to decode we should not try to decode the data.
+    if data:
+        for filterType in filters:
+            if filterType == "/FlateDecode" or filterType == "/Fl":
+                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
+                data = ASCIIHexDecode.decode(data)
+            elif filterType == "/LZWDecode" or filterType == "/LZW":
+                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
+            elif filterType == "/ASCII85Decode" or filterType == "/A85":
+                data = ASCII85Decode.decode(data)
+            elif filterType == "/Crypt":
+                # The parameter dictionary is stored under "/DecodeParms",
+                # the key the other branches use.  The misspelt
+                # "/DecodeParams" is still consulted as a fallback so
+                # existing behaviour is preserved.
+                decodeParams = stream.get("/DecodeParms", None)
+                if decodeParams is None:
+                    decodeParams = stream.get("/DecodeParams", {})
+                if "/Name" not in decodeParams and "/Type" not in decodeParams:
+                    pass
+                else:
+                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
+            else:
+                # unsupported filter
+                raise NotImplementedError("unsupported filter %s" % filterType)
+    return data
--- a/vendor/PyPDF2/generic.py
+++ b/vendor/PyPDF2/generic.py
--- a/vendor/PyPDF2/merger.py
+++ b/vendor/PyPDF2/merger.py
@ -0,0 +1,553 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from .generic import *
+from .utils import isString, str_
+from .pdf import PdfFileReader, PdfFileWriter
+from .pagerange import PageRange
+from sys import version_info
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+    StreamIO = StringIO
+else:
+    from io import BytesIO
+    from io import FileIO as file
+    StreamIO = BytesIO
+
+
+class _MergedPage(object):
+    """
+    Internal record kept by PdfFileMerger for every page queued for merging.
+
+    It stores the source page object, the reader the page came from, the
+    merger-assigned id, and a slot for the page's reference in the output
+    document (``None`` until one is assigned).
+    """
+    def __init__(self, pagedata, src, id):
+        # No output reference exists when the page is first queued.
+        self.out_pagedata = None
+        self.pagedata, self.src = pagedata, src
+        self.id = id
+
+
+class PdfFileMerger(object):
+    """
+    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
+    into a single PDF. It can concatenate, slice, insert, or any combination
+    of the above.
+
+    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
+    and :meth:`write()<write>` for usage information.
+
+    :param bool strict: Determines whether user should be warned of all
+            problems and also causes some correctable problems to be fatal.
+            Defaults to ``True``.
+    """
+
+    def __init__(self, strict=True):
+        # Strictness is remembered and handed to each PdfFileReader
+        # created while merging.
+        self.strict = strict
+        # Counter used to hand out the next merged-page id.
+        self.id_count = 0
+        # Bookkeeping collections, all empty until something is merged.
+        self.inputs, self.pages = [], []
+        self.bookmarks, self.named_dests = [], []
+        # Writer that receives the merged pages.
+        self.output = PdfFileWriter()
+
+    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+        """
+        Merges the pages from the given file into the output file at the
+        specified page number.
+
+        :param int position: The *page number* to insert this file. File will
+            be inserted after the given number.
+
+        :param fileobj: A File Object or an object that supports the standard read
+            and seek methods similar to a File Object. Could also be a
+            string representing a path to a PDF file.
+
+        :param str bookmark: Optionally, you may specify a bookmark to be applied at
+            the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+            to merge only the specified range of pages from the source
+            document into the output document.
+
+        :param bool import_bookmarks: You may prevent the source document's bookmarks
+            from being imported by specifying this as ``False``.
+
+        :raises TypeError: if ``pages`` is neither ``None``, a PageRange
+            nor a tuple.
+        """
+
+        # This parameter is passed to self.inputs.append and means
+        # that the stream used was created in this method.
+        my_file = False
+
+        # If the fileobj parameter is a string, assume it is a path
+        # and create a file object at that location. If it is a file,
+        # copy the file's contents into a BytesIO (or StreamIO) stream object; if
+        # it is a PdfFileReader, copy that reader's stream into a
+        # BytesIO (or StreamIO) stream.
+        # If fileobj is none of the above types, it is not modified
+        decryption_key = None
+        if isString(fileobj):
+            fileobj = file(fileobj, 'rb')
+            my_file = True
+        elif isinstance(fileobj, file):
+            fileobj.seek(0)
+            filecontent = fileobj.read()
+            fileobj = StreamIO(filecontent)
+            my_file = True
+        elif isinstance(fileobj, PdfFileReader):
+            # Read the key from the reader itself, before ``fileobj`` is
+            # rebound to the copied stream (which never carries one).
+            if hasattr(fileobj, '_decryption_key'):
+                decryption_key = fileobj._decryption_key
+            orig_tell = fileobj.stream.tell()
+            fileobj.stream.seek(0)
+            filecontent = StreamIO(fileobj.stream.read())
+            fileobj.stream.seek(orig_tell) # reset the stream to its original location
+            fileobj = filecontent
+            my_file = True
+
+        # Create a new PdfFileReader instance using the stream
+        # (either file or BytesIO or StringIO) created above
+        pdfr = PdfFileReader(fileobj, strict=self.strict)
+        if decryption_key is not None:
+            pdfr._decryption_key = decryption_key
+
+        # Find the range of pages to merge.
+        if pages is None:
+            pages = (0, pdfr.getNumPages())
+        elif isinstance(pages, PageRange):
+            pages = pages.indices(pdfr.getNumPages())
+        elif not isinstance(pages, tuple):
+            raise TypeError('"pages" must be a tuple of (start, stop[, step])')
+
+        srcpages = []
+        if bookmark:
+            # The bookmark points at the id the first merged page will get.
+            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
+
+        outline = []
+        if import_bookmarks:
+            outline = pdfr.getOutlines()
+            outline = self._trim_outline(pdfr, outline, pages)
+
+        if bookmark:
+            # The imported outline is nested below the new bookmark.
+            self.bookmarks += [bookmark, outline]
+        else:
+            self.bookmarks += outline
+
+        dests = pdfr.namedDestinations
+        dests = self._trim_dests(pdfr, dests, pages)
+        self.named_dests += dests
+
+        # Gather all the pages that are going to be merged
+        for i in range(*pages):
+            pg = pdfr.getPage(i)
+
+            id = self.id_count
+            self.id_count += 1
+
+            mp = _MergedPage(pg, pdfr, id)
+
+            srcpages.append(mp)
+
+        self._associate_dests_to_pages(srcpages)
+        self._associate_bookmarks_to_pages(srcpages)
+
+        # Slice to insert the pages at the specified position
+        self.pages[position:position] = srcpages
+
+        # Keep track of our input files so we can close them later
+        self.inputs.append((fileobj, pdfr, my_file))
+
+    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
+        """
+        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
+        all pages onto the end of the file instead of specifying a position.
+
+        Equivalent to calling ``merge(len(self.pages), ...)`` with the same
+        remaining arguments.
+
+        :param fileobj: A File Object or an object that supports the standard read
+            and seek methods similar to a File Object. Could also be a
+            string representing a path to a PDF file.
+
+        :param str bookmark: Optionally, you may specify a bookmark to be applied at
+            the beginning of the included file by supplying the text of the bookmark.
+
+        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
+            to merge only the specified range of pages from the source
+            document into the output document.
+
+        :param bool import_bookmarks: You may prevent the source document's bookmarks
+            from being imported by specifying this as ``False``.
+        """
+
+        # The current page count is the insertion position, i.e. the end.
+        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
+
+    def write(self, fileobj):
+        """
+        Writes all data that has been merged to the given output file.
+
+        :param fileobj: Output file. Can be a filename or any kind of
+            file-like object.  A file opened here from a filename is
+            closed again before returning, even if writing fails; a
+            caller-supplied object is left open.
+        """
+        my_file = False
+        if isString(fileobj):
+            fileobj = file(fileobj, 'wb')
+            my_file = True
+
+        try:
+            # Add pages to the PdfFileWriter and remember, for each one, the
+            # reference of the page object just appended to the writer's
+            # page tree (the last entry of "/Kids").
+            for page in self.pages:
+                self.output.addPage(page.pagedata)
+                page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
+
+            # Once all pages are added, create bookmarks to point at those pages
+            self._write_dests()
+            self._write_bookmarks()
+
+            # Write the output to the file
+            self.output.write(fileobj)
+        finally:
+            # Do not leak the handle opened above when anything fails.
+            if my_file:
+                fileobj.close()
+
+    def close(self):
+        """
+        Release everything held by the merger: the page list is emptied,
+        every input stream flagged as opened by the merger is closed, and
+        the input list and the output writer are dropped.
+        """
+        self.pages = []
+        for stream, _reader, owned in self.inputs:
+            if not owned:
+                # Caller-supplied stream: the caller closes it.
+                continue
+            stream.close()
+
+        self.inputs, self.output = [], None
+
+    def addMetadata(self, infos):
+        """
+        Add custom metadata to the output.
+
+        The dictionary is forwarded unchanged to ``self.output.addMetadata``.
+
+        :param dict infos: a Python dictionary where each key is a field
+            and each value is your new metadata.
+            Example: ``{u'/Title': u'My title'}``
+        """
+        self.output.addMetadata(infos)
+
+    def setPageLayout(self, layout):
+        """
+        Set the page layout.
+
+        The value is forwarded unchanged to ``self.output.setPageLayout``.
+
+        :param str layout: The page layout to be used
+
+        Valid layouts are:
+             /NoLayout        Layout explicitly not specified
+             /SinglePage      Show one page at a time
+             /OneColumn       Show one column at a time
+             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
+             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
+             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
+             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
+        """
+        self.output.setPageLayout(layout)
+
+    def setPageMode(self, mode):
+        """
+        Set the page mode.
+
+        The value is forwarded unchanged to ``self.output.setPageMode``.
+
+        :param str mode: The page mode to use.
+
+        Valid modes are:
+            /UseNone         Do not show outlines or thumbnails panels
+            /UseOutlines     Show outlines (aka bookmarks) panel
+            /UseThumbs       Show page thumbnails panel
+            /FullScreen      Fullscreen view
+            /UseOC           Show Optional Content Group (OCG) panel
+            /UseAttachments  Show attachments panel
+        """
+        self.output.setPageMode(mode)
+
+    def _trim_dests(self, pdf, dests, pages):
+        """
+        Removes any named destinations that are not a part of the specified
+        page set.
+
+        :param pdf: reader whose ``getPage(j)`` supplies the candidate pages.
+        :param dests: mapping of destination name to destination object.
+        :param pages: ``(start, stop[, step])`` tuple unpacked into ``range``.
+        :return: list of the destinations whose ``/Page`` is one of the
+            selected pages.  Each kept destination is modified in place:
+            its ``/Page`` entry is replaced by the resolved page object.
+        """
+        new_dests = []
+        # ``dests`` itself is not modified here, so no copy is needed.
+        for k, o in dests.items():
+            for j in range(*pages):
+                if pdf.getPage(j).getObject() == o['/Page'].getObject():
+                    o[NameObject('/Page')] = o['/Page'].getObject()
+                    # Sanity check: the mapping key matches the title.
+                    assert str_(k) == str_(o['/Title'])
+                    new_dests.append(o)
+                    break
+        return new_dests
+
+    def _trim_outline(self, pdf, outline, pages):
+        """
+        Removes any outline/bookmark entries that are not a part of the
+        specified page set.
+
+        :param pdf: reader whose ``getPage(j)`` supplies the candidate pages.
+        :param outline: list of entries; a nested list is trimmed
+            recursively.
+        :param pages: ``(start, stop[, step])`` tuple unpacked into ``range``.
+        :return: a new list holding the surviving entries and sub-lists.
+            Surviving entries are modified in place: their ``/Page`` is
+            replaced by the resolved page object.
+        """
+        new_outline = []
+        # Whether the most recent non-list entry was kept.  Starts True so
+        # that a leading nested list never pulls in ``outline[-1]``.
+        prev_header_added = True
+        for i, o in enumerate(outline):
+            if isinstance(o, list):
+                sub = self._trim_outline(pdf, o, pages)
+                if sub:
+                    # Something inside the nested list survived.  If the
+                    # entry just before it was dropped, add it back first
+                    # (presumably it is the sub-list's parent heading).
+                    if not prev_header_added:
+                        new_outline.append(outline[i-1])
+                    new_outline.append(sub)
+            else:
+                prev_header_added = False
+                for j in range(*pages):
+                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
+                        o[NameObject('/Page')] = o['/Page'].getObject()
+                        new_outline.append(o)
+                        prev_header_added = True
+                        break
+        return new_outline
+
+    def _write_dests(self):
+        """
+        Add every resolvable named destination to the output writer.
+
+        A destination whose ``/Page`` equals the id of a merged page has
+        that entry replaced by the page's ``out_pagedata`` and is handed to
+        ``self.output.addNamedDestinationObject``.  Destinations without a
+        ``/Page`` entry, or with no matching page, are left alone.
+        """
+        for v in self.named_dests:
+            if '/Page' not in v:
+                continue
+            for p in self.pages:
+                if p.id == v['/Page']:
+                    v[NameObject('/Page')] = p.out_pagedata
+                    self.output.addNamedDestinationObject(v)
+                    # Only the first matching page is used.
+                    break
+
+    def _write_bookmarks(self, bookmarks=None, parent=None):
+        """
+        Add the collected bookmarks to the output writer.
+
+        For each bookmark whose ``/Page`` equals the id of a merged page, a
+        ``/GoTo`` action is built from the bookmark's fit type and
+        coordinates, the positional keys are removed, and the bookmark is
+        passed to ``self.output.addBookmarkDict``.  A nested list is written
+        recursively with the most recently added bookmark as its parent.
+
+        :param bookmarks: list to process; defaults to ``self.bookmarks``.
+        :param parent: value passed through to ``addBookmarkDict`` as the
+            parent of the entries in ``bookmarks``.
+        """
+        if bookmarks is None:
+            bookmarks = self.bookmarks
+
+        # Coordinate keys consumed by each fit type, in destination-array
+        # order.  '/Fit' and '/FitB' take no coordinates and are not listed.
+        fit_coords = (
+            (('/FitH', '/FitBH'), ('/Top',)),
+            (('/FitV', '/FitBV'), ('/Left',)),
+            (('/XYZ',), ('/Left', '/Top', '/Zoom')),
+            (('/FitR',), ('/Left', '/Bottom', '/Right', '/Top')),
+        )
+
+        last_added = None
+        for b in bookmarks:
+            if isinstance(b, list):
+                self._write_bookmarks(b, last_added)
+                continue
+
+            pageno = None
+            if '/Page' in b:
+                for i, p in enumerate(self.pages):
+                    if p.id == b['/Page']:
+                        args = [NumberObject(p.id), NameObject(b['/Type'])]
+                        for names, keys in fit_coords:
+                            if b['/Type'] in names:
+                                for key in keys:
+                                    # pop() rather than del: an absent key is
+                                    # a handled case (it defaults to 0) and
+                                    # must not raise KeyError.
+                                    value = b.pop(key, None)
+                                    if value is None or isinstance(value, NullObject):
+                                        args.append(FloatObject(0))
+                                    else:
+                                        args.append(FloatObject(value))
+                                break
+
+                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
+
+                        pageno = i
+                        break
+            if pageno is not None:
+                # These keys were only needed to build the action above.
+                del b['/Page'], b['/Type']
+                last_added = self.output.addBookmarkDict(b, parent)
+
+    def _associate_dests_to_pages(self, pages):
+        """
+        Replace the ``/Page`` object of every named destination by the id of
+        the matching merged page.
+
+        Destinations whose ``/Page`` is already a NumberObject are skipped.
+        When several pages match, the id of the last one is used.
+
+        :param pages: merged-page records to match against.
+        :raises ValueError: if a destination matches none of ``pages``.
+        """
+        for nd in self.named_dests:
+            np = nd['/Page']
+            if isinstance(np, NumberObject):
+                continue
+
+            matching_ids = [p.id for p in pages
+                            if np.getObject() == p.pagedata.getObject()]
+            pageno = matching_ids[-1] if matching_ids else None
+
+            if pageno is None:
+                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
+            nd[NameObject('/Page')] = NumberObject(pageno)
+
+    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
+        """
+        Rewrite the ``/Page`` entry of each bookmark as a page number.
+
+        Nested lists inside *bookmarks* are processed recursively.  Bookmarks
+        whose ``/Page`` is already a ``NumberObject`` are skipped; for the
+        others ``/Page`` is compared (via ``getObject()``) with the
+        ``pagedata`` of each entry of *pages* and replaced by
+        ``NumberObject(<entry>.id)``.
+
+        :param pages: iterable of objects exposing ``pagedata`` and ``id``.
+        :param list bookmarks: list to process; defaults to ``self.bookmarks``.
+        :raises ValueError: if a bookmark matches none of *pages*.
+        """
+        if bookmarks is None:
+            bookmarks = self.bookmarks
+
+        for b in bookmarks:
+            if isinstance(b, list):
+                self._associate_bookmarks_to_pages(pages, b)
+                continue
+
+            target = b['/Page']
+
+            if isinstance(target, NumberObject):
+                continue
+
+            pageno = None
+            for p in pages:
+                # No early exit: if several entries match, the last one wins.
+                if target.getObject() == p.pagedata.getObject():
+                    pageno = p.id
+
+            if pageno is None:
+                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
+            b[NameObject('/Page')] = NumberObject(pageno)
+
+    def findBookmark(self, bookmark, root=None):
+        """
+        Locate a bookmark inside the (possibly nested) bookmark list.
+
+        :param bookmark: a bookmark object, or the ``/Title`` value to look
+            for.
+        :param list root: list to search; defaults to ``self.bookmarks``.
+        :return: list of indices, one per nesting level, leading to the first
+            match; ``None`` when nothing matches.
+        """
+        if root is None:
+            root = self.bookmarks
+
+        for i, b in enumerate(root):
+            if isinstance(b, list):
+                # Nested list: search it and prefix the result with our index.
+                res = self.findBookmark(bookmark, b)
+                if res:
+                    return [i] + res
+            elif b == bookmark or b['/Title'] == bookmark:
+                return [i]
+
+        return None
+
+    def addBookmark(self, title, pagenum, parent=None):
+        """
+        Add a bookmark to this PDF file.
+
+        :param str title: Title to use for this bookmark.
+        :param int pagenum: Page number this bookmark will point to.
+        :param parent: A reference to a parent bookmark to create nested
+            bookmarks.  A list is used directly as the index path of the
+            parent (the format returned by ``findBookmark``); anything else
+            is looked up with ``findBookmark``.
+        :return: the newly created bookmark object.
+        :raises ValueError: if *parent* cannot be found in the bookmarks.
+        """
+        iloc = None
+        if parent is not None:
+            if isinstance(parent, list):
+                iloc = parent
+            else:
+                iloc = self.findBookmark(parent)
+                if iloc is None:
+                    # Previously surfaced as a TypeError on ``iloc[:-1]``.
+                    raise ValueError("Parent bookmark not found: %r" % (parent,))
+
+        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
+
+        if iloc is None:
+            self.bookmarks.append(dest)
+            return dest
+
+        # Walk down to the list that holds the parent bookmark.
+        bmparent = self.bookmarks
+        for i in iloc[:-1]:
+            bmparent = bmparent[i]
+        # Children are kept in a list placed right after their parent.
+        npos = iloc[-1]+1
+        if npos < len(bmparent) and isinstance(bmparent[npos], list):
+            bmparent[npos].append(dest)
+        else:
+            bmparent.insert(npos, [dest])
+        return dest
+
+    def addNamedDestination(self, title, pagenum):
+        """
+        Add a destination to the output.
+
+        The new ``Destination`` is built with ``/FitH`` and the value 826 and
+        appended to ``self.named_dests``.
+
+        :param str title: Title to use
+        :param int pagenum: Page number this destination points at.
+        """
+        parts = [TextStringObject(title), NumberObject(pagenum)]
+        parts.extend([NameObject('/FitH'), NumberObject(826)])
+        entry = Destination(*parts)
+        self.named_dests.append(entry)
+
+
+class OutlinesObject(list):
+    """
+    A list bound to an outline ``tree`` and to the ``pdf`` that owns it.
+
+    ``remove`` and ``removeAll`` keep the list and the tree in step;
+    ``add`` registers a new bookmark with the pdf and the tree only (it does
+    not append anything to the list itself).
+    """
+
+    def __init__(self, pdf, tree, parent=None):
+        """Start as an empty list and remember *pdf*, *tree* and *parent*."""
+        super(OutlinesObject, self).__init__()
+        self.tree = tree
+        self.pdf = pdf
+        self.parent = parent
+
+    def remove(self, index):
+        """Drop the entry at *index* from the list and from the tree."""
+        victim = self[index]
+        del self[index]
+        self.tree.removeChild(victim)
+
+    def add(self, title, pagenum):
+        """
+        Create a bookmark titled *title* with a ``/GoTo`` action targeting the
+        page reference found at ``/Kids[pagenum]`` of the pdf's pages object,
+        and attach it to the tree.
+        """
+        kids = self.pdf.getObject(self.pdf._pages)['/Kids']
+        page_ref = kids[pagenum]
+
+        goto = DictionaryObject()
+        goto.update({
+            NameObject('/D'): ArrayObject([page_ref, NameObject('/FitH'), NumberObject(826)]),
+            NameObject('/S'): NameObject('/GoTo'),
+        })
+        goto_ref = self.pdf._addObject(goto)
+
+        entry = TreeObject()
+        entry.update({
+            NameObject('/A'): goto_ref,
+            NameObject('/Title'): createStringObject(title),
+        })
+        self.pdf._addObject(entry)
+        self.tree.addChild(entry)
+
+    def removeAll(self):
+        """
+        Detach every child of the tree, popping one list element per child.
+        """
+        # Snapshot first: the tree is modified while we walk its children.
+        snapshot = list(self.tree.children())
+        for node in snapshot:
+            self.tree.removeChild(node)
+            self.pop()
--- a/vendor/PyPDF2/pagerange.py
+++ b/vendor/PyPDF2/pagerange.py
@ -0,0 +1,152 @@
+#!/usr/bin/env python
+"""
+Representation and utils for ranges of PDF file pages.
+
+Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
+All rights reserved. This software is available under a BSD license;
+see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
+"""
+
+import re
+from .utils import isString
+
+# Regex source for one decimal integer; embedded four times in PAGE_RANGE_RE
+# through str.format().
+_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
+PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
+# groups:         12     34     5 6     7 8
+# Group 2 is a lone int; groups 4, 6 and 8 are the start, stop and step of
+# the colon form (this is how PageRange.__init__ reads the match).
+
+
+class ParseError(Exception):
+    """Raised by PageRange when its argument is not a valid page range."""
+
+
+# Help text on the page-range syntax.  It is substituted into the docstring
+# of PageRange.__init__ through the {page_range_help} placeholder.
+PAGE_RANGE_HELP = """Remember, page indices start with zero.
+        Page range expression examples:
+            :     all pages.                   -1    last page.
+            22    just the 23rd page.          :-1   all but the last page.
+            0:3   the first three pages.       -2    second-to-last page.
+            :3    the first three pages.       -2:   last two pages.
+            5:    from the sixth page onward.  -3:-1 third & second to last.
+        The third, "stride" or "step" number is also recognized.
+            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
+            1:10:2    1 3 5 7 9                2::-1     2 1 0.
+            ::-1      all pages in reverse order.
+"""
+
+
+class PageRange(object):
+    """
+    A slice-like representation of a range of page indices,
+        i.e. page numbers, only starting at zero.
+    The syntax is like what you would put between brackets [ ].
+    The slice is one of the few Python types that can't be subclassed,
+    but this class converts to and from slices, and allows similar use.
+      o  PageRange(str) parses a string representing a page range.
+      o  PageRange(slice) directly "imports" a slice.
+      o  to_slice() gives the equivalent slice.
+      o  str() and repr() allow printing.
+      o  indices(n) is like slice.indices(n).
+    """
+
+    def __init__(self, arg):
+        """
+        Initialize with either a slice -- giving the equivalent page range,
+        or a PageRange object -- making a copy,
+        or a string like
+            "int", "[int]:[int]" or "[int]:[int]:[int]",
+            where the brackets indicate optional ints.
+        {page_range_help}
+        Note the difference between this notation and arguments to slice():
+            slice(3) means the first three pages;
+            PageRange("3") means the range of only the fourth page.
+            However PageRange(slice(3)) means the first three pages.
+
+        Raises ParseError if arg is none of the above.
+        """
+        # NOTE: this docstring goes through str.format() below, so it must
+        # not contain any braces other than the placeholder.
+        if isinstance(arg, slice):
+            self._slice = arg
+            return
+
+        if isinstance(arg, PageRange):
+            self._slice = arg.to_slice()
+            return
+
+        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
+        if not m:
+            raise ParseError(arg)
+        elif m.group(2):
+            # Special case: just an int means a range of one page.
+            start = int(m.group(2))
+            # "-1" has to stay open-ended: slice(-1, 0) would select nothing.
+            stop = start + 1 if start != -1 else None
+            self._slice = slice(start, stop)
+        else:
+            self._slice = slice(*[int(g) if g else None
+                                  for g in m.group(4, 6, 8)])
+
+    # Just formatting this when there is __doc__ for __init__
+    if __init__.__doc__:
+        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
+
+    @staticmethod
+    def valid(input):
+        """ True if input is a valid initializer for a PageRange. """
+        if isinstance(input, (slice, PageRange)):
+            return True
+        return isString(input) and bool(re.match(PAGE_RANGE_RE, input))
+
+    def to_slice(self):
+        """ Return the slice equivalent of this page range. """
+        return self._slice
+
+    def __str__(self):
+        """ A string like "1:2:3". """
+        s = self._slice
+        if s.step is None:
+            # A one-page range prints as a bare int -- except when start is
+            # -1: slice(-1, 0) is empty, whereas the string "-1" parses back
+            # to slice(-1, None), so it must keep its explicit "-1:0" form.
+            if s.start is not None and s.start != -1 and s.stop == s.start + 1:
+                return str(s.start)
+
+            indices = s.start, s.stop
+        else:
+            indices = s.start, s.stop, s.step
+        return ':'.join("" if i is None else str(i) for i in indices)
+
+    def __repr__(self):
+        """ A string like "PageRange('1:2:3')". """
+        return "PageRange(" + repr(str(self)) + ")"
+
+    def indices(self, n):
+        """
+        n is the length of the list of pages to choose from.
+        Returns arguments for range().  See help(slice.indices).
+        """
+        return self._slice.indices(n)
+
+
+# Shared instance that parse_filename_page_ranges() pairs with a filename
+# given without an explicit page range.
+PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.
+
+
+def parse_filename_page_ranges(args):
+    """
+    Given a list of filenames and page ranges, return a list of
+    (filename, page_range) pairs.
+    First arg must be a filename; other args are filenames, page-range
+    expressions, slice objects, or PageRange objects.
+    A filename not followed by a page range indicates all pages of the file.
+
+    :param args: list, tuple or other iterable of the items described above.
+    :return: list of ``(filename, PageRange)`` tuples, in input order.
+    :raises ValueError: if a page range appears before any filename.
+    """
+    pairs = []
+    pdf_filename = None
+    did_page_range = False
+    # list() accepts tuples and other iterables as well; the trailing None is
+    # a sentinel that flushes the last filename.
+    for arg in list(args) + [None]:
+        if PageRange.valid(arg):
+            if not pdf_filename:
+                raise ValueError("The first argument must be a filename, " \
+                                 "not a page range.")
+
+            pairs.append( (pdf_filename, PageRange(arg)) )
+            did_page_range = True
+        else:
+            # New filename or end of list--do all of the previous file?
+            if pdf_filename and not did_page_range:
+                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
+
+            pdf_filename = arg
+            did_page_range = False
+    return pairs
--- a/vendor/PyPDF2/pdf.py
+++ b/vendor/PyPDF2/pdf.py
--- a/vendor/PyPDF2/utils.py
+++ b/vendor/PyPDF2/utils.py
@ -0,0 +1,295 @@
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+
+import sys
+
+# The builtins module is named __builtin__ on Python 2.
+try:
+    import __builtin__ as builtins
+except ImportError:  # Py3
+    import builtins
+
+
+# Use the Python 2 builtin when it exists, else the Python 3 equivalent.
+xrange_fn = getattr(builtins, "xrange", range)
+_basestring = getattr(builtins, "basestring", str)
+
+bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
+string_type = getattr(builtins, "unicode", str)
+# ``long`` is only evaluated when running on Python 2.
+int_types = (int, long) if sys.version_info[0] < 3 else (int,)
+
+
+# Make basic type tests more consistent
+def isString(s):
+    """Return True when *s* is an instance of ``_basestring``
+    (``basestring`` on Python 2, ``str`` on Python 3)."""
+    return isinstance(s, _basestring)
+
+
+def isInt(n):
+    """Return True when *n* is an instance of one of ``int_types``
+    (``int``/``long`` on Python 2, ``int`` on Python 3)."""
+    return isinstance(n, int_types)
+
+
+def isBytes(b):
+    """Return True when *b* is an instance of ``bytes_type``, the type of
+    ``bytes()`` on the running interpreter."""
+    return isinstance(b, bytes_type)
+
+
+# Custom implementation of warnings.formatwarning
+def formatWarning(message, category, filename, lineno, line=None):
+    """
+    Format a warning on one line: category name, message, then the base
+    name of the file and the line number in square brackets.
+
+    :param message: the warning message.
+    :param category: warning class; only its ``__name__`` is used.
+    :param str filename: path of the file, with "/" or backslash separators.
+    :param lineno: line number.
+    :param line: accepted for signature compatibility and ignored.
+    :return: the formatted text, terminated by a newline.
+    """
+    # Normalise the separators, then keep the last path component.  Taking
+    # index -1 (not 1) also works for a bare filename without any separator.
+    basename = filename.replace("/", "\\").rsplit("\\", 1)[-1]
+    return "%s: %s [%s:%s]\n" % (category.__name__, message, basename, lineno)
+
+
+def readUntilWhitespace(stream, maxchars=None):
+    """
+    Collect bytes from *stream* one at a time until a whitespace byte or the
+    end of the stream is met, or until *maxchars* bytes have been collected.
+
+    The terminating whitespace byte is consumed but not returned.
+    """
+    collected = b_("")
+    tok = stream.read(1)
+    while tok and not tok.isspace():
+        collected += tok
+        if len(collected) == maxchars:
+            break
+        tok = stream.read(1)
+    return collected
+
+
+def readNonWhitespace(stream):
+    """
+    Read single bytes from *stream* until one is not in WHITESPACES and
+    return it (an empty read, i.e. end of stream, is returned as well).
+    """
+    tok = stream.read(1)
+    while tok in WHITESPACES:
+        tok = stream.read(1)
+    return tok
+
+
+def skipOverWhitespace(stream):
+    """
+    Similar to readNonWhitespace, but returns a Boolean instead of the byte:
+    True if at least one whitespace byte was read before the first
+    non-whitespace byte, False otherwise.
+
+    That first non-whitespace byte is consumed as well.
+    """
+    if stream.read(1) not in WHITESPACES:
+        return False
+    while stream.read(1) in WHITESPACES:
+        pass
+    return True
+
+
+def skipOverComment(stream):
+    """
+    Skip a comment if the stream is positioned on one.
+
+    One byte is read and pushed back with ``seek(-1, 1)``.  If it is ``%``,
+    bytes are consumed up to and including the first CR or LF, or up to the
+    end of the stream when the comment is not terminated by a line break.
+    """
+    tok = stream.read(1)
+    stream.seek(-1, 1)
+    if tok == b_('%'):
+        line_ends = (b_('\n'), b_('\r'))
+        # ``tok`` becomes empty at end of stream; without that test a comment
+        # on the last line with no trailing newline would loop forever.
+        while tok and tok not in line_ends:
+            tok = stream.read(1)
+
+
+def readUntilRegex(stream, regex, ignore_eof=False):
+    """
+    Reads until the regular expression pattern matched (ignore the match).
+    The stream is left positioned at the start of the match.
+
+    :param stream: stream to read from, 16 bytes at a time.
+    :param regex: compiled pattern providing ``search()``.
+    :param bool ignore_eof: If true, ignore end-of-file and return what has
+        been read so far instead of raising.
+    :return: the bytes read before the match.
+    :raises PdfStreamError: on premature end-of-file, unless *ignore_eof*.
+    """
+    pieces = []
+    while True:
+        tok = stream.read(16)
+        if not tok:
+            # stream has truncated prematurely
+            if ignore_eof:
+                break
+            raise PdfStreamError("Stream has ended unexpectedly")
+        # NOTE: every chunk is searched on its own, so a match that straddles
+        # two 16-byte chunks is not detected.
+        m = regex.search(tok)
+        if m is not None:
+            pieces.append(tok[:m.start()])
+            # Rewind so that the next read starts at the match.
+            stream.seek(m.start()-len(tok), 1)
+            break
+        pieces.append(tok)
+    return b_('').join(pieces)
+
+
+class ConvertFunctionsToVirtualList(object):
+    """
+    Read-only sequence facade over two callables: one returning the length,
+    one returning the item stored at a non-negative index.
+    """
+
+    def __init__(self, lengthFunction, getFunction):
+        self.lengthFunction = lengthFunction
+        self.getFunction = getFunction
+
+    def __len__(self):
+        return self.lengthFunction()
+
+    def __getitem__(self, index):
+        """
+        Support integer indices (negative ones count from the end) and
+        slices; a slice yields another virtual list over the selected items.
+        """
+        if isinstance(index, slice):
+            picked = xrange_fn(*index.indices(len(self)))
+            return type(self)(picked.__len__, lambda pos: self[picked[pos]])
+        if not isInt(index):
+            raise TypeError("sequence indices must be integers")
+        size = len(self)
+        # support negative indexes
+        position = size + index if index < 0 else index
+        if not 0 <= position < size:
+            raise IndexError("sequence index out of range")
+        return self.getFunction(position)
+
+
+def RC4_encrypt(key, plaintext):
+    """
+    Apply the RC4 stream cipher to *plaintext* using *key*.
+
+    The output is the input XORed with the key stream, so calling the
+    function again with the same key on the result gives back the input.
+
+    :param key: non-empty key; items are converted with ``ord_``.
+    :param plaintext: data to process; items are converted with ``ord_``.
+    :return: byte string of the same length as *plaintext*.
+    """
+    # Key-scheduling: permute the 256-entry state according to the key.
+    S = list(range(256))
+    j = 0
+    for i in range(256):
+        j = (j + S[i] + ord_(key[i % len(key)])) % 256
+        S[i], S[j] = S[j], S[i]
+    # Key-stream generation, XORed item by item with the input.
+    i, j = 0, 0
+    out = []
+    for x in range(len(plaintext)):
+        i = (i + 1) % 256
+        j = (j + S[i]) % 256
+        S[i], S[j] = S[j], S[i]
+        t = S[(S[i] + S[j]) % 256]
+        out.append(b_(chr(ord_(plaintext[x]) ^ t)))
+    # A single join avoids rebuilding the result once per input byte.
+    return b_("").join(out)
+
+
+def matrixMultiply(a, b):
+    """
+    Return the matrix product of *a* and *b*, both given as sequences of
+    rows.  Every entry is converted with ``float()`` before multiplying.
+    """
+    product = []
+    for row in a:
+        out_row = []
+        for col in zip(*b):
+            out_row.append(sum(float(x) * float(y) for x, y in zip(row, col)))
+        product.append(out_row)
+    return product
+
+
+def markLocation(stream):
+    """
+    Creates text file showing current location in context.
+
+    Writes the RADIUS bytes before the current position, the marker
+    ``HERE`` and the RADIUS bytes after it to ``PyPDF2_pdfLocation.txt`` in
+    the current directory, then moves the stream back by RADIUS.
+    """
+    # Mainly for debugging
+    RADIUS = 5000
+    stream.seek(-RADIUS, 1)
+    # ``with`` closes the file even if a read or a write fails.
+    with open('PyPDF2_pdfLocation.txt', 'w') as outputDoc:
+        outputDoc.write(stream.read(RADIUS))
+        outputDoc.write('HERE')
+        outputDoc.write(stream.read(RADIUS))
+    stream.seek(-RADIUS, 1)
+
+
+class PyPdfError(Exception):
+    """Base class of PdfReadError and PageSizeNotDefinedError."""
+
+
+class PdfReadError(PyPdfError):
+    """PyPdfError subclass; base class of PdfStreamError."""
+
+
+class PageSizeNotDefinedError(PyPdfError):
+    """PyPdfError subclass.
+
+    NOTE(review): presumably signals a page without a defined size -- confirm
+    at the raise sites.
+    """
+
+
+class PdfReadWarning(UserWarning):
+    """Warning category of this package, derived from UserWarning."""
+
+
+class PdfStreamError(PdfReadError):
+    """PdfReadError subclass raised by readUntilRegex() when the stream ends
+    before the pattern is found."""
+
+
+if sys.version_info[0] < 3:
+    def b_(s):
+        """Python 2: ``str`` already is a byte string; return *s* unchanged."""
+        return s
+else:
+    # Memo of the latin-1 encoding of strings shorter than two characters.
+    B_CACHE = {}
+
+    def b_(s):
+        """
+        Python 3: return *s* as ``bytes``.
+
+        ``bytes`` input is returned unchanged; ``str`` input is encoded as
+        latin-1.  Results for strings shorter than two characters are kept
+        in ``B_CACHE``.
+        """
+        # Test for bytes before consulting the cache: its keys are str, so
+        # looking up a bytes object there can only miss, and may compare
+        # bytes with str on the way (a BytesWarning under ``python -b``).
+        if isinstance(s, bytes):
+            return s
+        bc = B_CACHE
+        if s in bc:
+            return bc[s]
+        r = s.encode('latin-1')
+        if len(s) < 2:
+            bc[s] = r
+        return r
+
+
+def u_(s):
+    """
+    Return *s* as text: unchanged on Python 3, decoded with the
+    ``unicode_escape`` codec on Python 2.
+    """
+    if sys.version_info[0] >= 3:
+        return s
+    return unicode(s, 'unicode_escape')
+
+
+def str_(b):
+    """
+    On Python 3, decode *b* as latin-1 when its type is exactly ``bytes``;
+    in every other case return *b* unchanged.
+    """
+    if sys.version_info[0] >= 3 and type(b) == bytes:
+        return b.decode('latin-1')
+    return b
+
+
+def ord_(b):
+    """
+    Return ``ord(b)`` on Python 2 or when the type of *b* is exactly
+    ``str``; otherwise return *b* itself (e.g. an int taken from ``bytes``).
+    """
+    needs_ord = sys.version_info[0] < 3 or type(b) == str
+    return ord(b) if needs_ord else b
+
+
+def chr_(c):
+    """Return *c* unchanged on Python 2, ``chr(c)`` on Python 3."""
+    return c if sys.version_info[0] < 3 else chr(c)
+
+
+def barray(b):
+    """Return *b* unchanged on Python 2, ``bytearray(b)`` on Python 3."""
+    return b if sys.version_info[0] < 3 else bytearray(b)
+
+
+def hexencode(b):
+    """
+    Return the hexadecimal encoding of the byte string *b*, using
+    ``b.encode('hex')`` on Python 2 and the ``hex_codec`` encoder on
+    Python 3.
+    """
+    if sys.version_info[0] >= 3:
+        import codecs
+        encoded, _consumed = codecs.getencoder('hex_codec')(b)
+        return encoded
+    return b.encode('hex')
+
+
+def hexStr(num):
+    """Return ``hex(num)`` with every 'L' (the Python 2 long suffix) removed."""
+    text = hex(num)
+    return text.replace('L', '')
+
+
+# Single-byte values treated as whitespace by readNonWhitespace() and
+# skipOverWhitespace(): space, LF, CR, tab and NUL.
+WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
--- a/vendor/PyPDF2/xmp.py
+++ b/vendor/PyPDF2/xmp.py
@ -0,0 +1,358 @@
+import re
+import datetime
+import decimal
+from .generic import PdfObject
+from xml.dom import getDOMImplementation
+from xml.dom.minidom import parseString
+from .utils import u_
+
+# XML namespace URIs used to look up elements and attributes in the XMP
+# packet.
+RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
+XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
+PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
+XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
+
+# What is the PDFX namespace, you might ask?  I might ask that too.  It's
+# a completely undocumented namespace used to place "custom metadata"
+# properties, which are arbitrary metadata properties with no semantic or
+# documented meaning.  Elements in the namespace are key/value-style storage,
+# where the element name is the key and the content is the value.  The keys
+# are transformed into valid XML identifiers by substituting an invalid
+# identifier character with \u2182 followed by the unicode hex ID of the
+# original character.  A key like "my car" is therefore "my\u21820020car".
+#
+# \u2182, in case you're wondering, is the unicode character
+# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
+# escaping characters.
+#
+# Intentional users of the pdfx namespace should be shot on sight.  A
+# custom data schema and sensical XML elements could be used instead, as is
+# suggested by Adobe's own documentation on XMP (under "Extensibility of
+# Schemas").
+#
+# Information presented here on the /pdfx/ schema is a result of limited
+# reverse engineering, and does not constitute a full specification.
+# Namespace URI read by XmpInformation.custom_properties.
+PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
+
+# ISO 8601 date with optional month, day and time parts (verbose syntax).
+# The time part, when present, requires a time-zone designator.  The pattern
+# is used with match(), so it is anchored at the start of the value only.
+iso8601 = re.compile(r"""
+        (?P<year>[0-9]{4})
+        (-
+            (?P<month>[0-9]{2})
+            (-
+                (?P<day>[0-9]+)
+                (T
+                    (?P<hour>[0-9]{2}):
+                    (?P<minute>[0-9]{2})
+                    (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
+                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
+                )?
+            )?
+        )?
+        """, re.VERBOSE)
+
+
+class XmpInformation(PdfObject):
+    """
+    An object that represents Adobe XMP metadata.
+    Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
+    """
+
+    def __init__(self, stream):
+        """
+        Parse the XML returned by ``stream.getData()`` and keep a reference
+        to its first ``rdf:RDF`` element.
+        """
+        self.stream = stream
+        docRoot = parseString(self.stream.getData())
+        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
+        # namespace URI -> {property name -> converted value}
+        self.cache = {}
+
+    def writeToStream(self, stream, encryption_key):
+        """Delegate writing to the wrapped stream object."""
+        self.stream.writeToStream(stream, encryption_key)
+
+    def getElement(self, aboutUri, namespace, name):
+        """
+        Yield the attribute node and the elements named *name* in
+        *namespace*, for every ``rdf:Description`` whose ``rdf:about`` equals
+        *aboutUri*.
+        """
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                attr = desc.getAttributeNodeNS(namespace, name)
+                if attr is not None:
+                    yield attr
+                for element in desc.getElementsByTagNameNS(namespace, name):
+                    yield element
+
+    def getNodesInNamespace(self, aboutUri, namespace):
+        """
+        Yield every attribute and direct child node belonging to *namespace*,
+        for every ``rdf:Description`` whose ``rdf:about`` equals *aboutUri*.
+        """
+        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
+            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
+                for i in range(desc.attributes.length):
+                    attr = desc.attributes.item(i)
+                    if attr.namespaceURI == namespace:
+                        yield attr
+                for child in desc.childNodes:
+                    if child.namespaceURI == namespace:
+                        yield child
+
+    def _getText(self, element):
+        """Concatenate the data of the direct text-node children of *element*."""
+        return "".join(child.data
+                       for child in element.childNodes
+                       if child.nodeType == child.TEXT_NODE)
+
+    # The converters and getter factories below are plain functions called
+    # while the class body executes (hence no ``self`` parameter).
+
+    def _converter_string(value):
+        """Identity converter for text properties."""
+        return value
+
+    def _converter_date(value):
+        """
+        Convert an ISO 8601 string into a naive ``datetime.datetime``
+        expressed in UTC.  Missing month/day default to 1, missing time
+        fields to 0, and a missing time-zone designator is treated as "Z".
+        """
+        m = iso8601.match(value)
+        year = int(m.group("year"))
+        month = int(m.group("month") or "1")
+        day = int(m.group("day") or "1")
+        hour = int(m.group("hour") or "0")
+        minute = int(m.group("minute") or "0")
+        second = decimal.Decimal(m.group("second") or "0")
+        seconds = second.to_integral(decimal.ROUND_FLOOR)
+        microseconds = (second - seconds) * 1000000
+        tzd = m.group("tzd") or "Z"
+        # datetime() only accepts real integers, so convert the Decimals.
+        dt = datetime.datetime(year, month, day, hour, minute,
+                               int(seconds), int(microseconds))
+        if tzd != "Z":
+            # Take the sign from the text: int("-00") == int("+00") == 0, so
+            # the sign of an offset such as "+00:30" cannot be recovered from
+            # the parsed hours.
+            sign = -1 if tzd.startswith("-") else 1
+            tzd_hours, tzd_minutes = [int(x) for x in tzd[1:].split(":")]
+            offset = datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
+            # local time = UTC + offset, hence UTC = local time - offset
+            dt = dt - sign * offset
+        return dt
+    _test_converter_date = staticmethod(_converter_date)
+
+    def _getter_bag(namespace, name, converter):
+        """Build a getter returning the converted ``rdf:li`` items of the
+        ``rdf:Bag`` containers of a property, as a list."""
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            # Membership test, so that empty results are cached as well.
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = []
+            for element in self.getElement("", namespace, name):
+                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
+                for bag in bags:
+                    for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                        retval.append(converter(self._getText(item)))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_seq(namespace, name, converter):
+        """Build a getter returning the converted ``rdf:li`` items of the
+        ``rdf:Seq`` containers of a property, as a list; a property without
+        any ``rdf:Seq`` contributes its own converted text."""
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = []
+            for element in self.getElement("", namespace, name):
+                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
+                if len(seqs):
+                    for seq in seqs:
+                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            retval.append(converter(self._getText(item)))
+                else:
+                    retval.append(converter(self._getText(element)))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_langalt(namespace, name, converter):
+        """Build a getter returning a dict keyed by ``xml:lang`` for the
+        ``rdf:Alt`` items of a property; a property without any ``rdf:Alt``
+        is stored under the key ``x-default``."""
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            retval = {}
+            for element in self.getElement("", namespace, name):
+                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
+                if len(alts):
+                    for alt in alts:
+                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
+                            value = converter(self._getText(item))
+                            retval[item.getAttribute("xml:lang")] = value
+                else:
+                    retval["x-default"] = converter(self._getText(element))
+            ns_cache[name] = retval
+            return retval
+        return get
+
+    def _getter_single(namespace, name, converter):
+        """Build a getter returning the converted value of the first
+        attribute or element found for a property, or None if there is
+        none."""
+        def get(self):
+            ns_cache = self.cache.setdefault(namespace, {})
+            if name in ns_cache:
+                return ns_cache[name]
+            value = None
+            for element in self.getElement("", namespace, name):
+                if element.nodeType == element.ATTRIBUTE_NODE:
+                    value = element.nodeValue
+                else:
+                    value = self._getText(element)
+                break
+            if value is not None:
+                value = converter(value)
+            ns_cache[name] = value
+            return value
+        return get
+
+    dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
+    """
+    Contributors to the resource (other than the authors). An unsorted
+    array of names.
+    """
+
+    dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
+    """
+    Text describing the extent or scope of the resource.
+    """
+
+    dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
+    """
+    A sorted array of names of the authors of the resource, listed in order
+    of precedence.
+    """
+
+    dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
+    """
+    A sorted array of dates (datetime.datetime instances) of significance to
+    the resource.  The dates and times are in UTC.
+    """
+
+    dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
+    """
+    A language-keyed dictionary of textual descriptions of the content of the
+    resource.
+    """
+
+    dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
+    """
+    The mime-type of the resource.
+    """
+
+    dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
+    """
+    Unique identifier of the resource.
+    """
+
+    dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
+    """
+    An unordered array specifying the languages used in the resource.
+    """
+
+    dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
+    """
+    An unordered array of publisher names.
+    """
+
+    dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
+    """
+    An unordered array of text descriptions of relationships to other
+    documents.
+    """
+
+    dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
+    """
+    A language-keyed dictionary of textual descriptions of the rights the
+    user has to this resource.
+    """
+
+    dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
+    """
+    Unique identifier of the work from which this resource was derived.
+    """
+
+    dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
+    """
+    An unordered array of descriptive phrases or keywords that specify the
+    topic of the content of the resource.
+    """
+
+    dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
+    """
+    A language-keyed dictionary of the title of the resource.
+    """
+
+    dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
+    """
+    An unordered array of textual descriptions of the document type.
+    """
+
+    pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
+    """
+    An unformatted text string representing document keywords.
+    """
+
+    pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
+    """
+    The PDF file version, for example 1.0, 1.3.
+    """
+
+    pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
+    """
+    The name of the tool that created the PDF document.
+    """
+
+    xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
+    """
+    The date and time the resource was originally created.  The date and
+    time are returned as a UTC datetime.datetime object.
+    """
+
+    xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
+    """
+    The date and time the resource was last modified.  The date and time
+    are returned as a UTC datetime.datetime object.
+    """
+
+    xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
+    """
+    The date and time that any metadata for this resource was last
+    changed.  The date and time are returned as a UTC datetime.datetime
+    object.
+    """
+
+    xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
+    """
+    The name of the first known tool used to create the resource.
+    """
+
+    xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
+    """
+    The common identifier for all versions and renditions of this resource.
+    """
+
+    xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
+    """
+    An identifier for a specific incarnation of a document, updated each
+    time a file is saved.
+    """
+
+    def custom_properties(self):
+        # Built on first access, then kept on the instance.
+        if not hasattr(self, "_custom_properties"):
+            self._custom_properties = {}
+            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
+                key = node.localName
+                while True:
+                    # see documentation about PDFX_NAMESPACE earlier in file
+                    idx = key.find(u_("\u2182"))
+                    if idx == -1:
+                        break
+                    # The four characters after the marker are the hex code
+                    # of the character that was escaped.
+                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
+                if node.nodeType == node.ATTRIBUTE_NODE:
+                    value = node.nodeValue
+                else:
+                    value = self._getText(node)
+                self._custom_properties[key] = value
+        return self._custom_properties
+
+    custom_properties = property(custom_properties)
+    """
+    Retrieves custom metadata properties defined in the undocumented pdfx
+    metadata schema.
+
+    :return: a dictionary of key/value items for custom metadata properties.
+    :rtype: dict
+    """