parallel-library/vendor/PyPDF2/utils.py

# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


import sys

try:
    import __builtin__ as builtins
except ImportError:  # Py3
    import builtins


# Names that differ between Python 2 and Python 3, resolved once at import
# time.  Each getattr() falls back to the Python 3 builtin when the
# Python 2 name is not present in the builtins module.
_basestring = getattr(builtins, "basestring", str)
string_type = getattr(builtins, "unicode", str)
xrange_fn = getattr(builtins, "xrange", range)

# The type produced by bytes(), which is simply the ``bytes`` builtin.
bytes_type = bytes

# ``long`` is only evaluated on Python 2, where it exists.
if sys.version_info[0] < 3:
    int_types = (int, long)
else:
    int_types = (int,)


# Make basic type tests more consistent
def isString(s):
    """
    Report whether *s* is an instance of the module's base string type.

    The check is made against ``_basestring``, which this module resolves
    to ``basestring`` when the builtins provide it and to ``str`` otherwise.
    """
    matches = isinstance(s, _basestring)
    return matches


def isInt(n):
    """
    Report whether *n* is an instance of one of the module's integer types.

    ``int_types`` is ``(int, long)`` on Python 2 and ``(int,)`` on Python 3.
    """
    matches = isinstance(n, int_types)
    return matches


def isBytes(b):
    """
    Report whether *b* is an instance of ``bytes_type``.

    ``bytes_type`` is the type of ``bytes()`` on the running interpreter.
    """
    matches = isinstance(b, bytes_type)
    return matches


# Custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    """
    Format a warning as one line: ``<Category>: <message> [<file>:<lineno>]``.

    Only the last component of *filename* is shown.  Both ``/`` and ``\\``
    are treated as path separators.

    :param message: the warning message (rendered with ``%s``).
    :param category: the warning class; its ``__name__`` is printed.
    :param str filename: path of the file that issued the warning.
    :param lineno: line number that issued the warning.
    :param line: accepted for signature compatibility with
        ``warnings.formatwarning``; not used.
    :return: the formatted text, terminated by a newline.
    :rtype: str
    """
    # Normalise the separators, then keep whatever follows the last one.
    # Indexing with -1 rather than 1 keeps this working for a bare file name
    # (e.g. "<stdin>" or "script.py") that contains no separator at all;
    # index 1 raised IndexError in that case.
    basename = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, basename, lineno)


def readUntilWhitespace(stream, maxchars=None):
    """
    Collect data from *stream*, one unit at a time, up to the next whitespace.

    Reading stops when a whitespace unit is read (it is consumed but not
    returned), when a read returns nothing, or as soon as the collected
    length equals *maxchars*.

    :param stream: object whose ``read(1)`` yields the next unit of data.
    :param maxchars: optional length at which to stop; ``None`` never
        matches a length, so it means "no limit".
    :return: the collected non-whitespace data.
    """
    collected = b_("")
    piece = stream.read(1)
    while not (piece.isspace() or not piece):
        collected += piece
        if len(collected) == maxchars:
            # Limit reached: stop without reading any further.
            break
        piece = stream.read(1)
    return collected


def readNonWhitespace(stream):
    """
    Read single units from *stream* until one is not in ``WHITESPACES``.

    :param stream: object whose ``read(1)`` yields the next unit of data.
    :return: the first value read that is not a member of ``WHITESPACES``.
        An empty read is not a member, so it is returned as-is.
    """
    piece = stream.read(1)
    while piece in WHITESPACES:
        piece = stream.read(1)
    return piece


def skipOverWhitespace(stream):
    """
    Consume units from *stream* until one is not in ``WHITESPACES``.

    Like readNonWhitespace, the first non-whitespace value is consumed too,
    but it is not returned.

    :param stream: object whose ``read(1)`` yields the next unit of data.
    :return: True when more than one read was needed, i.e. when at least
        one whitespace unit was consumed before the terminating value.
    :rtype: bool
    """
    reads = 0
    while True:
        piece = stream.read(1)
        reads += 1
        if piece not in WHITESPACES:
            break
    return reads > 1


def skipOverComment(stream):
    """
    Skip a ``%`` comment if *stream* is positioned at one.

    One unit is read and the stream is then moved back by one, so a stream
    that does not start with ``%`` is left for the caller to re-read.  When
    the unit is ``%``, data is consumed up to and including the first
    ``\\n`` or ``\\r``.

    A comment that runs to the end of the stream without a line terminator
    simply ends the skip; previously this case looped forever because
    ``read(1)`` keeps returning an empty value at end of stream.

    :param stream: seekable object supporting ``read(1)`` and
        ``seek(offset, 1)``.
    :return: None
    """
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b_('%'):
        line_ends = (b_('\n'), b_('\r'))
        while tok not in line_ends:
            tok = stream.read(1)
            if not tok:
                # End of stream inside the comment: nothing left to skip.
                break


def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Read from *stream* until *regex* matches and return the data before it.

    The matched text is not consumed: the stream is moved back so that it
    is positioned at the start of the match.

    :param stream: seekable object; it is read in chunks of 16.
    :param regex: compiled pattern providing ``search()``.
    :param bool ignore_eof: If true, reaching end-of-file before a match is
        not an error and everything read so far is returned.
    :return: the data that precedes the match (or all data read, when
        end-of-file is reached with *ignore_eof* set).
    :raises PdfStreamError: on premature end-of-file when *ignore_eof* is
        false.
    """
    name = b_('')
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                return name
            raise PdfStreamError("Stream has ended unexpectedly")
        # Search everything read so far rather than the newest chunk alone,
        # so a match that straddles a 16-unit chunk boundary is not missed.
        # This rescans earlier data on every chunk, which is acceptable for
        # the short tokens this helper is meant to read.
        name += tok
        m = regex.search(name)
        if m is not None:
            # The stream sits just past everything in ``name``; step back to
            # the first unit of the match.
            stream.seek(m.start() - len(name), 1)
            return name[:m.start()]


class ConvertFunctionsToVirtualList(object):
    """
    Read-only sequence facade built from two callables.

    ``lengthFunction()`` supplies the length and ``getFunction(i)`` supplies
    the item at non-negative position ``i``.  Both are invoked on demand;
    nothing is stored by this class apart from the two callables.
    """

    def __init__(self, lengthFunction, getFunction):
        """
        :param lengthFunction: zero-argument callable returning the length.
        :param getFunction: one-argument callable returning the item at a
            position that has already been range-checked.
        """
        self.getFunction = getFunction
        self.lengthFunction = lengthFunction

    def __len__(self):
        """Return whatever ``lengthFunction()`` currently reports."""
        size = self.lengthFunction()
        return size

    def __getitem__(self, index):
        """
        Return the item at *index*, or a new virtual list for a slice.

        :raises TypeError: if *index* is neither a slice nor an integer.
        :raises IndexError: if an integer *index* is out of range.
        """
        if isinstance(index, slice):
            # Resolve the slice against the current length, then expose the
            # selected positions through another instance of the same class.
            positions = xrange_fn(*index.indices(len(self)))

            def fetch(offset):
                return self[positions[offset]]

            return type(self)(positions.__len__, fetch)

        if not isInt(index):
            raise TypeError("sequence indices must be integers")

        size = len(self)
        # A negative index counts back from the end.
        position = size + index if index < 0 else index
        if not 0 <= position < size:
            raise IndexError("sequence index out of range")
        return self.getFunction(position)


def RC4_encrypt(key, plaintext):
    """
    Apply the RC4 stream cipher to *plaintext* using *key*.

    Each output value is the input value XORed with a keystream value that
    depends only on *key*, so calling the function again with the same key
    on the result restores the original data.

    :param key: non-empty sequence whose items ``ord_`` can turn into
        integers (an empty key raises ZeroDivisionError, as before).
    :param plaintext: sequence whose items ``ord_`` can turn into integers.
    :return: the transformed data as a byte string of the same length.
    """
    # Key scheduling: permute the 256-entry state under control of the key.
    S = list(range(256))
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % len(key)])) % 256
        S[i], S[j] = S[j], S[i]

    # Keystream generation.  Results are gathered in a bytearray and
    # converted once at the end; appending to an immutable byte string for
    # every input item copies the whole result each time.
    out = bytearray()
    i = j = 0
    for item in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        out.append(ord_(item) ^ S[(S[i] + S[j]) % 256])
    return bytes(out)


def matrixMultiply(a, b):
    """
    Multiply matrix *a* by matrix *b*, both given as sequences of rows.

    Every entry is converted with ``float()`` before multiplying.  Rows and
    columns are paired with ``zip``, so the shorter of the two determines
    how many terms contribute to each entry.

    :return: the product as a list of row lists.
    """
    product = []
    for row in a:
        out_row = []
        # zip(*b) walks the columns of b.
        for col in zip(*b):
            terms = [float(x) * float(y) for x, y in zip(row, col)]
            out_row.append(sum(terms))
        product.append(out_row)
    return product


def markLocation(stream):
    """
    Create a text file showing the current stream location in context.

    Mainly for debugging.  Writes up to RADIUS units from before the current
    position, the marker ``HERE``, and up to RADIUS units from after it to
    ``PyPDF2_pdfLocation.txt`` in the current working directory.

    :param stream: seekable object supporting ``read`` and relative
        ``seek``.
    :return: None
    """
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    # The context manager closes the file even if a read or write raises;
    # the handle was previously leaked in that case.
    with open('PyPDF2_pdfLocation.txt', 'w') as outputDoc:
        outputDoc.write(stream.read(RADIUS))
        outputDoc.write('HERE')
        outputDoc.write(stream.read(RADIUS))
    # Step back over the second read; this lands on the starting position
    # provided both reads returned RADIUS units.
    stream.seek(-RADIUS, 1)


class PyPdfError(Exception):
    """Root of the exception classes defined in this module."""


class PdfReadError(PyPdfError):
    """PyPdfError subclass; going by its name, signals a failure while reading a PDF."""


class PageSizeNotDefinedError(PyPdfError):
    """PyPdfError subclass; going by its name, signals that a page has no size defined."""


class PdfReadWarning(UserWarning):
    """Warning category derived from UserWarning (not part of the PyPdfError tree)."""


class PdfStreamError(PdfReadError):
    """PdfReadError subclass; readUntilRegex raises it when a stream ends prematurely."""


if sys.version_info[0] < 3:
    def b_(s):
        """Return *s* unchanged (Python 2: nothing to convert)."""
        return s
else:
    # Encoded results for text inputs shorter than two characters.
    B_CACHE = {}

    def b_(s):
        """
        Return *s* as bytes.

        A bytes argument is returned unchanged; anything else is encoded
        with ``s.encode('latin-1')``.  Results for inputs shorter than two
        characters are remembered in ``B_CACHE``.

        :param s: bytes, or an object with ``encode`` (normally ``str``).
        :return: the byte-string form of *s*.
        :raises UnicodeEncodeError: if *s* holds a character that latin-1
            cannot represent.
        """
        # Test for bytes first so a bytes argument is never looked up among
        # the cache's str keys: that lookup compares bytes with str, which
        # triggers BytesWarning when Python runs with -b.  isinstance also
        # lets bytes subclasses through instead of trying to encode them.
        if isinstance(s, bytes):
            return s
        bc = B_CACHE
        if s in bc:
            return bc[s]
        r = s.encode('latin-1')
        if len(s) < 2:
            bc[s] = r
        return r


def u_(s):
    """
    Return *s* as text.

    On Python 3 the argument is handed back untouched.  On Python 2 it is
    decoded with ``unicode(s, 'unicode_escape')``.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')


def str_(b):
    """
    Return *b* as the interpreter's native ``str``.

    On Python 3 a bytes argument is decoded as latin-1; every other value,
    and every value on Python 2, is returned unchanged.

    :param b: bytes or any other object.
    :return: decoded text for bytes input on Python 3, otherwise *b*.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # of bytes are decoded as well instead of slipping through undecoded.
    if sys.version_info[0] >= 3 and isinstance(b, bytes):
        return b.decode('latin-1')
    return b


def ord_(b):
    """
    Return the integer code for *b*.

    On Python 2, and for ``str`` arguments on any version, the result is
    ``ord(b)``.  Any other value on Python 3 is returned unchanged.
    """
    if sys.version_info[0] >= 3 and type(b) != str:
        return b
    return ord(b)


def chr_(c):
    """
    Return ``chr(c)`` on Python 3; on Python 2 return *c* unchanged.
    """
    return chr(c) if sys.version_info[0] >= 3 else c


def barray(b):
    """
    Return ``bytearray(b)`` on Python 3; on Python 2 return *b* unchanged.
    """
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b


def hexencode(b):
    """
    Return the hexadecimal encoding of *b*.

    Python 2 uses ``b.encode('hex')``.  Python 3 runs the ``hex_codec``
    encoder from ``codecs`` and returns the encoded data it produces.
    """
    if sys.version_info[0] >= 3:
        import codecs
        # The encoder returns (encoded_data, length_consumed).
        encoded, _consumed = codecs.getencoder('hex_codec')(b)
        return encoded
    return b.encode('hex')


def hexStr(num):
    """
    Return ``hex(num)`` with every ``L`` character removed.

    Python 2 appends ``L`` to the hex form of a ``long``; stripping it
    makes the text the same on both major versions.
    """
    rendered = hex(num)
    return rendered.replace('L', '')


# Values the reading helpers in this module treat as whitespace:
# space, line feed, carriage return, tab and NUL, each passed through b_().
WHITESPACES = list(map(b_, (' ', '\n', '\r', '\t', '\x00')))
Add PyPDF2 to vendor 9 years ago			`# Copyright (c) 2006, Mathieu Fenniak`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are`
			`# met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions and the following disclaimer in the documentation`
			`# and/or other materials provided with the distribution.`
			`# * The name of the author may not be used to endorse or promote products`
			`# derived from this software without specific prior written permission.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`

			`"""`
			`Utility functions for PDF library.`
			`"""`
			`__author__ = "Mathieu Fenniak"`
			`__author_email__ = "biziqe@mathieu.fenniak.net"`


			`import sys`

			`try:`
			`import __builtin__ as builtins`
			`except ImportError: # Py3`
			`import builtins`


			`xrange_fn = getattr(builtins, "xrange", range)`
			`_basestring = getattr(builtins, "basestring", str)`

			`bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X`
			`string_type = getattr(builtins, "unicode", str)`
			`int_types = (int, long) if sys.version_info[0] < 3 else (int,)`


			`# Make basic type tests more consistent`
			`def isString(s):`
			`"""Test if arg is a string. Compatible with Python 2 and 3."""`
			`return isinstance(s, _basestring)`


			`def isInt(n):`
			`"""Test if arg is an int. Compatible with Python 2 and 3."""`
			`return isinstance(n, int_types)`


			`def isBytes(b):`
			`"""Test if arg is a bytes instance. Compatible with Python 2 and 3."""`
			`return isinstance(b, bytes_type)`


			`#custom implementation of warnings.formatwarning`
			`def formatWarning(message, category, filename, lineno, line=None):`
			`file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name`
			`return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)`


			`def readUntilWhitespace(stream, maxchars=None):`
			`"""`
			`Reads non-whitespace characters and returns them.`
			`Stops upon encountering whitespace or when maxchars is reached.`
			`"""`
			`txt = b_("")`
			`while True:`
			`tok = stream.read(1)`
			`if tok.isspace() or not tok:`
			`break`
			`txt += tok`
			`if len(txt) == maxchars:`
			`break`
			`return txt`


			`def readNonWhitespace(stream):`
			`"""`
			`Finds and reads the next non-whitespace character (ignores whitespace).`
			`"""`
			`tok = WHITESPACES[0]`
			`while tok in WHITESPACES:`
			`tok = stream.read(1)`
			`return tok`


			`def skipOverWhitespace(stream):`
			`"""`
			`Similar to readNonWhitespace, but returns a Boolean if more than`
			`one whitespace character was read.`
			`"""`
			`tok = WHITESPACES[0]`
			`cnt = 0;`
			`while tok in WHITESPACES:`
			`tok = stream.read(1)`
			`cnt+=1`
			`return (cnt > 1)`


			`def skipOverComment(stream):`
			`tok = stream.read(1)`
			`stream.seek(-1, 1)`
			`if tok == b_('%'):`
			`while tok not in (b_('\n'), b_('\r')):`
			`tok = stream.read(1)`


			`def readUntilRegex(stream, regex, ignore_eof=False):`
			`"""`
			`Reads until the regular expression pattern matched (ignore the match)`
			`Raise PdfStreamError on premature end-of-file.`
			`:param bool ignore_eof: If true, ignore end-of-line and return immediately`
			`"""`
			`name = b_('')`
			`while True:`
			`tok = stream.read(16)`
			`if not tok:`
			`# stream has truncated prematurely`
			`if ignore_eof == True:`
			`return name`
			`else:`
			`raise PdfStreamError("Stream has ended unexpectedly")`
			`m = regex.search(tok)`
			`if m is not None:`
			`name += tok[:m.start()]`
			`stream.seek(m.start()-len(tok), 1)`
			`break`
			`name += tok`
			`return name`


			`class ConvertFunctionsToVirtualList(object):`
			`def __init__(self, lengthFunction, getFunction):`
			`self.lengthFunction = lengthFunction`
			`self.getFunction = getFunction`

			`def __len__(self):`
			`return self.lengthFunction()`

			`def __getitem__(self, index):`
			`if isinstance(index, slice):`
			`indices = xrange_fn(*index.indices(len(self)))`
			`cls = type(self)`
			`return cls(indices.__len__, lambda idx: self[indices[idx]])`
			`if not isInt(index):`
			`raise TypeError("sequence indices must be integers")`
			`len_self = len(self)`
			`if index < 0:`
			`# support negative indexes`
			`index = len_self + index`
			`if index < 0 or index >= len_self:`
			`raise IndexError("sequence index out of range")`
			`return self.getFunction(index)`


			`def RC4_encrypt(key, plaintext):`
			`S = [i for i in range(256)]`
			`j = 0`
			`for i in range(256):`
			`j = (j + S[i] + ord_(key[i % len(key)])) % 256`
			`S[i], S[j] = S[j], S[i]`
			`i, j = 0, 0`
			`retval = b_("")`
			`for x in range(len(plaintext)):`
			`i = (i + 1) % 256`
			`j = (j + S[i]) % 256`
			`S[i], S[j] = S[j], S[i]`
			`t = S[(S[i] + S[j]) % 256]`
			`retval += b_(chr(ord_(plaintext[x]) ^ t))`
			`return retval`


			`def matrixMultiply(a, b):`
			`return [[sum([float(i)*float(j)`
			`for i, j in zip(row, col)]`
			`) for col in zip(*b)]`
			`for row in a]`


			`def markLocation(stream):`
			`"""Creates text file showing current location in context."""`
			`# Mainly for debugging`
			`RADIUS = 5000`
			`stream.seek(-RADIUS, 1)`
			`outputDoc = open('PyPDF2_pdfLocation.txt', 'w')`
			`outputDoc.write(stream.read(RADIUS))`
			`outputDoc.write('HERE')`
			`outputDoc.write(stream.read(RADIUS))`
			`outputDoc.close()`
			`stream.seek(-RADIUS, 1)`


			`class PyPdfError(Exception):`
			`pass`


			`class PdfReadError(PyPdfError):`
			`pass`


			`class PageSizeNotDefinedError(PyPdfError):`
			`pass`


			`class PdfReadWarning(UserWarning):`
			`pass`


			`class PdfStreamError(PdfReadError):`
			`pass`


			`if sys.version_info[0] < 3:`
			`def b_(s):`
			`return s`
			`else:`
			`B_CACHE = {}`

			`def b_(s):`
			`bc = B_CACHE`
			`if s in bc:`
			`return bc[s]`
			`if type(s) == bytes:`
			`return s`
			`else:`
			`r = s.encode('latin-1')`
			`if len(s) < 2:`
			`bc[s] = r`
			`return r`


			`def u_(s):`
			`if sys.version_info[0] < 3:`
			`return unicode(s, 'unicode_escape')`
			`else:`
			`return s`


			`def str_(b):`
			`if sys.version_info[0] < 3:`
			`return b`
			`else:`
			`if type(b) == bytes:`
			`return b.decode('latin-1')`
			`else:`
			`return b`


			`def ord_(b):`
			`if sys.version_info[0] < 3 or type(b) == str:`
			`return ord(b)`
			`else:`
			`return b`


			`def chr_(c):`
			`if sys.version_info[0] < 3:`
			`return c`
			`else:`
			`return chr(c)`


			`def barray(b):`
			`if sys.version_info[0] < 3:`
			`return b`
			`else:`
			`return bytearray(b)`


			`def hexencode(b):`
			`if sys.version_info[0] < 3:`
			`return b.encode('hex')`
			`else:`
			`import codecs`
			`coder = codecs.getencoder('hex_codec')`
			`return coder(b)[0]`


			`def hexStr(num):`
			`return hex(num).replace('L', '')`


			`WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]`