You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
296 lines
7.6 KiB
Python
296 lines
7.6 KiB
Python
9 years ago
|
# Copyright (c) 2006, Mathieu Fenniak
|
||
|
# All rights reserved.
|
||
|
#
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted provided that the following conditions are
|
||
|
# met:
|
||
|
#
|
||
|
# * Redistributions of source code must retain the above copyright notice,
|
||
|
# this list of conditions and the following disclaimer.
|
||
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
||
|
# this list of conditions and the following disclaimer in the documentation
|
||
|
# and/or other materials provided with the distribution.
|
||
|
# * The name of the author may not be used to endorse or promote products
|
||
|
# derived from this software without specific prior written permission.
|
||
|
#
|
||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
"""
|
||
|
Utility functions for PDF library.
|
||
|
"""
|
||
|
__author__ = "Mathieu Fenniak"
|
||
|
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||
|
|
||
|
|
||
|
import sys
|
||
|
|
||
|
try:
|
||
|
import __builtin__ as builtins
|
||
|
except ImportError: # Py3
|
||
|
import builtins
|
||
|
|
||
|
|
||
|
# Python 2/3 compatibility aliases, resolved once at import time.

# Lazy integer range: ``xrange`` where the builtins module has it, else ``range``.
xrange_fn = getattr(builtins, "xrange", range)
# Common string base class: ``basestring`` where available, else ``str``.
_basestring = getattr(builtins, "basestring", str)

# The concrete bytes type of the running interpreter.
bytes_type = bytes
# Unicode text type: ``unicode`` where available, else ``str``.
string_type = getattr(builtins, "unicode", str)
# Integer types accepted by isInt(); ``long`` is only looked up on Python 2.
if sys.version_info[0] < 3:
    int_types = (int, long)
else:
    int_types = (int,)
|
||
|
|
||
|
|
||
|
# Make basic type tests more consistent
|
||
|
def isString(s):
    """Return True when *s* is an instance of the module's string base type.

    The check uses ``_basestring``, which this module binds to
    ``basestring`` when the builtins module provides it and to ``str``
    otherwise.
    """
    is_text = isinstance(s, _basestring)
    return is_text
|
||
|
|
||
|
|
||
|
def isInt(n):
    """Return True when *n* is an instance of one of the ``int_types``.

    ``int_types`` is ``(int, long)`` on Python 2 and ``(int,)`` on Python 3.
    """
    is_integer = isinstance(n, int_types)
    return is_integer
|
||
|
|
||
|
|
||
|
def isBytes(b):
    """Return True when *b* is an instance of ``bytes_type``.

    ``bytes_type`` is the interpreter's ``bytes`` type, so the same call
    works on Python 2 and Python 3.
    """
    is_binary = isinstance(b, bytes_type)
    return is_binary
|
||
|
|
||
|
|
||
|
def formatWarning(message, category, filename, lineno, line=None):
    """
    Custom implementation of ``warnings.formatwarning``.

    Produces a compact single-line warning that shows only the base name
    of the file instead of its full path.

    :param message: the warning message (formatted with ``%s``).
    :param category: the warning class; its ``__name__`` is printed.
    :param str filename: path of the file that issued the warning; both
        ``/`` and backslash separators are recognised.
    :param lineno: line number that issued the warning.
    :param line: accepted for signature compatibility; not used.
    :return: ``"<Category>: <message> [<file>:<lineno>]"`` plus a newline.
    """
    # Normalise separators so POSIX and Windows paths split the same way.
    # Taking the last element (not index 1) keeps a separator-free name
    # such as "script.py" or "<stdin>" from raising IndexError.
    base_name = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, base_name, lineno)
|
||
|
|
||
|
|
||
|
def readUntilWhitespace(stream, maxchars=None):
    """
    Collect characters from *stream* one at a time until whitespace appears.

    Reading stops when a whitespace character is read (it is consumed but
    not returned), when the stream is exhausted, or as soon as the number
    of collected characters equals *maxchars*.

    :param stream: object whose ``read(1)`` returns one character at a time.
    :param maxchars: optional cap on the number of characters returned.
    :return: the characters read before the stop condition.
    """
    collected = b_("")
    limit_reached = False
    while not limit_reached:
        char = stream.read(1)
        if char.isspace() or not char:
            break
        collected += char
        limit_reached = len(collected) == maxchars
    return collected
|
||
|
|
||
|
|
||
|
def readNonWhitespace(stream):
    """
    Read single characters from *stream* until one is not in WHITESPACES.

    :param stream: object whose ``read(1)`` returns one character at a time.
    :return: the first value read that is not a member of WHITESPACES;
        an empty read (end of stream) also ends the loop and is returned.
    """
    while True:
        char = stream.read(1)
        if char not in WHITESPACES:
            return char
|
||
|
|
||
|
|
||
|
def skipOverWhitespace(stream):
    """
    Advance *stream* past a run of whitespace, like readNonWhitespace.

    The value that ends the run (the first one not in WHITESPACES) is
    consumed as well; it is not returned and the stream is not rewound.

    :param stream: object whose ``read(1)`` returns one character at a time.
    :return: True when more than one read was needed, i.e. when at least
        one whitespace character preceded the value that ended the run.
    """
    reads = 0
    while True:
        char = stream.read(1)
        reads += 1
        if char not in WHITESPACES:
            break
    return reads > 1
|
||
|
|
||
|
|
||
|
def skipOverComment(stream):
    """
    Skip a ``%`` comment if one starts at the current stream position.

    The next byte is peeked (read, then the stream is moved back one
    byte). When it is ``%``, bytes are consumed up to and including the
    first line feed or carriage return. When it is anything else the
    stream is left where the peek put it.

    :param stream: seekable object whose ``read(1)`` returns one byte.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)
            if not tok:
                # End of stream inside the comment: read() would keep
                # returning an empty value forever, so stop here.
                break
|
||
|
|
||
|
|
||
|
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matched (ignore the match)
    Raise PdfStreamError on premature end-of-file.

    The stream is read in 16-byte chunks and each chunk is searched on its
    own. On a match the stream is repositioned to the start of the match,
    so the matched text is the next thing a caller reads.

    NOTE(review): because chunks are searched independently, a match that
    straddles a chunk boundary is not detected as a whole.

    :param stream: seekable object supporting ``read(n)`` and ``seek``.
    :param regex: compiled pattern exposing ``search``.
    :param bool ignore_eof: If true, ignore end-of-file and return what
        has been read so far instead of raising.
    :return: everything read before the start of the match.
    :raises PdfStreamError: if the stream ends before a match and
        *ignore_eof* is false.
    """
    chunks = []
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                break
            raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            chunks.append(tok[:m.start()])
            # Step back over the unread tail of this chunk, match included.
            stream.seek(m.start() - len(tok), 1)
            break
        chunks.append(tok)
    # One join instead of repeated concatenation keeps the copy cost linear.
    return b_('').join(chunks)
|
||
|
|
||
|
|
||
|
class ConvertFunctionsToVirtualList(object):
    """
    Read-only sequence view built from two callables.

    ``lengthFunction()`` supplies the length and ``getFunction(i)`` supplies
    the item at a non-negative, in-range index ``i``. Negative indices and
    slices are handled by this class before the callables are invoked.
    """

    def __init__(self, lengthFunction, getFunction):
        """Store the length callable and the item-lookup callable."""
        self.getFunction = getFunction
        self.lengthFunction = lengthFunction

    def __len__(self):
        """Return whatever ``lengthFunction()`` reports."""
        return self.lengthFunction()

    def __getitem__(self, index):
        """
        Return the item at *index*, or a new virtual list for a slice.

        :raises TypeError: if *index* is neither a slice nor an integer.
        :raises IndexError: if an integer index is out of range.
        """
        if isinstance(index, slice):
            # Resolve the slice against the current length; the result is
            # another virtual list that maps its positions back onto self.
            selected = xrange_fn(*index.indices(len(self)))

            def fetch(position):
                return self[selected[position]]

            return type(self)(selected.__len__, fetch)

        if not isInt(index):
            raise TypeError("sequence indices must be integers")

        size = len(self)
        # support negative indexes
        position = size + index if index < 0 else index
        if not 0 <= position < size:
            raise IndexError("sequence index out of range")
        return self.getFunction(position)
|
||
|
|
||
|
|
||
|
def RC4_encrypt(key, plaintext):
    """
    Apply the RC4 stream cipher to *plaintext* using *key*.

    :param key: non-empty sequence whose items ``ord_`` can turn into
        integers (an empty key fails with ZeroDivisionError, as before).
    :param plaintext: sequence whose items ``ord_`` can turn into integers.
    :return: the transformed data as a byte string, one output byte per
        input item.
    """
    # Key scheduling: permute the 256-entry state table under the key.
    S = list(range(256))
    key_length = len(key)
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]

    # Keystream generation: XOR each input item with the next keystream byte.
    i, j = 0, 0
    pieces = []
    for symbol in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        pieces.append(b_(chr(ord_(symbol) ^ t)))
    # Joining once avoids re-copying the growing result on every byte.
    return b_("").join(pieces)
|
||
|
|
||
|
|
||
|
def matrixMultiply(a, b):
    """
    Multiply two matrices given as sequences of rows.

    Every entry is converted with ``float`` before multiplying, so the
    result always holds floats. Rows of *a* are paired with columns of *b*
    using ``zip``, which stops at the shorter of the two.

    :param a: left operand, a sequence of rows.
    :param b: right operand, a sequence of rows.
    :return: a list of rows (lists of floats).
    """
    product = []
    for row in a:
        product_row = []
        for column in zip(*b):
            terms = [float(x) * float(y) for x, y in zip(row, column)]
            product_row.append(sum(terms))
        product.append(product_row)
    return product
|
||
|
|
||
|
|
||
|
def markLocation(stream):
    """
    Creates text file showing current location in context.

    Mainly for debugging. Writes ``PyPDF2_pdfLocation.txt`` (relative
    path) containing up to RADIUS bytes before the current position, the
    marker ``HERE``, and up to RADIUS bytes after it.

    :param stream: seekable binary stream supporting relative ``seek``.
    """
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    # Binary mode: the stream yields bytes, which a text-mode file rejects
    # on Python 3. The ``with`` block closes the file even if a read fails.
    with open('PyPDF2_pdfLocation.txt', 'wb') as outputDoc:
        outputDoc.write(stream.read(RADIUS))
        outputDoc.write(b'HERE')
        outputDoc.write(stream.read(RADIUS))
    # Undo the second read so the stream ends where it started.
    stream.seek(-RADIUS, 1)
|
||
|
|
||
|
|
||
|
class PyPdfError(Exception):
    """Base class of this module's exception hierarchy."""
|
||
|
|
||
|
|
||
|
class PdfReadError(PyPdfError):
    """PyPdfError subclass for read failures (per its name); base of PdfStreamError."""
|
||
|
|
||
|
|
||
|
class PageSizeNotDefinedError(PyPdfError):
    """PyPdfError subclass; presumably signals a page without a defined size (not raised in this module)."""
|
||
|
|
||
|
|
||
|
class PdfReadWarning(UserWarning):
    """Warning category (a UserWarning subclass) for PDF reading issues, per its name."""
|
||
|
|
||
|
|
||
|
class PdfStreamError(PdfReadError):
    """PdfReadError subclass; readUntilRegex raises it when the stream ends unexpectedly."""
|
||
|
|
||
|
|
||
|
if sys.version_info[0] < 3:
    def b_(s):
        """Return *s* unchanged (Python 2 branch)."""
        return s
else:
    # Memo of encoded results for strings of length 0 or 1.
    B_CACHE = {}

    def b_(s):
        """
        Return *s* as bytes (Python 3 branch).

        Bytes input, including instances of bytes subclasses, is returned
        as-is. Anything else is encoded with Latin-1, and results for
        strings shorter than two characters are memoised in ``B_CACHE``.

        :param s: a ``bytes`` or ``str`` value.
        :return: *s* itself if it is bytes, else its Latin-1 encoding.
        :raises UnicodeEncodeError: if *s* contains a character that
            Latin-1 cannot encode.
        """
        # Test the type first so bytes values are never hashed and compared
        # against the str keys of the cache.
        if isinstance(s, bytes):
            return s
        cached = B_CACHE.get(s)
        if cached is not None:
            return cached
        encoded = s.encode('latin-1')
        if len(s) < 2:
            B_CACHE[s] = encoded
        return encoded
|
||
|
|
||
|
|
||
|
def u_(s):
    """
    Return *s* as text.

    On Python 3 the argument is returned untouched. On Python 2 it is
    decoded with ``unicode(s, 'unicode_escape')``.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
|
||
|
|
||
|
|
||
|
def str_(b):
    """
    Return *b* as a native string.

    On Python 3 a value whose type is exactly ``bytes`` is decoded as
    Latin-1. Every other value, and every value on Python 2, is returned
    unchanged.
    """
    if sys.version_info[0] >= 3 and type(b) is bytes:
        return b.decode('latin-1')
    return b
|
||
|
|
||
|
|
||
|
def ord_(b):
    """
    Return the integer code for *b*.

    ``ord`` is applied on Python 2, and on Python 3 when *b* is exactly a
    ``str``. Any other Python 3 value (for example an int obtained by
    indexing bytes) is returned unchanged.
    """
    needs_ord = sys.version_info[0] < 3 or type(b) is str
    return ord(b) if needs_ord else b
|
||
|
|
||
|
|
||
|
def chr_(c):
    """
    Return the character for *c*.

    On Python 2 the argument is returned unchanged. On Python 3 it is
    passed through ``chr``.
    """
    return c if sys.version_info[0] < 3 else chr(c)
|
||
|
|
||
|
|
||
|
def barray(b):
    """
    Return *b* as a mutable byte buffer.

    On Python 3 the argument is wrapped in ``bytearray``. On Python 2 it
    is returned unchanged.
    """
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b
|
||
|
|
||
|
|
||
|
def hexencode(b):
    """
    Return the hexadecimal encoding of the byte string *b*.

    Python 2 uses ``b.encode('hex')``. Python 3 uses the ``hex_codec``
    encoder from ``codecs`` and returns its encoded output.
    """
    if sys.version_info[0] >= 3:
        import codecs
        # The encoder returns (output, length consumed); only output is kept.
        encoded, _consumed = codecs.getencoder('hex_codec')(b)
        return encoded
    return b.encode('hex')
|
||
|
|
||
|
|
||
|
def hexStr(num):
|
||
|
return hex(num).replace('L', '')
|
||
|
|
||
|
|
||
|
# Values the stream helpers in this module treat as whitespace: space, line
# feed, carriage return, tab and NUL, each passed through b_(). Defined after
# b_ because the list is built at import time.
WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
|