Merge remote-tracking branch 'use_pip/use-pip'
# Conflicts: # cps.py # readme.md # vendor/wand/api.py # vendor/wand/drawing.py # vendor/wand/exceptions.py # vendor/wand/image.py # vendor/wand/version.pypull/142/merge
commit
acac0570e5
@ -0,0 +1,12 @@
|
||||
Babel>=1.3
|
||||
Flask>=0.11
|
||||
Flask-Babel==0.11.1
|
||||
Flask-Login>=0.3.2
|
||||
Flask-Principal>=0.3.2
|
||||
iso-639>=0.4.5
|
||||
PyPDF2==1.26.0
|
||||
pytz>=2016.10
|
||||
requests>=2.11.1
|
||||
SQLAlchemy>=0.8.4
|
||||
tornado>=4.4.2
|
||||
Wand>=0.4.4
|
@ -1,22 +0,0 @@
|
||||
Copyright (c) 2011 Matthew Frazier
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
@ -1,22 +0,0 @@
|
||||
Copyright (c) 2012 Ali Afshar
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
@ -1,31 +0,0 @@
|
||||
Copyright (c) 2011 by Armin Ronacher and the Django Software Foundation.
|
||||
|
||||
Some rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
* The names of the contributors may not be used to endorse or
|
||||
promote products derived from this software without specific
|
||||
prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,5 +0,0 @@
|
||||
from .pdf import PdfFileReader, PdfFileWriter
|
||||
from .merger import PdfFileMerger
|
||||
from .pagerange import PageRange, parse_filename_page_ranges
|
||||
from ._version import __version__
|
||||
__all__ = ["pdf", "PdfFileMerger"]
|
@ -1 +0,0 @@
|
||||
__version__ = '1.26.0'
|
@ -1,362 +0,0 @@
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Implementation of stream filters for PDF.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
from .utils import PdfReadError, ord_, chr_
|
||||
from sys import version_info
|
||||
if version_info < ( 3, 0 ):
|
||||
from cStringIO import StringIO
|
||||
else:
|
||||
from io import StringIO
|
||||
import struct
|
||||
|
||||
try:
|
||||
import zlib
|
||||
|
||||
def decompress(data):
|
||||
return zlib.decompress(data)
|
||||
|
||||
def compress(data):
|
||||
return zlib.compress(data)
|
||||
|
||||
except ImportError:
|
||||
# Unable to import zlib. Attempt to use the System.IO.Compression
|
||||
# library from the .NET framework. (IronPython only)
|
||||
import System
|
||||
from System import IO, Collections, Array
|
||||
|
||||
def _string_to_bytearr(buf):
|
||||
retval = Array.CreateInstance(System.Byte, len(buf))
|
||||
for i in range(len(buf)):
|
||||
retval[i] = ord(buf[i])
|
||||
return retval
|
||||
|
||||
def _bytearr_to_string(bytes):
|
||||
retval = ""
|
||||
for i in range(bytes.Length):
|
||||
retval += chr(bytes[i])
|
||||
return retval
|
||||
|
||||
def _read_bytes(stream):
|
||||
ms = IO.MemoryStream()
|
||||
buf = Array.CreateInstance(System.Byte, 2048)
|
||||
while True:
|
||||
bytes = stream.Read(buf, 0, buf.Length)
|
||||
if bytes == 0:
|
||||
break
|
||||
else:
|
||||
ms.Write(buf, 0, bytes)
|
||||
retval = ms.ToArray()
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
def decompress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
ms.Write(bytes, 0, bytes.Length)
|
||||
ms.Position = 0 # fseek 0
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
|
||||
bytes = _read_bytes(gz)
|
||||
retval = _bytearr_to_string(bytes)
|
||||
gz.Close()
|
||||
return retval
|
||||
|
||||
def compress(data):
|
||||
bytes = _string_to_bytearr(data)
|
||||
ms = IO.MemoryStream()
|
||||
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
|
||||
gz.Write(bytes, 0, bytes.Length)
|
||||
gz.Close()
|
||||
ms.Position = 0 # fseek 0
|
||||
bytes = ms.ToArray()
|
||||
retval = _bytearr_to_string(bytes)
|
||||
ms.Close()
|
||||
return retval
|
||||
|
||||
|
||||
class FlateDecode(object):
|
||||
def decode(data, decodeParms):
|
||||
data = decompress(data)
|
||||
predictor = 1
|
||||
if decodeParms:
|
||||
try:
|
||||
predictor = decodeParms.get("/Predictor", 1)
|
||||
except AttributeError:
|
||||
pass # usually an array with a null object was read
|
||||
|
||||
# predictor 1 == no predictor
|
||||
if predictor != 1:
|
||||
columns = decodeParms["/Columns"]
|
||||
# PNG prediction:
|
||||
if predictor >= 10 and predictor <= 15:
|
||||
output = StringIO()
|
||||
# PNG prediction can vary from row to row
|
||||
rowlength = columns + 1
|
||||
assert len(data) % rowlength == 0
|
||||
prev_rowdata = (0,) * rowlength
|
||||
for row in range(len(data) // rowlength):
|
||||
rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
|
||||
filterByte = rowdata[0]
|
||||
if filterByte == 0:
|
||||
pass
|
||||
elif filterByte == 1:
|
||||
for i in range(2, rowlength):
|
||||
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
|
||||
elif filterByte == 2:
|
||||
for i in range(1, rowlength):
|
||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||
else:
|
||||
# unsupported PNG filter
|
||||
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
|
||||
prev_rowdata = rowdata
|
||||
output.write(''.join([chr(x) for x in rowdata[1:]]))
|
||||
data = output.getvalue()
|
||||
else:
|
||||
# unsupported predictor
|
||||
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
|
||||
return data
|
||||
decode = staticmethod(decode)
|
||||
|
||||
def encode(data):
|
||||
return compress(data)
|
||||
encode = staticmethod(encode)
|
||||
|
||||
|
||||
class ASCIIHexDecode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
retval = ""
|
||||
char = ""
|
||||
x = 0
|
||||
while True:
|
||||
c = data[x]
|
||||
if c == ">":
|
||||
break
|
||||
elif c.isspace():
|
||||
x += 1
|
||||
continue
|
||||
char += c
|
||||
if len(char) == 2:
|
||||
retval += chr(int(char, base=16))
|
||||
char = ""
|
||||
x += 1
|
||||
assert char == ""
|
||||
return retval
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
class LZWDecode(object):
|
||||
"""Taken from:
|
||||
http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
|
||||
"""
|
||||
class decoder(object):
|
||||
def __init__(self, data):
|
||||
self.STOP=257
|
||||
self.CLEARDICT=256
|
||||
self.data=data
|
||||
self.bytepos=0
|
||||
self.bitpos=0
|
||||
self.dict=[""]*4096
|
||||
for i in range(256):
|
||||
self.dict[i]=chr(i)
|
||||
self.resetDict()
|
||||
|
||||
def resetDict(self):
|
||||
self.dictlen=258
|
||||
self.bitspercode=9
|
||||
|
||||
def nextCode(self):
|
||||
fillbits=self.bitspercode
|
||||
value=0
|
||||
while fillbits>0 :
|
||||
if self.bytepos >= len(self.data):
|
||||
return -1
|
||||
nextbits=ord(self.data[self.bytepos])
|
||||
bitsfromhere=8-self.bitpos
|
||||
if bitsfromhere>fillbits:
|
||||
bitsfromhere=fillbits
|
||||
value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
|
||||
(0xff >> (8-bitsfromhere))) <<
|
||||
(fillbits-bitsfromhere))
|
||||
fillbits -= bitsfromhere
|
||||
self.bitpos += bitsfromhere
|
||||
if self.bitpos >=8:
|
||||
self.bitpos=0
|
||||
self.bytepos = self.bytepos+1
|
||||
return value
|
||||
|
||||
def decode(self):
|
||||
""" algorithm derived from:
|
||||
http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
|
||||
and the PDFReference
|
||||
"""
|
||||
cW = self.CLEARDICT;
|
||||
baos=""
|
||||
while True:
|
||||
pW = cW;
|
||||
cW = self.nextCode();
|
||||
if cW == -1:
|
||||
raise PdfReadError("Missed the stop code in LZWDecode!")
|
||||
if cW == self.STOP:
|
||||
break;
|
||||
elif cW == self.CLEARDICT:
|
||||
self.resetDict();
|
||||
elif pW == self.CLEARDICT:
|
||||
baos+=self.dict[cW]
|
||||
else:
|
||||
if cW < self.dictlen:
|
||||
baos += self.dict[cW]
|
||||
p=self.dict[pW]+self.dict[cW][0]
|
||||
self.dict[self.dictlen]=p
|
||||
self.dictlen+=1
|
||||
else:
|
||||
p=self.dict[pW]+self.dict[pW][0]
|
||||
baos+=p
|
||||
self.dict[self.dictlen] = p;
|
||||
self.dictlen+=1
|
||||
if (self.dictlen >= (1 << self.bitspercode) - 1 and
|
||||
self.bitspercode < 12):
|
||||
self.bitspercode+=1
|
||||
return baos
|
||||
|
||||
@staticmethod
|
||||
def decode(data,decodeParams=None):
|
||||
return LZWDecode.decoder(data).decode()
|
||||
|
||||
|
||||
class ASCII85Decode(object):
|
||||
def decode(data, decodeParms=None):
|
||||
if version_info < ( 3, 0 ):
|
||||
retval = ""
|
||||
group = []
|
||||
x = 0
|
||||
hitEod = False
|
||||
# remove all whitespace from data
|
||||
data = [y for y in data if not (y in ' \n\r\t')]
|
||||
while not hitEod:
|
||||
c = data[x]
|
||||
if len(retval) == 0 and c == "<" and data[x+1] == "~":
|
||||
x += 2
|
||||
continue
|
||||
#elif c.isspace():
|
||||
# x += 1
|
||||
# continue
|
||||
elif c == 'z':
|
||||
assert len(group) == 0
|
||||
retval += '\x00\x00\x00\x00'
|
||||
x += 1
|
||||
continue
|
||||
elif c == "~" and data[x+1] == ">":
|
||||
if len(group) != 0:
|
||||
# cannot have a final group of just 1 char
|
||||
assert len(group) > 1
|
||||
cnt = len(group) - 1
|
||||
group += [ 85, 85, 85 ]
|
||||
hitEod = cnt
|
||||
else:
|
||||
break
|
||||
else:
|
||||
c = ord(c) - 33
|
||||
assert c >= 0 and c < 85
|
||||
group += [ c ]
|
||||
if len(group) >= 5:
|
||||
b = group[0] * (85**4) + \
|
||||
group[1] * (85**3) + \
|
||||
group[2] * (85**2) + \
|
||||
group[3] * 85 + \
|
||||
group[4]
|
||||
assert b < (2**32 - 1)
|
||||
c4 = chr((b >> 0) % 256)
|
||||
c3 = chr((b >> 8) % 256)
|
||||
c2 = chr((b >> 16) % 256)
|
||||
c1 = chr(b >> 24)
|
||||
retval += (c1 + c2 + c3 + c4)
|
||||
if hitEod:
|
||||
retval = retval[:-4+hitEod]
|
||||
group = []
|
||||
x += 1
|
||||
return retval
|
||||
else:
|
||||
if isinstance(data, str):
|
||||
data = data.encode('ascii')
|
||||
n = b = 0
|
||||
out = bytearray()
|
||||
for c in data:
|
||||
if ord('!') <= c and c <= ord('u'):
|
||||
n += 1
|
||||
b = b*85+(c-33)
|
||||
if n == 5:
|
||||
out += struct.pack(b'>L',b)
|
||||
n = b = 0
|
||||
elif c == ord('z'):
|
||||
assert n == 0
|
||||
out += b'\0\0\0\0'
|
||||
elif c == ord('~'):
|
||||
if n:
|
||||
for _ in range(5-n):
|
||||
b = b*85+84
|
||||
out += struct.pack(b'>L',b)[:n-1]
|
||||
break
|
||||
return bytes(out)
|
||||
decode = staticmethod(decode)
|
||||
|
||||
|
||||
def decodeStreamData(stream):
|
||||
from .generic import NameObject
|
||||
filters = stream.get("/Filter", ())
|
||||
if len(filters) and not isinstance(filters[0], NameObject):
|
||||
# we have a single filter instance
|
||||
filters = (filters,)
|
||||
data = stream._data
|
||||
# If there is not data to decode we should not try to decode the data.
|
||||
if data:
|
||||
for filterType in filters:
|
||||
if filterType == "/FlateDecode" or filterType == "/Fl":
|
||||
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
|
||||
data = ASCIIHexDecode.decode(data)
|
||||
elif filterType == "/LZWDecode" or filterType == "/LZW":
|
||||
data = LZWDecode.decode(data, stream.get("/DecodeParms"))
|
||||
elif filterType == "/ASCII85Decode" or filterType == "/A85":
|
||||
data = ASCII85Decode.decode(data)
|
||||
elif filterType == "/Crypt":
|
||||
decodeParams = stream.get("/DecodeParams", {})
|
||||
if "/Name" not in decodeParams and "/Type" not in decodeParams:
|
||||
pass
|
||||
else:
|
||||
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
|
||||
else:
|
||||
# unsupported filter
|
||||
raise NotImplementedError("unsupported filter %s" % filterType)
|
||||
return data
|
File diff suppressed because it is too large
Load Diff
@ -1,553 +0,0 @@
|
||||
# vim: sw=4:expandtab:foldmethod=marker
|
||||
#
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from .generic import *
|
||||
from .utils import isString, str_
|
||||
from .pdf import PdfFileReader, PdfFileWriter
|
||||
from .pagerange import PageRange
|
||||
from sys import version_info
|
||||
if version_info < ( 3, 0 ):
|
||||
from cStringIO import StringIO
|
||||
StreamIO = StringIO
|
||||
else:
|
||||
from io import BytesIO
|
||||
from io import FileIO as file
|
||||
StreamIO = BytesIO
|
||||
|
||||
|
||||
class _MergedPage(object):
|
||||
"""
|
||||
_MergedPage is used internally by PdfFileMerger to collect necessary
|
||||
information on each page that is being merged.
|
||||
"""
|
||||
def __init__(self, pagedata, src, id):
|
||||
self.src = src
|
||||
self.pagedata = pagedata
|
||||
self.out_pagedata = None
|
||||
self.id = id
|
||||
|
||||
|
||||
class PdfFileMerger(object):
|
||||
"""
|
||||
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
|
||||
into a single PDF. It can concatenate, slice, insert, or any combination
|
||||
of the above.
|
||||
|
||||
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
|
||||
and :meth:`write()<write>` for usage information.
|
||||
|
||||
:param bool strict: Determines whether user should be warned of all
|
||||
problems and also causes some correctable problems to be fatal.
|
||||
Defaults to ``True``.
|
||||
"""
|
||||
|
||||
def __init__(self, strict=True):
|
||||
self.inputs = []
|
||||
self.pages = []
|
||||
self.output = PdfFileWriter()
|
||||
self.bookmarks = []
|
||||
self.named_dests = []
|
||||
self.id_count = 0
|
||||
self.strict = strict
|
||||
|
||||
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||
"""
|
||||
Merges the pages from the given file into the output file at the
|
||||
specified page number.
|
||||
|
||||
:param int position: The *page number* to insert this file. File will
|
||||
be inserted after the given number.
|
||||
|
||||
:param fileobj: A File Object or an object that supports the standard read
|
||||
and seek methods similar to a File Object. Could also be a
|
||||
string representing a path to a PDF file.
|
||||
|
||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||
the beginning of the included file by supplying the text of the bookmark.
|
||||
|
||||
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
|
||||
to merge only the specified range of pages from the source
|
||||
document into the output document.
|
||||
|
||||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||
from being imported by specifying this as ``False``.
|
||||
"""
|
||||
|
||||
# This parameter is passed to self.inputs.append and means
|
||||
# that the stream used was created in this method.
|
||||
my_file = False
|
||||
|
||||
# If the fileobj parameter is a string, assume it is a path
|
||||
# and create a file object at that location. If it is a file,
|
||||
# copy the file's contents into a BytesIO (or StreamIO) stream object; if
|
||||
# it is a PdfFileReader, copy that reader's stream into a
|
||||
# BytesIO (or StreamIO) stream.
|
||||
# If fileobj is none of the above types, it is not modified
|
||||
decryption_key = None
|
||||
if isString(fileobj):
|
||||
fileobj = file(fileobj, 'rb')
|
||||
my_file = True
|
||||
elif isinstance(fileobj, file):
|
||||
fileobj.seek(0)
|
||||
filecontent = fileobj.read()
|
||||
fileobj = StreamIO(filecontent)
|
||||
my_file = True
|
||||
elif isinstance(fileobj, PdfFileReader):
|
||||
orig_tell = fileobj.stream.tell()
|
||||
fileobj.stream.seek(0)
|
||||
filecontent = StreamIO(fileobj.stream.read())
|
||||
fileobj.stream.seek(orig_tell) # reset the stream to its original location
|
||||
fileobj = filecontent
|
||||
if hasattr(fileobj, '_decryption_key'):
|
||||
decryption_key = fileobj._decryption_key
|
||||
my_file = True
|
||||
|
||||
# Create a new PdfFileReader instance using the stream
|
||||
# (either file or BytesIO or StringIO) created above
|
||||
pdfr = PdfFileReader(fileobj, strict=self.strict)
|
||||
if decryption_key is not None:
|
||||
pdfr._decryption_key = decryption_key
|
||||
|
||||
# Find the range of pages to merge.
|
||||
if pages == None:
|
||||
pages = (0, pdfr.getNumPages())
|
||||
elif isinstance(pages, PageRange):
|
||||
pages = pages.indices(pdfr.getNumPages())
|
||||
elif not isinstance(pages, tuple):
|
||||
raise TypeError('"pages" must be a tuple of (start, stop[, step])')
|
||||
|
||||
srcpages = []
|
||||
if bookmark:
|
||||
bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
|
||||
|
||||
outline = []
|
||||
if import_bookmarks:
|
||||
outline = pdfr.getOutlines()
|
||||
outline = self._trim_outline(pdfr, outline, pages)
|
||||
|
||||
if bookmark:
|
||||
self.bookmarks += [bookmark, outline]
|
||||
else:
|
||||
self.bookmarks += outline
|
||||
|
||||
dests = pdfr.namedDestinations
|
||||
dests = self._trim_dests(pdfr, dests, pages)
|
||||
self.named_dests += dests
|
||||
|
||||
# Gather all the pages that are going to be merged
|
||||
for i in range(*pages):
|
||||
pg = pdfr.getPage(i)
|
||||
|
||||
id = self.id_count
|
||||
self.id_count += 1
|
||||
|
||||
mp = _MergedPage(pg, pdfr, id)
|
||||
|
||||
srcpages.append(mp)
|
||||
|
||||
self._associate_dests_to_pages(srcpages)
|
||||
self._associate_bookmarks_to_pages(srcpages)
|
||||
|
||||
# Slice to insert the pages at the specified position
|
||||
self.pages[position:position] = srcpages
|
||||
|
||||
# Keep track of our input files so we can close them later
|
||||
self.inputs.append((fileobj, pdfr, my_file))
|
||||
|
||||
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
|
||||
"""
|
||||
Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
|
||||
all pages onto the end of the file instead of specifying a position.
|
||||
|
||||
:param fileobj: A File Object or an object that supports the standard read
|
||||
and seek methods similar to a File Object. Could also be a
|
||||
string representing a path to a PDF file.
|
||||
|
||||
:param str bookmark: Optionally, you may specify a bookmark to be applied at
|
||||
the beginning of the included file by supplying the text of the bookmark.
|
||||
|
||||
:param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
|
||||
to merge only the specified range of pages from the source
|
||||
document into the output document.
|
||||
|
||||
:param bool import_bookmarks: You may prevent the source document's bookmarks
|
||||
from being imported by specifying this as ``False``.
|
||||
"""
|
||||
|
||||
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
|
||||
|
||||
def write(self, fileobj):
|
||||
"""
|
||||
Writes all data that has been merged to the given output file.
|
||||
|
||||
:param fileobj: Output file. Can be a filename or any kind of
|
||||
file-like object.
|
||||
"""
|
||||
my_file = False
|
||||
if isString(fileobj):
|
||||
fileobj = file(fileobj, 'wb')
|
||||
my_file = True
|
||||
|
||||
# Add pages to the PdfFileWriter
|
||||
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
|
||||
for page in self.pages:
|
||||
self.output.addPage(page.pagedata)
|
||||
page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
|
||||
#idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
|
||||
#page.out_pagedata = IndirectObject(idnum, 0, self.output)
|
||||
|
||||
# Once all pages are added, create bookmarks to point at those pages
|
||||
self._write_dests()
|
||||
self._write_bookmarks()
|
||||
|
||||
# Write the output to the file
|
||||
self.output.write(fileobj)
|
||||
|
||||
if my_file:
|
||||
fileobj.close()
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Shuts all file descriptors (input and output) and clears all memory
|
||||
usage.
|
||||
"""
|
||||
self.pages = []
|
||||
for fo, pdfr, mine in self.inputs:
|
||||
if mine:
|
||||
fo.close()
|
||||
|
||||
self.inputs = []
|
||||
self.output = None
|
||||
|
||||
def addMetadata(self, infos):
|
||||
"""
|
||||
Add custom metadata to the output.
|
||||
|
||||
:param dict infos: a Python dictionary where each key is a field
|
||||
and each value is your new metadata.
|
||||
Example: ``{u'/Title': u'My title'}``
|
||||
"""
|
||||
self.output.addMetadata(infos)
|
||||
|
||||
def setPageLayout(self, layout):
|
||||
"""
|
||||
Set the page layout
|
||||
|
||||
:param str layout: The page layout to be used
|
||||
|
||||
Valid layouts are:
|
||||
/NoLayout Layout explicitly not specified
|
||||
/SinglePage Show one page at a time
|
||||
/OneColumn Show one column at a time
|
||||
/TwoColumnLeft Show pages in two columns, odd-numbered pages on the left
|
||||
/TwoColumnRight Show pages in two columns, odd-numbered pages on the right
|
||||
/TwoPageLeft Show two pages at a time, odd-numbered pages on the left
|
||||
/TwoPageRight Show two pages at a time, odd-numbered pages on the right
|
||||
"""
|
||||
self.output.setPageLayout(layout)
|
||||
|
||||
def setPageMode(self, mode):
|
||||
"""
|
||||
Set the page mode.
|
||||
|
||||
:param str mode: The page mode to use.
|
||||
|
||||
Valid modes are:
|
||||
/UseNone Do not show outlines or thumbnails panels
|
||||
/UseOutlines Show outlines (aka bookmarks) panel
|
||||
/UseThumbs Show page thumbnails panel
|
||||
/FullScreen Fullscreen view
|
||||
/UseOC Show Optional Content Group (OCG) panel
|
||||
/UseAttachments Show attachments panel
|
||||
"""
|
||||
self.output.setPageMode(mode)
|
||||
|
||||
def _trim_dests(self, pdf, dests, pages):
|
||||
"""
|
||||
Removes any named destinations that are not a part of the specified
|
||||
page set.
|
||||
"""
|
||||
new_dests = []
|
||||
prev_header_added = True
|
||||
for k, o in list(dests.items()):
|
||||
for j in range(*pages):
|
||||
if pdf.getPage(j).getObject() == o['/Page'].getObject():
|
||||
o[NameObject('/Page')] = o['/Page'].getObject()
|
||||
assert str_(k) == str_(o['/Title'])
|
||||
new_dests.append(o)
|
||||
break
|
||||
return new_dests
|
||||
|
||||
def _trim_outline(self, pdf, outline, pages):
|
||||
"""
|
||||
Removes any outline/bookmark entries that are not a part of the
|
||||
specified page set.
|
||||
"""
|
||||
new_outline = []
|
||||
prev_header_added = True
|
||||
for i, o in enumerate(outline):
|
||||
if isinstance(o, list):
|
||||
sub = self._trim_outline(pdf, o, pages)
|
||||
if sub:
|
||||
if not prev_header_added:
|
||||
new_outline.append(outline[i-1])
|
||||
new_outline.append(sub)
|
||||
else:
|
||||
prev_header_added = False
|
||||
for j in range(*pages):
|
||||
if pdf.getPage(j).getObject() == o['/Page'].getObject():
|
||||
o[NameObject('/Page')] = o['/Page'].getObject()
|
||||
new_outline.append(o)
|
||||
prev_header_added = True
|
||||
break
|
||||
return new_outline
|
||||
|
||||
def _write_dests(self):
|
||||
dests = self.named_dests
|
||||
|
||||
for v in dests:
|
||||
pageno = None
|
||||
pdf = None
|
||||
if '/Page' in v:
|
||||
for i, p in enumerate(self.pages):
|
||||
if p.id == v['/Page']:
|
||||
v[NameObject('/Page')] = p.out_pagedata
|
||||
pageno = i
|
||||
pdf = p.src
|
||||
break
|
||||
if pageno != None:
|
||||
self.output.addNamedDestinationObject(v)
|
||||
|
||||
def _write_bookmarks(self, bookmarks=None, parent=None):
|
||||
|
||||
if bookmarks == None:
|
||||
bookmarks = self.bookmarks
|
||||
|
||||
last_added = None
|
||||
for b in bookmarks:
|
||||
if isinstance(b, list):
|
||||
self._write_bookmarks(b, last_added)
|
||||
continue
|
||||
|
||||
pageno = None
|
||||
pdf = None
|
||||
if '/Page' in b:
|
||||
for i, p in enumerate(self.pages):
|
||||
if p.id == b['/Page']:
|
||||
#b[NameObject('/Page')] = p.out_pagedata
|
||||
args = [NumberObject(p.id), NameObject(b['/Type'])]
|
||||
#nothing more to add
|
||||
#if b['/Type'] == '/Fit' or b['/Type'] == '/FitB'
|
||||
if b['/Type'] == '/FitH' or b['/Type'] == '/FitBH':
|
||||
if '/Top' in b and not isinstance(b['/Top'], NullObject):
|
||||
args.append(FloatObject(b['/Top']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
del b['/Top']
|
||||
elif b['/Type'] == '/FitV' or b['/Type'] == '/FitBV':
|
||||
if '/Left' in b and not isinstance(b['/Left'], NullObject):
|
||||
args.append(FloatObject(b['/Left']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
del b['/Left']
|
||||
elif b['/Type'] == '/XYZ':
|
||||
if '/Left' in b and not isinstance(b['/Left'], NullObject):
|
||||
args.append(FloatObject(b['/Left']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
if '/Top' in b and not isinstance(b['/Top'], NullObject):
|
||||
args.append(FloatObject(b['/Top']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
if '/Zoom' in b and not isinstance(b['/Zoom'], NullObject):
|
||||
args.append(FloatObject(b['/Zoom']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
del b['/Top'], b['/Zoom'], b['/Left']
|
||||
elif b['/Type'] == '/FitR':
|
||||
if '/Left' in b and not isinstance(b['/Left'], NullObject):
|
||||
args.append(FloatObject(b['/Left']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
if '/Bottom' in b and not isinstance(b['/Bottom'], NullObject):
|
||||
args.append(FloatObject(b['/Bottom']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
if '/Right' in b and not isinstance(b['/Right'], NullObject):
|
||||
args.append(FloatObject(b['/Right']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
if '/Top' in b and not isinstance(b['/Top'], NullObject):
|
||||
args.append(FloatObject(b['/Top']))
|
||||
else:
|
||||
args.append(FloatObject(0))
|
||||
del b['/Left'], b['/Right'], b['/Bottom'], b['/Top']
|
||||
|
||||
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})
|
||||
|
||||
pageno = i
|
||||
pdf = p.src
|
||||
break
|
||||
if pageno != None:
|
||||
del b['/Page'], b['/Type']
|
||||
last_added = self.output.addBookmarkDict(b, parent)
|
||||
|
||||
def _associate_dests_to_pages(self, pages):
|
||||
for nd in self.named_dests:
|
||||
pageno = None
|
||||
np = nd['/Page']
|
||||
|
||||
if isinstance(np, NumberObject):
|
||||
continue
|
||||
|
||||
for p in pages:
|
||||
if np.getObject() == p.pagedata.getObject():
|
||||
pageno = p.id
|
||||
|
||||
if pageno != None:
|
||||
nd[NameObject('/Page')] = NumberObject(pageno)
|
||||
else:
|
||||
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
|
||||
|
||||
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
|
||||
if bookmarks == None:
|
||||
bookmarks = self.bookmarks
|
||||
|
||||
for b in bookmarks:
|
||||
if isinstance(b, list):
|
||||
self._associate_bookmarks_to_pages(pages, b)
|
||||
continue
|
||||
|
||||
pageno = None
|
||||
bp = b['/Page']
|
||||
|
||||
if isinstance(bp, NumberObject):
|
||||
continue
|
||||
|
||||
for p in pages:
|
||||
if bp.getObject() == p.pagedata.getObject():
|
||||
pageno = p.id
|
||||
|
||||
if pageno != None:
|
||||
b[NameObject('/Page')] = NumberObject(pageno)
|
||||
else:
|
||||
raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))
|
||||
|
||||
def findBookmark(self, bookmark, root=None):
|
||||
if root == None:
|
||||
root = self.bookmarks
|
||||
|
||||
for i, b in enumerate(root):
|
||||
if isinstance(b, list):
|
||||
res = self.findBookmark(bookmark, b)
|
||||
if res:
|
||||
return [i] + res
|
||||
elif b == bookmark or b['/Title'] == bookmark:
|
||||
return [i]
|
||||
|
||||
return None
|
||||
|
||||
def addBookmark(self, title, pagenum, parent=None):
|
||||
"""
|
||||
Add a bookmark to this PDF file.
|
||||
|
||||
:param str title: Title to use for this bookmark.
|
||||
:param int pagenum: Page number this bookmark will point to.
|
||||
:param parent: A reference to a parent bookmark to create nested
|
||||
bookmarks.
|
||||
"""
|
||||
if parent == None:
|
||||
iloc = [len(self.bookmarks)-1]
|
||||
elif isinstance(parent, list):
|
||||
iloc = parent
|
||||
else:
|
||||
iloc = self.findBookmark(parent)
|
||||
|
||||
dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||
|
||||
if parent == None:
|
||||
self.bookmarks.append(dest)
|
||||
else:
|
||||
bmparent = self.bookmarks
|
||||
for i in iloc[:-1]:
|
||||
bmparent = bmparent[i]
|
||||
npos = iloc[-1]+1
|
||||
if npos < len(bmparent) and isinstance(bmparent[npos], list):
|
||||
bmparent[npos].append(dest)
|
||||
else:
|
||||
bmparent.insert(npos, [dest])
|
||||
return dest
|
||||
|
||||
def addNamedDestination(self, title, pagenum):
|
||||
"""
|
||||
Add a destination to the output.
|
||||
|
||||
:param str title: Title to use
|
||||
:param int pagenum: Page number this destination points at.
|
||||
"""
|
||||
|
||||
dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
|
||||
self.named_dests.append(dest)
|
||||
|
||||
|
||||
class OutlinesObject(list):
|
||||
def __init__(self, pdf, tree, parent=None):
|
||||
list.__init__(self)
|
||||
self.tree = tree
|
||||
self.pdf = pdf
|
||||
self.parent = parent
|
||||
|
||||
def remove(self, index):
|
||||
obj = self[index]
|
||||
del self[index]
|
||||
self.tree.removeChild(obj)
|
||||
|
||||
def add(self, title, pagenum):
|
||||
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
|
||||
action = DictionaryObject()
|
||||
action.update({
|
||||
NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
|
||||
NameObject('/S') : NameObject('/GoTo')
|
||||
})
|
||||
actionRef = self.pdf._addObject(action)
|
||||
bookmark = TreeObject()
|
||||
|
||||
bookmark.update({
|
||||
NameObject('/A'): actionRef,
|
||||
NameObject('/Title'): createStringObject(title),
|
||||
})
|
||||
|
||||
self.pdf._addObject(bookmark)
|
||||
|
||||
self.tree.addChild(bookmark)
|
||||
|
||||
def removeAll(self):
|
||||
for child in [x for x in self.tree.children()]:
|
||||
self.tree.removeChild(child)
|
||||
self.pop()
|
@ -1,152 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Representation and utils for ranges of PDF file pages.
|
||||
|
||||
Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
|
||||
All rights reserved. This software is available under a BSD license;
|
||||
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
|
||||
"""
|
||||
|
||||
import re
|
||||
from .utils import isString
|
||||
|
||||
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
|
||||
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
|
||||
# groups: 12 34 5 6 7 8
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
PAGE_RANGE_HELP = """Remember, page indices start with zero.
|
||||
Page range expression examples:
|
||||
: all pages. -1 last page.
|
||||
22 just the 23rd page. :-1 all but the last page.
|
||||
0:3 the first three pages. -2 second-to-last page.
|
||||
:3 the first three pages. -2: last two pages.
|
||||
5: from the sixth page onward. -3:-1 third & second to last.
|
||||
The third, "stride" or "step" number is also recognized.
|
||||
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
|
||||
1:10:2 1 3 5 7 9 2::-1 2 1 0.
|
||||
::-1 all pages in reverse order.
|
||||
"""
|
||||
|
||||
|
||||
class PageRange(object):
|
||||
"""
|
||||
A slice-like representation of a range of page indices,
|
||||
i.e. page numbers, only starting at zero.
|
||||
The syntax is like what you would put between brackets [ ].
|
||||
The slice is one of the few Python types that can't be subclassed,
|
||||
but this class converts to and from slices, and allows similar use.
|
||||
o PageRange(str) parses a string representing a page range.
|
||||
o PageRange(slice) directly "imports" a slice.
|
||||
o to_slice() gives the equivalent slice.
|
||||
o str() and repr() allow printing.
|
||||
o indices(n) is like slice.indices(n).
|
||||
"""
|
||||
|
||||
def __init__(self, arg):
|
||||
"""
|
||||
Initialize with either a slice -- giving the equivalent page range,
|
||||
or a PageRange object -- making a copy,
|
||||
or a string like
|
||||
"int", "[int]:[int]" or "[int]:[int]:[int]",
|
||||
where the brackets indicate optional ints.
|
||||
{page_range_help}
|
||||
Note the difference between this notation and arguments to slice():
|
||||
slice(3) means the first three pages;
|
||||
PageRange("3") means the range of only the fourth page.
|
||||
However PageRange(slice(3)) means the first three pages.
|
||||
"""
|
||||
if isinstance(arg, slice):
|
||||
self._slice = arg
|
||||
return
|
||||
|
||||
if isinstance(arg, PageRange):
|
||||
self._slice = arg.to_slice()
|
||||
return
|
||||
|
||||
m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
|
||||
if not m:
|
||||
raise ParseError(arg)
|
||||
elif m.group(2):
|
||||
# Special case: just an int means a range of one page.
|
||||
start = int(m.group(2))
|
||||
stop = start + 1 if start != -1 else None
|
||||
self._slice = slice(start, stop)
|
||||
else:
|
||||
self._slice = slice(*[int(g) if g else None
|
||||
for g in m.group(4, 6, 8)])
|
||||
|
||||
# Just formatting this when there is __doc__ for __init__
|
||||
if __init__.__doc__:
|
||||
__init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
|
||||
|
||||
@staticmethod
|
||||
def valid(input):
|
||||
""" True if input is a valid initializer for a PageRange. """
|
||||
return isinstance(input, slice) or \
|
||||
isinstance(input, PageRange) or \
|
||||
(isString(input)
|
||||
and bool(re.match(PAGE_RANGE_RE, input)))
|
||||
|
||||
def to_slice(self):
|
||||
""" Return the slice equivalent of this page range. """
|
||||
return self._slice
|
||||
|
||||
def __str__(self):
|
||||
""" A string like "1:2:3". """
|
||||
s = self._slice
|
||||
if s.step == None:
|
||||
if s.start != None and s.stop == s.start + 1:
|
||||
return str(s.start)
|
||||
|
||||
indices = s.start, s.stop
|
||||
else:
|
||||
indices = s.start, s.stop, s.step
|
||||
return ':'.join("" if i == None else str(i) for i in indices)
|
||||
|
||||
def __repr__(self):
|
||||
""" A string like "PageRange('1:2:3')". """
|
||||
return "PageRange(" + repr(str(self)) + ")"
|
||||
|
||||
def indices(self, n):
|
||||
"""
|
||||
n is the length of the list of pages to choose from.
|
||||
Returns arguments for range(). See help(slice.indices).
|
||||
"""
|
||||
return self._slice.indices(n)
|
||||
|
||||
|
||||
PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
|
||||
|
||||
|
||||
def parse_filename_page_ranges(args):
|
||||
"""
|
||||
Given a list of filenames and page ranges, return a list of
|
||||
(filename, page_range) pairs.
|
||||
First arg must be a filename; other ags are filenames, page-range
|
||||
expressions, slice objects, or PageRange objects.
|
||||
A filename not followed by a page range indicates all pages of the file.
|
||||
"""
|
||||
pairs = []
|
||||
pdf_filename = None
|
||||
did_page_range = False
|
||||
for arg in args + [None]:
|
||||
if PageRange.valid(arg):
|
||||
if not pdf_filename:
|
||||
raise ValueError("The first argument must be a filename, " \
|
||||
"not a page range.")
|
||||
|
||||
pairs.append( (pdf_filename, PageRange(arg)) )
|
||||
did_page_range = True
|
||||
else:
|
||||
# New filename or end of list--do all of the previous file?
|
||||
if pdf_filename and not did_page_range:
|
||||
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
|
||||
|
||||
pdf_filename = arg
|
||||
did_page_range = False
|
||||
return pairs
|
File diff suppressed because it is too large
Load Diff
@ -1,295 +0,0 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Utility functions for PDF library.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
|
||||
import sys
|
||||
|
||||
try:
|
||||
import __builtin__ as builtins
|
||||
except ImportError: # Py3
|
||||
import builtins
|
||||
|
||||
|
||||
xrange_fn = getattr(builtins, "xrange", range)
|
||||
_basestring = getattr(builtins, "basestring", str)
|
||||
|
||||
bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
|
||||
string_type = getattr(builtins, "unicode", str)
|
||||
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
|
||||
|
||||
|
||||
# Make basic type tests more consistent
|
||||
def isString(s):
|
||||
"""Test if arg is a string. Compatible with Python 2 and 3."""
|
||||
return isinstance(s, _basestring)
|
||||
|
||||
|
||||
def isInt(n):
|
||||
"""Test if arg is an int. Compatible with Python 2 and 3."""
|
||||
return isinstance(n, int_types)
|
||||
|
||||
|
||||
def isBytes(b):
|
||||
"""Test if arg is a bytes instance. Compatible with Python 2 and 3."""
|
||||
return isinstance(b, bytes_type)
|
||||
|
||||
|
||||
#custom implementation of warnings.formatwarning
|
||||
def formatWarning(message, category, filename, lineno, line=None):
|
||||
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
|
||||
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
|
||||
|
||||
|
||||
def readUntilWhitespace(stream, maxchars=None):
|
||||
"""
|
||||
Reads non-whitespace characters and returns them.
|
||||
Stops upon encountering whitespace or when maxchars is reached.
|
||||
"""
|
||||
txt = b_("")
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if tok.isspace() or not tok:
|
||||
break
|
||||
txt += tok
|
||||
if len(txt) == maxchars:
|
||||
break
|
||||
return txt
|
||||
|
||||
|
||||
def readNonWhitespace(stream):
|
||||
"""
|
||||
Finds and reads the next non-whitespace character (ignores whitespace).
|
||||
"""
|
||||
tok = WHITESPACES[0]
|
||||
while tok in WHITESPACES:
|
||||
tok = stream.read(1)
|
||||
return tok
|
||||
|
||||
|
||||
def skipOverWhitespace(stream):
|
||||
"""
|
||||
Similar to readNonWhitespace, but returns a Boolean if more than
|
||||
one whitespace character was read.
|
||||
"""
|
||||
tok = WHITESPACES[0]
|
||||
cnt = 0;
|
||||
while tok in WHITESPACES:
|
||||
tok = stream.read(1)
|
||||
cnt+=1
|
||||
return (cnt > 1)
|
||||
|
||||
|
||||
def skipOverComment(stream):
|
||||
tok = stream.read(1)
|
||||
stream.seek(-1, 1)
|
||||
if tok == b_('%'):
|
||||
while tok not in (b_('\n'), b_('\r')):
|
||||
tok = stream.read(1)
|
||||
|
||||
|
||||
def readUntilRegex(stream, regex, ignore_eof=False):
|
||||
"""
|
||||
Reads until the regular expression pattern matched (ignore the match)
|
||||
Raise PdfStreamError on premature end-of-file.
|
||||
:param bool ignore_eof: If true, ignore end-of-line and return immediately
|
||||
"""
|
||||
name = b_('')
|
||||
while True:
|
||||
tok = stream.read(16)
|
||||
if not tok:
|
||||
# stream has truncated prematurely
|
||||
if ignore_eof == True:
|
||||
return name
|
||||
else:
|
||||
raise PdfStreamError("Stream has ended unexpectedly")
|
||||
m = regex.search(tok)
|
||||
if m is not None:
|
||||
name += tok[:m.start()]
|
||||
stream.seek(m.start()-len(tok), 1)
|
||||
break
|
||||
name += tok
|
||||
return name
|
||||
|
||||
|
||||
class ConvertFunctionsToVirtualList(object):
|
||||
def __init__(self, lengthFunction, getFunction):
|
||||
self.lengthFunction = lengthFunction
|
||||
self.getFunction = getFunction
|
||||
|
||||
def __len__(self):
|
||||
return self.lengthFunction()
|
||||
|
||||
def __getitem__(self, index):
|
||||
if isinstance(index, slice):
|
||||
indices = xrange_fn(*index.indices(len(self)))
|
||||
cls = type(self)
|
||||
return cls(indices.__len__, lambda idx: self[indices[idx]])
|
||||
if not isInt(index):
|
||||
raise TypeError("sequence indices must be integers")
|
||||
len_self = len(self)
|
||||
if index < 0:
|
||||
# support negative indexes
|
||||
index = len_self + index
|
||||
if index < 0 or index >= len_self:
|
||||
raise IndexError("sequence index out of range")
|
||||
return self.getFunction(index)
|
||||
|
||||
|
||||
def RC4_encrypt(key, plaintext):
|
||||
S = [i for i in range(256)]
|
||||
j = 0
|
||||
for i in range(256):
|
||||
j = (j + S[i] + ord_(key[i % len(key)])) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
i, j = 0, 0
|
||||
retval = b_("")
|
||||
for x in range(len(plaintext)):
|
||||
i = (i + 1) % 256
|
||||
j = (j + S[i]) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
t = S[(S[i] + S[j]) % 256]
|
||||
retval += b_(chr(ord_(plaintext[x]) ^ t))
|
||||
return retval
|
||||
|
||||
|
||||
def matrixMultiply(a, b):
|
||||
return [[sum([float(i)*float(j)
|
||||
for i, j in zip(row, col)]
|
||||
) for col in zip(*b)]
|
||||
for row in a]
|
||||
|
||||
|
||||
def markLocation(stream):
|
||||
"""Creates text file showing current location in context."""
|
||||
# Mainly for debugging
|
||||
RADIUS = 5000
|
||||
stream.seek(-RADIUS, 1)
|
||||
outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
|
||||
outputDoc.write(stream.read(RADIUS))
|
||||
outputDoc.write('HERE')
|
||||
outputDoc.write(stream.read(RADIUS))
|
||||
outputDoc.close()
|
||||
stream.seek(-RADIUS, 1)
|
||||
|
||||
|
||||
class PyPdfError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class PdfReadError(PyPdfError):
|
||||
pass
|
||||
|
||||
|
||||
class PageSizeNotDefinedError(PyPdfError):
|
||||
pass
|
||||
|
||||
|
||||
class PdfReadWarning(UserWarning):
|
||||
pass
|
||||
|
||||
|
||||
class PdfStreamError(PdfReadError):
|
||||
pass
|
||||
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
def b_(s):
|
||||
return s
|
||||
else:
|
||||
B_CACHE = {}
|
||||
|
||||
def b_(s):
|
||||
bc = B_CACHE
|
||||
if s in bc:
|
||||
return bc[s]
|
||||
if type(s) == bytes:
|
||||
return s
|
||||
else:
|
||||
r = s.encode('latin-1')
|
||||
if len(s) < 2:
|
||||
bc[s] = r
|
||||
return r
|
||||
|
||||
|
||||
def u_(s):
|
||||
if sys.version_info[0] < 3:
|
||||
return unicode(s, 'unicode_escape')
|
||||
else:
|
||||
return s
|
||||
|
||||
|
||||
def str_(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b
|
||||
else:
|
||||
if type(b) == bytes:
|
||||
return b.decode('latin-1')
|
||||
else:
|
||||
return b
|
||||
|
||||
|
||||
def ord_(b):
|
||||
if sys.version_info[0] < 3 or type(b) == str:
|
||||
return ord(b)
|
||||
else:
|
||||
return b
|
||||
|
||||
|
||||
def chr_(c):
|
||||
if sys.version_info[0] < 3:
|
||||
return c
|
||||
else:
|
||||
return chr(c)
|
||||
|
||||
|
||||
def barray(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b
|
||||
else:
|
||||
return bytearray(b)
|
||||
|
||||
|
||||
def hexencode(b):
|
||||
if sys.version_info[0] < 3:
|
||||
return b.encode('hex')
|
||||
else:
|
||||
import codecs
|
||||
coder = codecs.getencoder('hex_codec')
|
||||
return coder(b)[0]
|
||||
|
||||
|
||||
def hexStr(num):
|
||||
return hex(num).replace('L', '')
|
||||
|
||||
|
||||
WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]
|
@ -1,358 +0,0 @@
|
||||
import re
|
||||
import datetime
|
||||
import decimal
|
||||
from .generic import PdfObject
|
||||
from xml.dom import getDOMImplementation
|
||||
from xml.dom.minidom import parseString
|
||||
from .utils import u_
|
||||
|
||||
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
|
||||
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
|
||||
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
|
||||
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
|
||||
|
||||
# What is the PDFX namespace, you might ask? I might ask that too. It's
|
||||
# a completely undocumented namespace used to place "custom metadata"
|
||||
# properties, which are arbitrary metadata properties with no semantic or
|
||||
# documented meaning. Elements in the namespace are key/value-style storage,
|
||||
# where the element name is the key and the content is the value. The keys
|
||||
# are transformed into valid XML identifiers by substituting an invalid
|
||||
# identifier character with \u2182 followed by the unicode hex ID of the
|
||||
# original character. A key like "my car" is therefore "my\u21820020car".
|
||||
#
|
||||
# \u2182, in case you're wondering, is the unicode character
|
||||
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
|
||||
# escaping characters.
|
||||
#
|
||||
# Intentional users of the pdfx namespace should be shot on sight. A
|
||||
# custom data schema and sensical XML elements could be used instead, as is
|
||||
# suggested by Adobe's own documentation on XMP (under "Extensibility of
|
||||
# Schemas").
|
||||
#
|
||||
# Information presented here on the /pdfx/ schema is a result of limited
|
||||
# reverse engineering, and does not constitute a full specification.
|
||||
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||
|
||||
iso8601 = re.compile("""
|
||||
(?P<year>[0-9]{4})
|
||||
(-
|
||||
(?P<month>[0-9]{2})
|
||||
(-
|
||||
(?P<day>[0-9]+)
|
||||
(T
|
||||
(?P<hour>[0-9]{2}):
|
||||
(?P<minute>[0-9]{2})
|
||||
(:(?P<second>[0-9]{2}(.[0-9]+)?))?
|
||||
(?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
|
||||
)?
|
||||
)?
|
||||
)?
|
||||
""", re.VERBOSE)
|
||||
|
||||
|
||||
class XmpInformation(PdfObject):
|
||||
"""
|
||||
An object that represents Adobe XMP metadata.
|
||||
Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
|
||||
"""
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
docRoot = parseString(self.stream.getData())
|
||||
self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
|
||||
self.cache = {}
|
||||
|
||||
def writeToStream(self, stream, encryption_key):
|
||||
self.stream.writeToStream(stream, encryption_key)
|
||||
|
||||
def getElement(self, aboutUri, namespace, name):
|
||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||
attr = desc.getAttributeNodeNS(namespace, name)
|
||||
if attr != None:
|
||||
yield attr
|
||||
for element in desc.getElementsByTagNameNS(namespace, name):
|
||||
yield element
|
||||
|
||||
def getNodesInNamespace(self, aboutUri, namespace):
|
||||
for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
|
||||
if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
|
||||
for i in range(desc.attributes.length):
|
||||
attr = desc.attributes.item(i)
|
||||
if attr.namespaceURI == namespace:
|
||||
yield attr
|
||||
for child in desc.childNodes:
|
||||
if child.namespaceURI == namespace:
|
||||
yield child
|
||||
|
||||
def _getText(self, element):
|
||||
text = ""
|
||||
for child in element.childNodes:
|
||||
if child.nodeType == child.TEXT_NODE:
|
||||
text += child.data
|
||||
return text
|
||||
|
||||
def _converter_string(value):
|
||||
return value
|
||||
|
||||
def _converter_date(value):
|
||||
m = iso8601.match(value)
|
||||
year = int(m.group("year"))
|
||||
month = int(m.group("month") or "1")
|
||||
day = int(m.group("day") or "1")
|
||||
hour = int(m.group("hour") or "0")
|
||||
minute = int(m.group("minute") or "0")
|
||||
second = decimal.Decimal(m.group("second") or "0")
|
||||
seconds = second.to_integral(decimal.ROUND_FLOOR)
|
||||
milliseconds = (second - seconds) * 1000000
|
||||
tzd = m.group("tzd") or "Z"
|
||||
dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
|
||||
if tzd != "Z":
|
||||
tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")]
|
||||
tzd_hours *= -1
|
||||
if tzd_hours < 0:
|
||||
tzd_minutes *= -1
|
||||
dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
|
||||
return dt
|
||||
_test_converter_date = staticmethod(_converter_date)
|
||||
|
||||
def _getter_bag(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = []
|
||||
for element in self.getElement("", namespace, name):
|
||||
bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
|
||||
if len(bags):
|
||||
for bag in bags:
|
||||
for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval.append(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_seq(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = []
|
||||
for element in self.getElement("", namespace, name):
|
||||
seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
|
||||
if len(seqs):
|
||||
for seq in seqs:
|
||||
for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval.append(value)
|
||||
else:
|
||||
value = converter(self._getText(element))
|
||||
retval.append(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_langalt(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
retval = {}
|
||||
for element in self.getElement("", namespace, name):
|
||||
alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
|
||||
if len(alts):
|
||||
for alt in alts:
|
||||
for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
|
||||
value = self._getText(item)
|
||||
value = converter(value)
|
||||
retval[item.getAttribute("xml:lang")] = value
|
||||
else:
|
||||
retval["x-default"] = converter(self._getText(element))
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = retval
|
||||
return retval
|
||||
return get
|
||||
|
||||
def _getter_single(namespace, name, converter):
|
||||
def get(self):
|
||||
cached = self.cache.get(namespace, {}).get(name)
|
||||
if cached:
|
||||
return cached
|
||||
value = None
|
||||
for element in self.getElement("", namespace, name):
|
||||
if element.nodeType == element.ATTRIBUTE_NODE:
|
||||
value = element.nodeValue
|
||||
else:
|
||||
value = self._getText(element)
|
||||
break
|
||||
if value != None:
|
||||
value = converter(value)
|
||||
ns_cache = self.cache.setdefault(namespace, {})
|
||||
ns_cache[name] = value
|
||||
return value
|
||||
return get
|
||||
|
||||
dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string))
|
||||
"""
|
||||
Contributors to the resource (other than the authors). An unsorted
|
||||
array of names.
|
||||
"""
|
||||
|
||||
dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string))
|
||||
"""
|
||||
Text describing the extent or scope of the resource.
|
||||
"""
|
||||
|
||||
dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string))
|
||||
"""
|
||||
A sorted array of names of the authors of the resource, listed in order
|
||||
of precedence.
|
||||
"""
|
||||
|
||||
dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date))
|
||||
"""
|
||||
A sorted array of dates (datetime.datetime instances) of signifigance to
|
||||
the resource. The dates and times are in UTC.
|
||||
"""
|
||||
|
||||
dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string))
|
||||
"""
|
||||
A language-keyed dictionary of textual descriptions of the content of the
|
||||
resource.
|
||||
"""
|
||||
|
||||
dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string))
|
||||
"""
|
||||
The mime-type of the resource.
|
||||
"""
|
||||
|
||||
dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string))
|
||||
"""
|
||||
Unique identifier of the resource.
|
||||
"""
|
||||
|
||||
dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string))
|
||||
"""
|
||||
An unordered array specifying the languages used in the resource.
|
||||
"""
|
||||
|
||||
dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string))
|
||||
"""
|
||||
An unordered array of publisher names.
|
||||
"""
|
||||
|
||||
dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string))
|
||||
"""
|
||||
An unordered array of text descriptions of relationships to other
|
||||
documents.
|
||||
"""
|
||||
|
||||
dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string))
|
||||
"""
|
||||
A language-keyed dictionary of textual descriptions of the rights the
|
||||
user has to this resource.
|
||||
"""
|
||||
|
||||
dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string))
|
||||
"""
|
||||
Unique identifier of the work from which this resource was derived.
|
||||
"""
|
||||
|
||||
dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string))
|
||||
"""
|
||||
An unordered array of descriptive phrases or keywrods that specify the
|
||||
topic of the content of the resource.
|
||||
"""
|
||||
|
||||
dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string))
|
||||
"""
|
||||
A language-keyed dictionary of the title of the resource.
|
||||
"""
|
||||
|
||||
dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string))
|
||||
"""
|
||||
An unordered array of textual descriptions of the document type.
|
||||
"""
|
||||
|
||||
pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string))
|
||||
"""
|
||||
An unformatted text string representing document keywords.
|
||||
"""
|
||||
|
||||
pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string))
|
||||
"""
|
||||
The PDF file version, for example 1.0, 1.3.
|
||||
"""
|
||||
|
||||
pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string))
|
||||
"""
|
||||
The name of the tool that created the PDF document.
|
||||
"""
|
||||
|
||||
xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date))
|
||||
"""
|
||||
The date and time the resource was originally created. The date and
|
||||
time are returned as a UTC datetime.datetime object.
|
||||
"""
|
||||
|
||||
xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date))
|
||||
"""
|
||||
The date and time the resource was last modified. The date and time
|
||||
are returned as a UTC datetime.datetime object.
|
||||
"""
|
||||
|
||||
xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date))
|
||||
"""
|
||||
The date and time that any metadata for this resource was last
|
||||
changed. The date and time are returned as a UTC datetime.datetime
|
||||
object.
|
||||
"""
|
||||
|
||||
xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string))
|
||||
"""
|
||||
The name of the first known tool used to create the resource.
|
||||
"""
|
||||
|
||||
xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string))
|
||||
"""
|
||||
The common identifier for all versions and renditions of this resource.
|
||||
"""
|
||||
|
||||
xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string))
|
||||
"""
|
||||
An identifier for a specific incarnation of a document, updated each
|
||||
time a file is saved.
|
||||
"""
|
||||
|
||||
def custom_properties(self):
|
||||
if not hasattr(self, "_custom_properties"):
|
||||
self._custom_properties = {}
|
||||
for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
|
||||
key = node.localName
|
||||
while True:
|
||||
# see documentation about PDFX_NAMESPACE earlier in file
|
||||
idx = key.find(u_("\u2182"))
|
||||
if idx == -1:
|
||||
break
|
||||
key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
|
||||
if node.nodeType == node.ATTRIBUTE_NODE:
|
||||
value = node.nodeValue
|
||||
else:
|
||||
value = self._getText(node)
|
||||
self._custom_properties[key] = value
|
||||
return self._custom_properties
|
||||
|
||||
custom_properties = property(custom_properties)
|
||||
"""
|
||||
Retrieves custom metadata properties defined in the undocumented pdfx
|
||||
metadata schema.
|
||||
|
||||
:return: a dictionary of key/value items for custom metadata properties.
|
||||
:rtype: dict
|
||||
"""
|
@ -1 +0,0 @@
|
||||
__version__ = '5.0.6'
|
@ -1,28 +0,0 @@
|
||||
Babel is written and maintained by the Babel team and various contributors:
|
||||
|
||||
Maintainer and Current Project Lead:
|
||||
|
||||
- Armin Ronacher <armin.ronacher@active-4.com>
|
||||
|
||||
Contributors:
|
||||
|
||||
- Christopher Lenz <cmlenz@gmail.com>
|
||||
- Alex Morega <alex@grep.ro>
|
||||
- Felix Schwarz <felix.schwarz@oss.schwarz.eu>
|
||||
- Pedro Algarvio <pedro@algarvio.me>
|
||||
- Jeroen Ruigrok van der Werven <asmodai@in-nomine.org>
|
||||
- Philip Jenvey <pjenvey@underboss.org>
|
||||
- Tobias Bieniek <Tobias.Bieniek@gmx.de>
|
||||
- Jonas Borgström <jonas@edgewall.org>
|
||||
- Daniel Neuhäuser <dasdasich@gmail.com>
|
||||
- Nick Retallack <nick@bitcasa.com>
|
||||
- Thomas Waldmann <tw@waldmann-edv.de>
|
||||
- Lennart Regebro <regebro@gmail.com>
|
||||
|
||||
Babel was previously developed under the Copyright of Edgewall Software. The
|
||||
following copyright notice holds true for releases before 2013: "Copyright (c)
|
||||
2007 - 2011 by Edgewall Software"
|
||||
|
||||
In addition to the regular contributions Babel includes a fork of Lennart
|
||||
Regebro's tzlocal that originally was licensed under the CC0 license. The
|
||||
original copyright of that project is "Copyright 2013 by Lennart Regebro".
|
@ -1,29 +0,0 @@
|
||||
Copyright (C) 2013 by the Babel Team, see AUTHORS for more information.
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. The name of the author may not be used to endorse or promote
|
||||
products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -1,24 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
babel
|
||||
~~~~~
|
||||
|
||||
Integrated collection of utilities that assist in internationalizing and
|
||||
localizing applications.
|
||||
|
||||
This package is basically composed of two major parts:
|
||||
|
||||
* tools to build and work with ``gettext`` message catalogs
|
||||
* a Python interface to the CLDR (Common Locale Data Repository), providing
|
||||
access to various locale display names, localized number and date
|
||||
formatting, etc.
|
||||
|
||||
:copyright: (c) 2013 by the Babel Team.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
from babel.core import UnknownLocaleError, Locale, default_locale, \
|
||||
negotiate_locale, parse_locale, get_locale_identifier
|
||||
|
||||
|
||||
__version__ = '1.3'
|
@ -1,51 +0,0 @@
|
||||
import sys
|
||||
|
||||
PY2 = sys.version_info[0] == 2
|
||||
|
||||
_identity = lambda x: x
|
||||
|
||||
|
||||
if not PY2:
|
||||
text_type = str
|
||||
string_types = (str,)
|
||||
integer_types = (int, )
|
||||
unichr = chr
|
||||
|
||||
text_to_native = lambda s, enc: s
|
||||
|
||||
iterkeys = lambda d: iter(d.keys())
|
||||
itervalues = lambda d: iter(d.values())
|
||||
iteritems = lambda d: iter(d.items())
|
||||
|
||||
from io import StringIO, BytesIO
|
||||
import pickle
|
||||
|
||||
izip = zip
|
||||
imap = map
|
||||
range_type = range
|
||||
|
||||
cmp = lambda a, b: (a > b) - (a < b)
|
||||
|
||||
else:
|
||||
text_type = unicode
|
||||
string_types = (str, unicode)
|
||||
integer_types = (int, long)
|
||||
|
||||
text_to_native = lambda s, enc: s.encode(enc)
|
||||
unichr = unichr
|
||||
|
||||
iterkeys = lambda d: d.iterkeys()
|
||||
itervalues = lambda d: d.itervalues()
|
||||
iteritems = lambda d: d.iteritems()
|
||||
|
||||
from cStringIO import StringIO as BytesIO
|
||||
from StringIO import StringIO
|
||||
import cPickle as pickle
|
||||
|
||||
from itertools import izip, imap
|
||||
range_type = xrange
|
||||
|
||||
cmp = cmp
|
||||
|
||||
|
||||
number_types = integer_types + (float,)
|
@ -1,941 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
babel.core
|
||||
~~~~~~~~~~
|
||||
|
||||
Core locale representation and locale data access.
|
||||
|
||||
:copyright: (c) 2013 by the Babel Team.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from babel import localedata
|
||||
from babel._compat import pickle, string_types
|
||||
|
||||
__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
|
||||
'parse_locale']
|
||||
|
||||
|
||||
_global_data = None
|
||||
|
||||
|
||||
def _raise_no_data_error():
|
||||
raise RuntimeError('The babel data files are not available. '
|
||||
'This usually happens because you are using '
|
||||
'a source checkout from Babel and you did '
|
||||
'not build the data files. Just make sure '
|
||||
'to run "python setup.py import_cldr" before '
|
||||
'installing the library.')
|
||||
|
||||
|
||||
def get_global(key):
|
||||
"""Return the dictionary for the given key in the global data.
|
||||
|
||||
The global data is stored in the ``babel/global.dat`` file and contains
|
||||
information independent of individual locales.
|
||||
|
||||
>>> get_global('zone_aliases')['UTC']
|
||||
u'Etc/GMT'
|
||||
>>> get_global('zone_territories')['Europe/Berlin']
|
||||
u'DE'
|
||||
|
||||
.. versionadded:: 0.9
|
||||
|
||||
:param key: the data key
|
||||
"""
|
||||
global _global_data
|
||||
if _global_data is None:
|
||||
dirname = os.path.join(os.path.dirname(__file__))
|
||||
filename = os.path.join(dirname, 'global.dat')
|
||||
if not os.path.isfile(filename):
|
||||
_raise_no_data_error()
|
||||
fileobj = open(filename, 'rb')
|
||||
try:
|
||||
_global_data = pickle.load(fileobj)
|
||||
finally:
|
||||
fileobj.close()
|
||||
return _global_data.get(key, {})
|
||||
|
||||
|
||||
LOCALE_ALIASES = {
|
||||
'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ',
|
||||
'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES',
|
||||
'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES',
|
||||
'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT',
|
||||
'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV',
|
||||
'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL',
|
||||
'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI',
|
||||
'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA'
|
||||
}
|
||||
|
||||
|
||||
class UnknownLocaleError(Exception):
|
||||
"""Exception thrown when a locale is requested for which no locale data
|
||||
is available.
|
||||
"""
|
||||
|
||||
def __init__(self, identifier):
|
||||
"""Create the exception.
|
||||
|
||||
:param identifier: the identifier string of the unsupported locale
|
||||
"""
|
||||
Exception.__init__(self, 'unknown locale %r' % identifier)
|
||||
|
||||
#: The identifier of the locale that could not be found.
|
||||
self.identifier = identifier
|
||||
|
||||
|
||||
class Locale(object):
|
||||
"""Representation of a specific locale.
|
||||
|
||||
>>> locale = Locale('en', 'US')
|
||||
>>> repr(locale)
|
||||
"Locale('en', territory='US')"
|
||||
>>> locale.display_name
|
||||
u'English (United States)'
|
||||
|
||||
A `Locale` object can also be instantiated from a raw locale string:
|
||||
|
||||
>>> locale = Locale.parse('en-US', sep='-')
|
||||
>>> repr(locale)
|
||||
"Locale('en', territory='US')"
|
||||
|
||||
`Locale` objects provide access to a collection of locale data, such as
|
||||
territory and language names, number and date format patterns, and more:
|
||||
|
||||
>>> locale.number_symbols['decimal']
|
||||
u'.'
|
||||
|
||||
If a locale is requested for which no locale data is available, an
|
||||
`UnknownLocaleError` is raised:
|
||||
|
||||
>>> Locale.parse('en_DE')
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
UnknownLocaleError: unknown locale 'en_DE'
|
||||
|
||||
For more information see :rfc:`3066`.
|
||||
"""
|
||||
|
||||
def __init__(self, language, territory=None, script=None, variant=None):
|
||||
"""Initialize the locale object from the given identifier components.
|
||||
|
||||
>>> locale = Locale('en', 'US')
|
||||
>>> locale.language
|
||||
'en'
|
||||
>>> locale.territory
|
||||
'US'
|
||||
|
||||
:param language: the language code
|
||||
:param territory: the territory (country or region) code
|
||||
:param script: the script code
|
||||
:param variant: the variant code
|
||||
:raise `UnknownLocaleError`: if no locale data is available for the
|
||||
requested locale
|
||||
"""
|
||||
#: the language code
|
||||
self.language = language
|
||||
#: the territory (country or region) code
|
||||
self.territory = territory
|
||||
#: the script code
|
||||
self.script = script
|
||||
#: the variant code
|
||||
self.variant = variant
|
||||
self.__data = None
|
||||
|
||||
identifier = str(self)
|
||||
if not localedata.exists(identifier):
|
||||
raise UnknownLocaleError(identifier)
|
||||
|
||||
@classmethod
|
||||
def default(cls, category=None, aliases=LOCALE_ALIASES):
|
||||
"""Return the system default locale for the specified category.
|
||||
|
||||
>>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']:
|
||||
... os.environ[name] = ''
|
||||
>>> os.environ['LANG'] = 'fr_FR.UTF-8'
|
||||
>>> Locale.default('LC_MESSAGES')
|
||||
Locale('fr', territory='FR')
|
||||
|
||||
The following fallbacks to the variable are always considered:
|
||||
|
||||
- ``LANGUAGE``
|
||||
- ``LC_ALL``
|
||||
- ``LC_CTYPE``
|
||||
- ``LANG``
|
||||
|
||||
:param category: one of the ``LC_XXX`` environment variable names
|
||||
:param aliases: a dictionary of aliases for locale identifiers
|
||||
"""
|
||||
# XXX: use likely subtag expansion here instead of the
|
||||
# aliases dictionary.
|
||||
locale_string = default_locale(category, aliases=aliases)
|
||||
return cls.parse(locale_string)
|
||||
|
||||
@classmethod
|
||||
def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES):
|
||||
"""Find the best match between available and requested locale strings.
|
||||
|
||||
>>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
|
||||
Locale('de', territory='DE')
|
||||
>>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
|
||||
Locale('de')
|
||||
>>> Locale.negotiate(['de_DE', 'de'], ['en_US'])
|
||||
|
||||
You can specify the character used in the locale identifiers to separate
|
||||
the differnet components. This separator is applied to both lists. Also,
|
||||
case is ignored in the comparison:
|
||||
|
||||
>>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
|
||||
Locale('de', territory='DE')
|
||||
|
||||
:param preferred: the list of locale identifers preferred by the user
|
||||
:param available: the list of locale identifiers available
|
||||
:param aliases: a dictionary of aliases for locale identifiers
|
||||
"""
|
||||
identifier = negotiate_locale(preferred, available, sep=sep,
|
||||
aliases=aliases)
|
||||
if identifier:
|
||||
return Locale.parse(identifier, sep=sep)
|
||||
|
||||
@classmethod
|
||||
def parse(cls, identifier, sep='_', resolve_likely_subtags=True):
|
||||
"""Create a `Locale` instance for the given locale identifier.
|
||||
|
||||
>>> l = Locale.parse('de-DE', sep='-')
|
||||
>>> l.display_name
|
||||
u'Deutsch (Deutschland)'
|
||||
|
||||
If the `identifier` parameter is not a string, but actually a `Locale`
|
||||
object, that object is returned:
|
||||
|
||||
>>> Locale.parse(l)
|
||||
Locale('de', territory='DE')
|
||||
|
||||
This also can perform resolving of likely subtags which it does
|
||||
by default. This is for instance useful to figure out the most
|
||||
likely locale for a territory you can use ``'und'`` as the
|
||||
language tag:
|
||||
|
||||
>>> Locale.parse('und_AT')
|
||||
Locale('de', territory='AT')
|
||||
|
||||
:param identifier: the locale identifier string
|
||||
:param sep: optional component separator
|
||||
:param resolve_likely_subtags: if this is specified then a locale will
|
||||
have its likely subtag resolved if the
|
||||
locale otherwise does not exist. For
|
||||
instance ``zh_TW`` by itself is not a
|
||||
locale that exists but Babel can
|
||||
automatically expand it to the full
|
||||
form of ``zh_hant_TW``. Note that this
|
||||
expansion is only taking place if no
|
||||
locale exists otherwise. For instance
|
||||
there is a locale ``en`` that can exist
|
||||
by itself.
|
||||
:raise `ValueError`: if the string does not appear to be a valid locale
|
||||
identifier
|
||||
:raise `UnknownLocaleError`: if no locale data is available for the
|
||||
requested locale
|
||||
"""
|
||||
if identifier is None:
|
||||
return None
|
||||
elif isinstance(identifier, Locale):
|
||||
return identifier
|
||||
elif not isinstance(identifier, string_types):
|
||||
raise TypeError('Unxpected value for identifier: %r' % (identifier,))
|
||||
|
||||
parts = parse_locale(identifier, sep=sep)
|
||||
input_id = get_locale_identifier(parts)
|
||||
|
||||
def _try_load(parts):
|
||||
try:
|
||||
return cls(*parts)
|
||||
except UnknownLocaleError:
|
||||
return None
|
||||
|
||||
def _try_load_reducing(parts):
|
||||
# Success on first hit, return it.
|
||||
locale = _try_load(parts)
|
||||
if locale is not None:
|
||||
return locale
|
||||
|
||||
# Now try without script and variant
|
||||
locale = _try_load(parts[:2])
|
||||
if locale is not None:
|
||||
return locale
|
||||
|
||||
locale = _try_load(parts)
|
||||
if locale is not None:
|
||||
return locale
|
||||
if not resolve_likely_subtags:
|
||||
raise UnknownLocaleError(input_id)
|
||||
|
||||
# From here onwards is some very bad likely subtag resolving. This
|
||||
# whole logic is not entirely correct but good enough (tm) for the
|
||||
# time being. This has been added so that zh_TW does not cause
|
||||
# errors for people when they upgrade. Later we should properly
|
||||
# implement ICU like fuzzy locale objects and provide a way to
|
||||
# maximize and minimize locale tags.
|
||||
|
||||
language, territory, script, variant = parts
|
||||
language = get_global('language_aliases').get(language, language)
|
||||
territory = get_global('territory_aliases').get(territory, territory)
|
||||
script = get_global('script_aliases').get(script, script)
|
||||
variant = get_global('variant_aliases').get(variant, variant)
|
||||
|
||||
if territory == 'ZZ':
|
||||
territory = None
|
||||
if script == 'Zzzz':
|
||||
script = None
|
||||
|
||||
parts = language, territory, script, variant
|
||||
|
||||
# First match: try the whole identifier
|
||||
new_id = get_locale_identifier(parts)
|
||||
likely_subtag = get_global('likely_subtags').get(new_id)
|
||||
if likely_subtag is not None:
|
||||
locale = _try_load_reducing(parse_locale(likely_subtag))
|
||||
if locale is not None:
|
||||
return locale
|
||||
|
||||
# If we did not find anything so far, try again with a
|
||||
# simplified identifier that is just the language
|
||||
likely_subtag = get_global('likely_subtags').get(language)
|
||||
if likely_subtag is not None:
|
||||
language2, _, script2, variant2 = parse_locale(likely_subtag)
|
||||
locale = _try_load_reducing((language2, territory, script2, variant2))
|
||||
if locale is not None:
|
||||
return locale
|
||||
|
||||
raise UnknownLocaleError(input_id)
|
||||
|
||||
def __eq__(self, other):
|
||||
for key in ('language', 'territory', 'script', 'variant'):
|
||||
if not hasattr(other, key):
|
||||
return False
|
||||
return (self.language == other.language) and \
|
||||
(self.territory == other.territory) and \
|
||||
(self.script == other.script) and \
|
||||
(self.variant == other.variant)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __repr__(self):
|
||||
parameters = ['']
|
||||
for key in ('territory', 'script', 'variant'):
|
||||
value = getattr(self, key)
|
||||
if value is not None:
|
||||
parameters.append('%s=%r' % (key, value))
|
||||
parameter_string = '%r' % self.language + ', '.join(parameters)
|
||||
return 'Locale(%s)' % parameter_string
|
||||
|
||||
def __str__(self):
|
||||
return get_locale_identifier((self.language, self.territory,
|
||||
self.script, self.variant))
|
||||
|
||||
@property
|
||||
def _data(self):
|
||||
if self.__data is None:
|
||||
self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
|
||||
return self.__data
|
||||
|
||||
def get_display_name(self, locale=None):
|
||||
"""Return the display name of the locale using the given locale.
|
||||
|
||||
The display name will include the language, territory, script, and
|
||||
variant, if those are specified.
|
||||
|
||||
>>> Locale('zh', 'CN', script='Hans').get_display_name('en')
|
||||
u'Chinese (Simplified, China)'
|
||||
|
||||
:param locale: the locale to use
|
||||
"""
|
||||
if locale is None:
|
||||
locale = self
|
||||
locale = Locale.parse(locale)
|
||||
retval = locale.languages.get(self.language)
|
||||
if self.territory or self.script or self.variant:
|
||||
details = []
|
||||
if self.script:
|
||||
details.append(locale.scripts.get(self.script))
|
||||
if self.territory:
|
||||
details.append(locale.territories.get(self.territory))
|
||||
if self.variant:
|
||||
details.append(locale.variants.get(self.variant))
|
||||
details = filter(None, details)
|
||||
if details:
|
||||
retval += ' (%s)' % u', '.join(details)
|
||||
return retval
|
||||
|
||||
display_name = property(get_display_name, doc="""\
|
||||
The localized display name of the locale.
|
||||
|
||||
>>> Locale('en').display_name
|
||||
u'English'
|
||||
>>> Locale('en', 'US').display_name
|
||||
u'English (United States)'
|
||||
>>> Locale('sv').display_name
|
||||
u'svenska'
|
||||
|
||||
:type: `unicode`
|
||||
""")
|
||||
|
||||
def get_language_name(self, locale=None):
|
||||
"""Return the language of this locale in the given locale.
|
||||
|
||||
>>> Locale('zh', 'CN', script='Hans').get_language_name('de')
|
||||
u'Chinesisch'
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
:param locale: the locale to use
|
||||
"""
|
||||
if locale is None:
|
||||
locale = self
|
||||
locale = Locale.parse(locale)
|
||||
return locale.languages.get(self.language)
|
||||
|
||||
language_name = property(get_language_name, doc="""\
|
||||
The localized language name of the locale.
|
||||
|
||||
>>> Locale('en', 'US').language_name
|
||||
u'English'
|
||||
""")
|
||||
|
||||
def get_territory_name(self, locale=None):
|
||||
"""Return the territory name in the given locale."""
|
||||
if locale is None:
|
||||
locale = self
|
||||
locale = Locale.parse(locale)
|
||||
return locale.territories.get(self.territory)
|
||||
|
||||
territory_name = property(get_territory_name, doc="""\
|
||||
The localized territory name of the locale if available.
|
||||
|
||||
>>> Locale('de', 'DE').territory_name
|
||||
u'Deutschland'
|
||||
""")
|
||||
|
||||
def get_script_name(self, locale=None):
|
||||
"""Return the script name in the given locale."""
|
||||
if locale is None:
|
||||
locale = self
|
||||
locale = Locale.parse(locale)
|
||||
return locale.scripts.get(self.script)
|
||||
|
||||
script_name = property(get_script_name, doc="""\
|
||||
The localized script name of the locale if available.
|
||||
|
||||
>>> Locale('ms', 'SG', script='Latn').script_name
|
||||
u'Latin'
|
||||
""")
|
||||
|
||||
@property
|
||||
def english_name(self):
|
||||
"""The english display name of the locale.
|
||||
|
||||
>>> Locale('de').english_name
|
||||
u'German'
|
||||
>>> Locale('de', 'DE').english_name
|
||||
u'German (Germany)'
|
||||
|
||||
:type: `unicode`"""
|
||||
return self.get_display_name(Locale('en'))
|
||||
|
||||
#{ General Locale Display Names
|
||||
|
||||
@property
|
||||
def languages(self):
|
||||
"""Mapping of language codes to translated language names.
|
||||
|
||||
>>> Locale('de', 'DE').languages['ja']
|
||||
u'Japanisch'
|
||||
|
||||
See `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_ for
|
||||
more information.
|
||||
"""
|
||||
return self._data['languages']
|
||||
|
||||
@property
|
||||
def scripts(self):
|
||||
"""Mapping of script codes to translated script names.
|
||||
|
||||
>>> Locale('en', 'US').scripts['Hira']
|
||||
u'Hiragana'
|
||||
|
||||
See `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
|
||||
for more information.
|
||||
"""
|
||||
return self._data['scripts']
|
||||
|
||||
@property
|
||||
def territories(self):
|
||||
"""Mapping of script codes to translated script names.
|
||||
|
||||
>>> Locale('es', 'CO').territories['DE']
|
||||
u'Alemania'
|
||||
|
||||
See `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
|
||||
for more information.
|
||||
"""
|
||||
return self._data['territories']
|
||||
|
||||
@property
|
||||
def variants(self):
|
||||
"""Mapping of script codes to translated script names.
|
||||
|
||||
>>> Locale('de', 'DE').variants['1901']
|
||||
u'Alte deutsche Rechtschreibung'
|
||||
"""
|
||||
return self._data['variants']
|
||||
|
||||
#{ Number Formatting
|
||||
|
||||
@property
|
||||
def currencies(self):
|
||||
"""Mapping of currency codes to translated currency names. This
|
||||
only returns the generic form of the currency name, not the count
|
||||
specific one. If an actual number is requested use the
|
||||
:func:`babel.numbers.get_currency_name` function.
|
||||
|
||||
>>> Locale('en').currencies['COP']
|
||||
u'Colombian Peso'
|
||||
>>> Locale('de', 'DE').currencies['COP']
|
||||
u'Kolumbianischer Peso'
|
||||
"""
|
||||
return self._data['currency_names']
|
||||
|
||||
@property
|
||||
def currency_symbols(self):
|
||||
"""Mapping of currency codes to symbols.
|
||||
|
||||
>>> Locale('en', 'US').currency_symbols['USD']
|
||||
u'$'
|
||||
>>> Locale('es', 'CO').currency_symbols['USD']
|
||||
u'US$'
|
||||
"""
|
||||
return self._data['currency_symbols']
|
||||
|
||||
@property
|
||||
def number_symbols(self):
|
||||
"""Symbols used in number formatting.
|
||||
|
||||
>>> Locale('fr', 'FR').number_symbols['decimal']
|
||||
u','
|
||||
"""
|
||||
return self._data['number_symbols']
|
||||
|
||||
@property
|
||||
def decimal_formats(self):
|
||||
"""Locale patterns for decimal number formatting.
|
||||
|
||||
>>> Locale('en', 'US').decimal_formats[None]
|
||||
<NumberPattern u'#,##0.###'>
|
||||
"""
|
||||
return self._data['decimal_formats']
|
||||
|
||||
@property
|
||||
def currency_formats(self):
|
||||
"""Locale patterns for currency number formatting.
|
||||
|
||||
>>> print Locale('en', 'US').currency_formats[None]
|
||||
<NumberPattern u'\\xa4#,##0.00'>
|
||||
"""
|
||||
return self._data['currency_formats']
|
||||
|
||||
@property
|
||||
def percent_formats(self):
|
||||
"""Locale patterns for percent number formatting.
|
||||
|
||||
>>> Locale('en', 'US').percent_formats[None]
|
||||
<NumberPattern u'#,##0%'>
|
||||
"""
|
||||
return self._data['percent_formats']
|
||||
|
||||
@property
|
||||
def scientific_formats(self):
|
||||
"""Locale patterns for scientific number formatting.
|
||||
|
||||
>>> Locale('en', 'US').scientific_formats[None]
|
||||
<NumberPattern u'#E0'>
|
||||
"""
|
||||
return self._data['scientific_formats']
|
||||
|
||||
#{ Calendar Information and Date Formatting
|
||||
|
||||
@property
|
||||
def periods(self):
|
||||
"""Locale display names for day periods (AM/PM).
|
||||
|
||||
>>> Locale('en', 'US').periods['am']
|
||||
u'AM'
|
||||
"""
|
||||
return self._data['periods']
|
||||
|
||||
@property
|
||||
def days(self):
|
||||
"""Locale display names for weekdays.
|
||||
|
||||
>>> Locale('de', 'DE').days['format']['wide'][3]
|
||||
u'Donnerstag'
|
||||
"""
|
||||
return self._data['days']
|
||||
|
||||
@property
|
||||
def months(self):
|
||||
"""Locale display names for months.
|
||||
|
||||
>>> Locale('de', 'DE').months['format']['wide'][10]
|
||||
u'Oktober'
|
||||
"""
|
||||
return self._data['months']
|
||||
|
||||
@property
|
||||
def quarters(self):
|
||||
"""Locale display names for quarters.
|
||||
|
||||
>>> Locale('de', 'DE').quarters['format']['wide'][1]
|
||||
u'1. Quartal'
|
||||
"""
|
||||
return self._data['quarters']
|
||||
|
||||
@property
|
||||
def eras(self):
|
||||
"""Locale display names for eras.
|
||||
|
||||
>>> Locale('en', 'US').eras['wide'][1]
|
||||
u'Anno Domini'
|
||||
>>> Locale('en', 'US').eras['abbreviated'][0]
|
||||
u'BC'
|
||||
"""
|
||||
return self._data['eras']
|
||||
|
||||
@property
|
||||
def time_zones(self):
|
||||
"""Locale display names for time zones.
|
||||
|
||||
>>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
|
||||
u'British Summer Time'
|
||||
>>> Locale('en', 'US').time_zones['America/St_Johns']['city']
|
||||
u'St. John\u2019s'
|
||||
"""
|
||||
return self._data['time_zones']
|
||||
|
||||
@property
|
||||
def meta_zones(self):
|
||||
"""Locale display names for meta time zones.
|
||||
|
||||
Meta time zones are basically groups of different Olson time zones that
|
||||
have the same GMT offset and daylight savings time.
|
||||
|
||||
>>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
|
||||
u'Central European Summer Time'
|
||||
|
||||
.. versionadded:: 0.9
|
||||
"""
|
||||
return self._data['meta_zones']
|
||||
|
||||
@property
|
||||
def zone_formats(self):
|
||||
"""Patterns related to the formatting of time zones.
|
||||
|
||||
>>> Locale('en', 'US').zone_formats['fallback']
|
||||
u'%(1)s (%(0)s)'
|
||||
>>> Locale('pt', 'BR').zone_formats['region']
|
||||
u'Hor\\xe1rio %s'
|
||||
|
||||
.. versionadded:: 0.9
|
||||
"""
|
||||
return self._data['zone_formats']
|
||||
|
||||
@property
|
||||
def first_week_day(self):
|
||||
"""The first day of a week, with 0 being Monday.
|
||||
|
||||
>>> Locale('de', 'DE').first_week_day
|
||||
0
|
||||
>>> Locale('en', 'US').first_week_day
|
||||
6
|
||||
"""
|
||||
return self._data['week_data']['first_day']
|
||||
|
||||
@property
|
||||
def weekend_start(self):
|
||||
"""The day the weekend starts, with 0 being Monday.
|
||||
|
||||
>>> Locale('de', 'DE').weekend_start
|
||||
5
|
||||
"""
|
||||
return self._data['week_data']['weekend_start']
|
||||
|
||||
@property
|
||||
def weekend_end(self):
|
||||
"""The day the weekend ends, with 0 being Monday.
|
||||
|
||||
>>> Locale('de', 'DE').weekend_end
|
||||
6
|
||||
"""
|
||||
return self._data['week_data']['weekend_end']
|
||||
|
||||
@property
|
||||
def min_week_days(self):
|
||||
"""The minimum number of days in a week so that the week is counted as
|
||||
the first week of a year or month.
|
||||
|
||||
>>> Locale('de', 'DE').min_week_days
|
||||
4
|
||||
"""
|
||||
return self._data['week_data']['min_days']
|
||||
|
||||
@property
|
||||
def date_formats(self):
|
||||
"""Locale patterns for date formatting.
|
||||
|
||||
>>> Locale('en', 'US').date_formats['short']
|
||||
<DateTimePattern u'M/d/yy'>
|
||||
>>> Locale('fr', 'FR').date_formats['long']
|
||||
<DateTimePattern u'd MMMM y'>
|
||||
"""
|
||||
return self._data['date_formats']
|
||||
|
||||
@property
|
||||
def time_formats(self):
|
||||
"""Locale patterns for time formatting.
|
||||
|
||||
>>> Locale('en', 'US').time_formats['short']
|
||||
<DateTimePattern u'h:mm a'>
|
||||
>>> Locale('fr', 'FR').time_formats['long']
|
||||
<DateTimePattern u'HH:mm:ss z'>
|
||||
"""
|
||||
return self._data['time_formats']
|
||||
|
||||
@property
|
||||
def datetime_formats(self):
|
||||
"""Locale patterns for datetime formatting.
|
||||
|
||||
>>> Locale('en').datetime_formats['full']
|
||||
u"{1} 'at' {0}"
|
||||
>>> Locale('th').datetime_formats['medium']
|
||||
u'{1}, {0}'
|
||||
"""
|
||||
return self._data['datetime_formats']
|
||||
|
||||
@property
|
||||
def plural_form(self):
|
||||
"""Plural rules for the locale.
|
||||
|
||||
>>> Locale('en').plural_form(1)
|
||||
'one'
|
||||
>>> Locale('en').plural_form(0)
|
||||
'other'
|
||||
>>> Locale('fr').plural_form(0)
|
||||
'one'
|
||||
>>> Locale('ru').plural_form(100)
|
||||
'many'
|
||||
"""
|
||||
return self._data['plural_form']
|
||||
|
||||
|
||||
def default_locale(category=None, aliases=LOCALE_ALIASES):
|
||||
"""Returns the system default locale for a given category, based on
|
||||
environment variables.
|
||||
|
||||
>>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
|
||||
... os.environ[name] = ''
|
||||
>>> os.environ['LANG'] = 'fr_FR.UTF-8'
|
||||
>>> default_locale('LC_MESSAGES')
|
||||
'fr_FR'
|
||||
|
||||
The "C" or "POSIX" pseudo-locales are treated as aliases for the
|
||||
"en_US_POSIX" locale:
|
||||
|
||||
>>> os.environ['LC_MESSAGES'] = 'POSIX'
|
||||
>>> default_locale('LC_MESSAGES')
|
||||
'en_US_POSIX'
|
||||
|
||||
The following fallbacks to the variable are always considered:
|
||||
|
||||
- ``LANGUAGE``
|
||||
- ``LC_ALL``
|
||||
- ``LC_CTYPE``
|
||||
- ``LANG``
|
||||
|
||||
:param category: one of the ``LC_XXX`` environment variable names
|
||||
:param aliases: a dictionary of aliases for locale identifiers
|
||||
"""
|
||||
varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
|
||||
for name in filter(None, varnames):
|
||||
locale = os.getenv(name)
|
||||
if locale:
|
||||
if name == 'LANGUAGE' and ':' in locale:
|
||||
# the LANGUAGE variable may contain a colon-separated list of
|
||||
# language codes; we just pick the language on the list
|
||||
locale = locale.split(':')[0]
|
||||
if locale in ('C', 'POSIX'):
|
||||
locale = 'en_US_POSIX'
|
||||
elif aliases and locale in aliases:
|
||||
locale = aliases[locale]
|
||||
try:
|
||||
return get_locale_identifier(parse_locale(locale))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES):
|
||||
"""Find the best match between available and requested locale strings.
|
||||
|
||||
>>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
|
||||
'de_DE'
|
||||
>>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
|
||||
'de'
|
||||
|
||||
Case is ignored by the algorithm, the result uses the case of the preferred
|
||||
locale identifier:
|
||||
|
||||
>>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
|
||||
'de_DE'
|
||||
|
||||
>>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
|
||||
'de_DE'
|
||||
|
||||
By default, some web browsers unfortunately do not include the territory
|
||||
in the locale identifier for many locales, and some don't even allow the
|
||||
user to easily add the territory. So while you may prefer using qualified
|
||||
locale identifiers in your web-application, they would not normally match
|
||||
the language-only locale sent by such browsers. To workaround that, this
|
||||
function uses a default mapping of commonly used langauge-only locale
|
||||
identifiers to identifiers including the territory:
|
||||
|
||||
>>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
|
||||
'ja_JP'
|
||||
|
||||
Some browsers even use an incorrect or outdated language code, such as "no"
|
||||
for Norwegian, where the correct locale identifier would actually be "nb_NO"
|
||||
(Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of
|
||||
such cases, too:
|
||||
|
||||
>>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
|
||||
'nb_NO'
|
||||
|
||||
You can override this default mapping by passing a different `aliases`
|
||||
dictionary to this function, or you can bypass the behavior althogher by
|
||||
setting the `aliases` parameter to `None`.
|
||||
|
||||
:param preferred: the list of locale strings preferred by the user
|
||||
:param available: the list of locale strings available
|
||||
:param sep: character that separates the different parts of the locale
|
||||
strings
|
||||
:param aliases: a dictionary of aliases for locale identifiers
|
||||
"""
|
||||
available = [a.lower() for a in available if a]
|
||||
for locale in preferred:
|
||||
ll = locale.lower()
|
||||
if ll in available:
|
||||
return locale
|
||||
if aliases:
|
||||
alias = aliases.get(ll)
|
||||
if alias:
|
||||
alias = alias.replace('_', sep)
|
||||
if alias.lower() in available:
|
||||
return alias
|
||||
parts = locale.split(sep)
|
||||
if len(parts) > 1 and parts[0].lower() in available:
|
||||
return parts[0]
|
||||
return None
|
||||
|
||||
|
||||
def parse_locale(identifier, sep='_'):
|
||||
"""Parse a locale identifier into a tuple of the form ``(language,
|
||||
territory, script, variant)``.
|
||||
|
||||
>>> parse_locale('zh_CN')
|
||||
('zh', 'CN', None, None)
|
||||
>>> parse_locale('zh_Hans_CN')
|
||||
('zh', 'CN', 'Hans', None)
|
||||
|
||||
The default component separator is "_", but a different separator can be
|
||||
specified using the `sep` parameter:
|
||||
|
||||
>>> parse_locale('zh-CN', sep='-')
|
||||
('zh', 'CN', None, None)
|
||||
|
||||
If the identifier cannot be parsed into a locale, a `ValueError` exception
|
||||
is raised:
|
||||
|
||||
>>> parse_locale('not_a_LOCALE_String')
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
|
||||
|
||||
Encoding information and locale modifiers are removed from the identifier:
|
||||
|
||||
>>> parse_locale('it_IT@euro')
|
||||
('it', 'IT', None, None)
|
||||
>>> parse_locale('en_US.UTF-8')
|
||||
('en', 'US', None, None)
|
||||
>>> parse_locale('de_DE.iso885915@euro')
|
||||
('de', 'DE', None, None)
|
||||
|
||||
See :rfc:`4646` for more information.
|
||||
|
||||
:param identifier: the locale identifier string
|
||||
:param sep: character that separates the different components of the locale
|
||||
identifier
|
||||
:raise `ValueError`: if the string does not appear to be a valid locale
|
||||
identifier
|
||||
"""
|
||||
if '.' in identifier:
|
||||
# this is probably the charset/encoding, which we don't care about
|
||||
identifier = identifier.split('.', 1)[0]
|
||||
if '@' in identifier:
|
||||
# this is a locale modifier such as @euro, which we don't care about
|
||||
# either
|
||||
identifier = identifier.split('@', 1)[0]
|
||||
|
||||
parts = identifier.split(sep)
|
||||
lang = parts.pop(0).lower()
|
||||
if not lang.isalpha():
|
||||
raise ValueError('expected only letters, got %r' % lang)
|
||||
|
||||
script = territory = variant = None
|
||||
if parts:
|
||||
if len(parts[0]) == 4 and parts[0].isalpha():
|
||||
script = parts.pop(0).title()
|
||||
|
||||
if parts:
|
||||
if len(parts[0]) == 2 and parts[0].isalpha():
|
||||
territory = parts.pop(0).upper()
|
||||
elif len(parts[0]) == 3 and parts[0].isdigit():
|
||||
territory = parts.pop(0)
|
||||
|
||||
if parts:
|
||||
if len(parts[0]) == 4 and parts[0][0].isdigit() or \
|
||||
len(parts[0]) >= 5 and parts[0][0].isalpha():
|
||||
variant = parts.pop()
|
||||
|
||||
if parts:
|
||||
raise ValueError('%r is not a valid locale identifier' % identifier)
|
||||
|
||||
return lang, territory, script, variant
|
||||
|
||||
|
||||
def get_locale_identifier(tup, sep='_'):
|
||||
"""The reverse of :func:`parse_locale`. It creates a locale identifier out
|
||||
of a ``(language, territory, script, variant)`` tuple. Items can be set to
|
||||
``None`` and trailing ``None``\s can also be left out of the tuple.
|
||||
|
||||
>>> get_locale_identifier(('de', 'DE', None, '1999'))
|
||||
'de_DE_1999'
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
:param tup: the tuple as returned by :func:`parse_locale`.
|
||||
:param sep: the separator for the identifier.
|
||||
"""
|
||||
tup = tuple(tup[:4])
|
||||
lang, territory, script, variant = tup + (None,) * (4 - len(tup))
|
||||
return sep.join(filter(None, (lang, script, territory, variant)))
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -1,209 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
babel.localedata
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
Low-level locale data access.
|
||||
|
||||
:note: The `Locale` class, which uses this module under the hood, provides a
|
||||
more convenient interface for accessing the locale data.
|
||||
|
||||
:copyright: (c) 2013 by the Babel Team.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from collections import MutableMapping
|
||||
|
||||
from babel._compat import pickle
|
||||
|
||||
|
||||
_cache = {}
|
||||
_cache_lock = threading.RLock()
|
||||
_dirname = os.path.join(os.path.dirname(__file__), 'localedata')
|
||||
|
||||
|
||||
def exists(name):
|
||||
"""Check whether locale data is available for the given locale. Ther
|
||||
return value is `True` if it exists, `False` otherwise.
|
||||
|
||||
:param name: the locale identifier string
|
||||
"""
|
||||
if name in _cache:
|
||||
return True
|
||||
return os.path.exists(os.path.join(_dirname, '%s.dat' % name))
|
||||
|
||||
|
||||
def locale_identifiers():
|
||||
"""Return a list of all locale identifiers for which locale data is
|
||||
available.
|
||||
|
||||
.. versionadded:: 0.8.1
|
||||
|
||||
:return: a list of locale identifiers (strings)
|
||||
"""
|
||||
return [stem for stem, extension in [
|
||||
os.path.splitext(filename) for filename in os.listdir(_dirname)
|
||||
] if extension == '.dat' and stem != 'root']
|
||||
|
||||
|
||||
def load(name, merge_inherited=True):
|
||||
"""Load the locale data for the given locale.
|
||||
|
||||
The locale data is a dictionary that contains much of the data defined by
|
||||
the Common Locale Data Repository (CLDR). This data is stored as a
|
||||
collection of pickle files inside the ``babel`` package.
|
||||
|
||||
>>> d = load('en_US')
|
||||
>>> d['languages']['sv']
|
||||
u'Swedish'
|
||||
|
||||
Note that the results are cached, and subsequent requests for the same
|
||||
locale return the same dictionary:
|
||||
|
||||
>>> d1 = load('en_US')
|
||||
>>> d2 = load('en_US')
|
||||
>>> d1 is d2
|
||||
True
|
||||
|
||||
:param name: the locale identifier string (or "root")
|
||||
:param merge_inherited: whether the inherited data should be merged into
|
||||
the data of the requested locale
|
||||
:raise `IOError`: if no locale data file is found for the given locale
|
||||
identifer, or one of the locales it inherits from
|
||||
"""
|
||||
_cache_lock.acquire()
|
||||
try:
|
||||
data = _cache.get(name)
|
||||
if not data:
|
||||
# Load inherited data
|
||||
if name == 'root' or not merge_inherited:
|
||||
data = {}
|
||||
else:
|
||||
parts = name.split('_')
|
||||
if len(parts) == 1:
|
||||
parent = 'root'
|
||||
else:
|
||||
parent = '_'.join(parts[:-1])
|
||||
data = load(parent).copy()
|
||||
filename = os.path.join(_dirname, '%s.dat' % name)
|
||||
fileobj = open(filename, 'rb')
|
||||
try:
|
||||
if name != 'root' and merge_inherited:
|
||||
merge(data, pickle.load(fileobj))
|
||||
else:
|
||||
data = pickle.load(fileobj)
|
||||
_cache[name] = data
|
||||
finally:
|
||||
fileobj.close()
|
||||
return data
|
||||
finally:
|
||||
_cache_lock.release()
|
||||
|
||||
|
||||
def merge(dict1, dict2):
|
||||
"""Merge the data from `dict2` into the `dict1` dictionary, making copies
|
||||
of nested dictionaries.
|
||||
|
||||
>>> d = {1: 'foo', 3: 'baz'}
|
||||
>>> merge(d, {1: 'Foo', 2: 'Bar'})
|
||||
>>> items = d.items(); items.sort(); items
|
||||
[(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
|
||||
|
||||
:param dict1: the dictionary to merge into
|
||||
:param dict2: the dictionary containing the data that should be merged
|
||||
"""
|
||||
for key, val2 in dict2.items():
|
||||
if val2 is not None:
|
||||
val1 = dict1.get(key)
|
||||
if isinstance(val2, dict):
|
||||
if val1 is None:
|
||||
val1 = {}
|
||||
if isinstance(val1, Alias):
|
||||
val1 = (val1, val2)
|
||||
elif isinstance(val1, tuple):
|
||||
alias, others = val1
|
||||
others = others.copy()
|
||||
merge(others, val2)
|
||||
val1 = (alias, others)
|
||||
else:
|
||||
val1 = val1.copy()
|
||||
merge(val1, val2)
|
||||
else:
|
||||
val1 = val2
|
||||
dict1[key] = val1
|
||||
|
||||
|
||||
class Alias(object):
|
||||
"""Representation of an alias in the locale data.
|
||||
|
||||
An alias is a value that refers to some other part of the locale data,
|
||||
as specified by the `keys`.
|
||||
"""
|
||||
|
||||
def __init__(self, keys):
|
||||
self.keys = tuple(keys)
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s %r>' % (type(self).__name__, self.keys)
|
||||
|
||||
def resolve(self, data):
|
||||
"""Resolve the alias based on the given data.
|
||||
|
||||
This is done recursively, so if one alias resolves to a second alias,
|
||||
that second alias will also be resolved.
|
||||
|
||||
:param data: the locale data
|
||||
:type data: `dict`
|
||||
"""
|
||||
base = data
|
||||
for key in self.keys:
|
||||
data = data[key]
|
||||
if isinstance(data, Alias):
|
||||
data = data.resolve(base)
|
||||
elif isinstance(data, tuple):
|
||||
alias, others = data
|
||||
data = alias.resolve(base)
|
||||
return data
|
||||
|
||||
|
||||
class LocaleDataDict(MutableMapping):
|
||||
"""Dictionary wrapper that automatically resolves aliases to the actual
|
||||
values.
|
||||
"""
|
||||
|
||||
def __init__(self, data, base=None):
|
||||
self._data = data
|
||||
if base is None:
|
||||
base = data
|
||||
self.base = base
|
||||
|
||||
def __len__(self):
|
||||
return len(self._data)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._data)
|
||||
|
||||
def __getitem__(self, key):
|
||||
orig = val = self._data[key]
|
||||
if isinstance(val, Alias): # resolve an alias
|
||||
val = val.resolve(self.base)
|
||||
if isinstance(val, tuple): # Merge a partial dict with an alias
|
||||
alias, others = val
|
||||
val = alias.resolve(self.base).copy()
|
||||
merge(val, others)
|
||||
if type(val) is dict: # Return a nested alias-resolving dict
|
||||
val = LocaleDataDict(val, base=self.base)
|
||||
if val is not orig:
|
||||
self._data[key] = val
|
||||
return val
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._data[key] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._data[key]
|
||||
|
||||
def copy(self):
|
||||
return LocaleDataDict(self._data.copy(), base=self.base)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue