Merge branch 'fix/syntax-python3-20170305' into travis

# Conflicts:
#	cps/epub.py
#	cps/web.py
pull/152/head^2^2
林檎 8 years ago
commit 3369972073

@ -299,7 +299,7 @@ def setup_db():
try:
conn = engine.connect()
except:
except Exception as e:
content = ub.session.query(ub.Settings).first()
content.config_calibre_dir = None
content.db_configured = False

@ -41,6 +41,11 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]
epub_metadata = {}
try:#maybe description isn't present
comments = tree.xpath("//*[local-name() = 'description']/text()")[0]
epub_metadata['comments'] = comments
except IndexError as e:
epub_metadata['comments'] = ""
for s in ['title', 'description', 'creator', 'language']:
tmp = p.xpath('dc:%s/text()' % s, namespaces=ns)
@ -66,6 +71,8 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
epub_metadata['language'] = isoLanguages.get(part3=lang).name
else:
epub_metadata['language'] = ""
except IndexError as e:
epub_metadata['language'] = ""
coversection = tree.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover-image']/@href", namespaces=ns)
coverfile = None

@ -4,9 +4,11 @@
from lxml import etree
import os
import uploader
try:
from io import StringIO
except ImportError as e:
import StringIO
def get_fb2_info(tmp_file_path, original_file_extension):
ns = {

@ -14,15 +14,15 @@ import traceback
import re
import unicodedata
try:
from StringIO import StringIO
from email.MIMEBase import MIMEBase
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
except ImportError:
from io import StringIO
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
except ImportError as e:
from StringIO import StringIO
from email.MIMEBase import MIMEBase
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from email import encoders
from email.generator import Generator
from email.utils import formatdate
@ -38,7 +38,7 @@ from tornado.ioloop import IOLoop
try:
import unidecode
use_unidecode=True
except:
except Exception as e:
use_unidecode=False
# Global variables
@ -245,7 +245,10 @@ def get_valid_filename(value, replace_whitespace=True):
value=value.replace(u'ß',u'ss')
value = unicodedata.normalize('NFKD', value)
re_slugify = re.compile('[\W\s-]', re.UNICODE)
value = str(re_slugify.sub('', value).strip())
if type(value) is str: #Python3 str, Python2 unicode
value = re_slugify.sub('', value).strip()
else:
value = unicode(re_slugify.sub('', value).strip())
if replace_whitespace:
#*+:\"/<>? werden durch _ ersetzt
value = re.sub('[\*\+:\\\"/<>\?]+', u'_', value, flags=re.U)
@ -385,7 +388,7 @@ class Updater(threading.Thread):
try:
os.chown(dst_file, permission.st_uid, permission.st_uid)
# print('Permissions: User '+str(new_permissions.st_uid)+' Group '+str(new_permissions.st_uid))
except:
except Exception as e:
e = sys.exc_info()
logging.getLogger('cps.web').debug('Fail '+str(dst_file)+' error: '+str(e))
return
@ -427,7 +430,7 @@ class Updater(threading.Thread):
logging.getLogger('cps.web').debug("Delete file " + item_path)
log_from_thread("Delete file " + item_path)
os.remove(item_path)
except:
except Exception as e:
logging.getLogger('cps.web').debug("Could not remove:" + item_path)
shutil.rmtree(source, ignore_errors=True)

@ -10,7 +10,7 @@ import os
import logging
from werkzeug.security import generate_password_hash
from flask_babel import gettext as _
from builtins import str
#from builtins import str
dbpath = os.path.join(os.path.normpath(os.path.dirname(os.path.realpath(__file__)) + os.sep + ".." + os.sep), "app.db")
engine = create_engine('sqlite:///{0}'.format(dbpath), echo=False)
@ -439,7 +439,7 @@ def create_anonymous_user():
session.add(user)
try:
session.commit()
except:
except Exception as e:
session.rollback()
pass
@ -457,7 +457,7 @@ def create_admin_user():
session.add(user)
try:
session.commit()
except:
except Exception as e:
session.rollback()
pass

@ -18,9 +18,9 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy import __version__ as sqlalchemyVersion
from math import ceil
from flask_login import LoginManager, login_user, logout_user, login_required, current_user
from flask_login import __version__ as flask_loginVersion
from flask_login.__about__ import __version__ as flask_loginVersion
from flask_principal import Principal, Identity, AnonymousIdentity, identity_changed
from flask_login import __version__ as flask_principalVersion
from flask_principal import __version__ as flask_principalVersion
from flask_babel import Babel
from flask_babel import gettext as _
import requests
@ -47,13 +47,14 @@ import db
from shutil import move, copyfile
from tornado.ioloop import IOLoop
from tornado import version as tornadoVersion
from builtins import str
#from builtins import str
try:
from urllib.parse import quote
from imp import reload
from past.builtins import xrange
except:
pass
except ImportError as e:
from urllib import quote
try:
from wand.image import Image
@ -286,7 +287,7 @@ def shortentitle_filter(s):
def mimetype_filter(val):
try:
s = mimetypes.types_map['.' + val]
except:
except Exception as e:
s = 'application/octet-stream'
return s
@ -761,7 +762,7 @@ def get_updater_status():
elif request.method == "GET":
try:
status['status']=helper.updater_thread.get_update_status()
except:
except Exception as e:
status['status'] = 7
return json.dumps(status)
@ -776,7 +777,7 @@ def get_languages_json():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
entries = [s for s in languages if query in s.name.lower()]
json_dumps = json.dumps([dict(name=r.name) for r in entries])
@ -936,13 +937,13 @@ def language_overview():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
else:
try:
langfound = 1
cur_l = LC.parse(current_user.filter_language())
except:
except Exception as e:
langfound = 0
languages = db.session.query(db.Languages).filter(
db.Languages.lang_code == current_user.filter_language()).all()
@ -966,7 +967,7 @@ def language(name, page):
try:
cur_l = LC.parse(name)
name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
name = _(isoLanguages.get(part3=name).name)
return render_title_template('index.html', random=random, entries=entries, pagination=pagination,
title=_(u"Language: %(name)s", name=name))
@ -1010,7 +1011,7 @@ def show_book(id):
try:
entries.languages[index].language_name = LC.parse(entries.languages[index].lang_code).get_language_name(
get_locale())
except:
except Exception as e:
entries.languages[index].language_name = _(
isoLanguages.get(part3=entries.languages[index].lang_code).name)
cc = db.session.query(db.Custom_Columns).filter(db.Custom_Columns.datatype.notin_(db.cc_exceptions)).all()
@ -1051,8 +1052,10 @@ def stats():
stdin=subprocess.PIPE)
p.wait()
for lines in p.stdout.readlines():
if re.search('Amazon kindlegen\(', str(lines)):
versions['KindlegenVersion'] = str(lines)
if type(lines) is bytes:
lines = lines.decode('utf-8')
if re.search('Amazon kindlegen\(', lines):
versions['KindlegenVersion'] = lines
versions['PythonVersion'] = sys.version
versions['babel'] = babelVersion
versions['sqlalchemy'] = sqlalchemyVersion
@ -1154,7 +1157,7 @@ def advanced_search():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
searchterm.extend(language.name for language in language_names)
searchterm = " + ".join(filter(None, searchterm))
@ -1186,7 +1189,7 @@ def advanced_search():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
else:
languages = None
@ -1243,22 +1246,22 @@ def read_book(book_id, format):
zfile.close()
return render_title_template('read.html', bookid=book_id, title=_(u"Read a Book"))
elif format.lower() == "pdf":
all_name = str(book_id) + "/" + urllib.quote(book.data[0].name) + ".pdf"
tmp_file = os.path.join(book_dir, urllib.quote(book.data[0].name)) + ".pdf"
all_name = str(book_id) + "/" + quote(book.data[0].name) + ".pdf"
tmp_file = os.path.join(book_dir, quote(book.data[0].name)) + ".pdf"
if not os.path.exists(tmp_file):
pdf_file = os.path.join(config.config_calibre_dir, book.path, book.data[0].name) + ".pdf"
copyfile(pdf_file, tmp_file)
return render_title_template('readpdf.html', pdffile=all_name, title=_(u"Read a Book"))
elif format.lower() == "txt":
all_name = str(book_id) + "/" + urllib.quote(book.data[0].name) + ".txt"
tmp_file = os.path.join(book_dir, urllib.quote(book.data[0].name)) + ".txt"
all_name = str(book_id) + "/" + quote(book.data[0].name) + ".txt"
tmp_file = os.path.join(book_dir, quote(book.data[0].name)) + ".txt"
if not os.path.exists(all_name):
txt_file = os.path.join(config.config_calibre_dir, book.path, book.data[0].name) + ".txt"
copyfile(txt_file, tmp_file)
return render_title_template('readtxt.html', txtfile=all_name, title=_(u"Read a Book"))
elif format.lower() == "cbr":
all_name = str(book_id) + "/" + urllib.quote(book.data[0].name) + ".cbr"
tmp_file = os.path.join(book_dir, urllib.quote(book.data[0].name)) + ".cbr"
all_name = str(book_id) + "/" + quote(book.data[0].name) + ".cbr"
tmp_file = os.path.join(book_dir, quote(book.data[0].name)) + ".cbr"
if not os.path.exists(all_name):
cbr_file = os.path.join(config.config_calibre_dir, book.path, book.data[0].name) + ".cbr"
copyfile(cbr_file, tmp_file)
@ -1288,9 +1291,9 @@ def get_download_link(book_id, format):
send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + format))
try:
response.headers["Content-Type"] = mimetypes.types_map['.' + format]
except:
except Exception as e:
pass
response.headers["Content-Disposition"] = "attachment; filename=\"%s.%s\"" % (urllib.quote(file_name.encode('utf-8')), format)
response.headers["Content-Disposition"] = "attachment; filename=\"%s.%s\"" % (quote(file_name.encode('utf-8')), format)
return response
else:
abort(404)
@ -1320,7 +1323,7 @@ def register():
try:
ub.session.add(content)
ub.session.commit()
except:
except Exception as e:
ub.session.rollback()
flash(_(u"An unknown error occured. Please try again later."), category="error")
return render_title_template('register.html', title=_(u"register"))
@ -1445,7 +1448,7 @@ def create_shelf():
ub.session.add(shelf)
ub.session.commit()
flash(_(u"Shelf %(title)s created", title=to_save["title"]), category="success")
except:
except Exception as e:
flash(_(u"There was an error"), category="error")
return render_title_template('shelf_edit.html', shelf=shelf, title=_(u"create a shelf"))
else:
@ -1473,7 +1476,7 @@ def edit_shelf(shelf_id):
try:
ub.session.commit()
flash(_(u"Shelf %(title)s changed", title=to_save["title"]), category="success")
except:
except Exception as e:
flash(_(u"There was an error"), category="error")
return render_title_template('shelf_edit.html', shelf=shelf, title=_(u"Edit a shelf"))
else:
@ -1561,7 +1564,7 @@ def profile():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
translations = babel.list_translations() + [LC('en')]
for book in content.downloads:
@ -1737,7 +1740,7 @@ def new_user():
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
translations = [LC('en')] + babel.list_translations()
if request.method == "POST":
@ -1835,7 +1838,7 @@ def edit_user(user_id):
try:
cur_l = LC.parse(lang.lang_code)
lang.name = cur_l.get_language_name(get_locale())
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name)
translations = babel.list_translations() + [LC('en')]
for book in content.downloads:
@ -1957,7 +1960,7 @@ def edit_book(book_id):
try:
book.languages[index].language_name = LC.parse(book.languages[index].lang_code).get_language_name(
get_locale())
except:
except Exception as e:
book.languages[index].language_name = _(isoLanguages.get(part3=book.languages[index].lang_code).name)
for author in book.authors:
author_names.append(author.name)
@ -2007,7 +2010,7 @@ def edit_book(book_id):
for lang in languages:
try:
lang.name = LC.parse(lang.lang_code).get_language_name(get_locale()).lower()
except:
except Exception as e:
lang.name = _(isoLanguages.get(part3=lang.lang_code).name).lower()
for inp_lang in input_languages:
if inp_lang == lang.name:

@ -0,0 +1,3 @@
#!/bin/bash -e
pip install --target ./vendor -r requirements.txt

@ -1,2 +1,25 @@
future
sqlalchemy
PyPDF2
babel
blinker
click
flask
flask_babel
flask_login
flask_principal
iso-639
itsdangerous
jinja2
markupsafe
pytz
requests
singledispatch
six
sqlalchemy
tornado
https://pypi.python.org/packages/02/f8/97105237d0ba693b6f0bdcd94da0504e9a4433988c4393d8d3049094be7a/validate-1.0.1.tar.gz
#validate
wand
werkzeug

@ -1,22 +0,0 @@
Copyright (c) 2011 Matthew Frazier
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

@ -1,22 +0,0 @@
Copyright (c) 2012 Ali Afshar
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

@ -1,31 +0,0 @@
Copyright (c) 2011 by Armin Ronacher and the Django Software Foundation.
Some rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* The names of the contributors may not be used to endorse or
promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,5 +0,0 @@
from .pdf import PdfFileReader, PdfFileWriter
from .merger import PdfFileMerger
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__
__all__ = ["pdf", "PdfFileMerger"]

@ -1 +0,0 @@
__version__ = '1.26.0'

@ -1,362 +0,0 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
from .utils import PdfReadError, ord_, chr_
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
else:
from io import StringIO
import struct
try:
    import zlib

    def decompress(data):
        """Return *data* inflated with ``zlib.decompress``."""
        return zlib.decompress(data)

    def compress(data):
        """Return *data* deflated with ``zlib.compress``."""
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib. Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy the characters of *buf* into a new ``System.Byte`` array."""
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i, char in enumerate(buf):
            retval[i] = ord(char)
        return retval

    def _bytearr_to_string(byte_array):
        """Turn a ``System.Byte`` array back into a string, one char per byte."""
        # A single join avoids re-copying the whole result for every byte.
        return "".join(chr(byte_array[i]) for i in range(byte_array.Length))

    def _read_bytes(stream):
        """Drain *stream* in 2048-byte chunks and return everything read."""
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            count = stream.Read(buf, 0, buf.Length)
            if count == 0:
                # Read() returning 0 marks the end of the stream.
                break
            ms.Write(buf, 0, count)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        """Inflate *data* through ``IO.Compression.DeflateStream``."""
        raw = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(raw, 0, raw.Length)
        ms.Position = 0  # rewind so the deflate stream reads from the start
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        inflated = _read_bytes(gz)
        retval = _bytearr_to_string(inflated)
        gz.Close()
        return retval

    def compress(data):
        """Deflate *data* through ``IO.Compression.DeflateStream``."""
        raw = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        # The trailing True is passed so that ``ms`` can still be read after
        # ``gz`` has been closed.
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(raw, 0, raw.Length)
        gz.Close()
        ms.Position = 0  # rewind before reading the compressed bytes back
        deflated = ms.ToArray()
        retval = _bytearr_to_string(deflated)
        ms.Close()
        return retval
class FlateDecode(object):
    """Stream filter for Flate (zlib) compressed data.

    Both operations are static; the class only serves as a namespace.
    """

    @staticmethod
    def decode(data, decodeParms):
        """Inflate *data* and undo the PNG predictor, if one is declared.

        :param data: the compressed stream content.
        :param decodeParms: the stream's decode parameters, or a false value.
            ``/Predictor`` is read from it (default 1, i.e. none) and
            ``/Columns`` is required whenever a predictor is set.
        :return: the decoded data.  With a PNG predictor the result is a
            byte string (``bytes``), the same type the predictor-less path
            returns, instead of text built from ``chr()``.
        :raises PdfReadError: for a predictor outside 10..15 or a PNG row
            filter other than 0 (None), 1 (Sub) or 2 (Up).
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor == 1:
            return data

        columns = decodeParms["/Columns"]
        if not (predictor >= 10 and predictor <= 15):
            # unsupported predictor
            raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)

        # PNG prediction can vary from row to row: each row is one filter
        # byte followed by ``columns`` data bytes.
        rowlength = columns + 1
        assert len(data) % rowlength == 0
        output = bytearray()
        prev_rowdata = (0,) * rowlength
        for row in range(len(data) // rowlength):
            chunk = data[(row * rowlength):((row + 1) * rowlength)]
            # Byte strings yield ints on Python 3 and 1-char strings on
            # Python 2; normalise both to ints.
            rowdata = [b if isinstance(b, int) else ord(b) for b in chunk]
            filterByte = rowdata[0]
            if filterByte == 0:
                pass
            elif filterByte == 1:
                # Sub: add the preceding byte of the same row.
                for i in range(2, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - 1]) % 256
            elif filterByte == 2:
                # Up: add the byte at the same position in the previous row.
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            else:
                # unsupported PNG filter
                raise PdfReadError("Unsupported PNG filter %r" % filterByte)
            prev_rowdata = rowdata
            output.extend(rowdata[1:])  # drop the filter byte
        return bytes(output)

    @staticmethod
    def encode(data):
        """Return *data* compressed with the module's ``compress`` helper."""
        return compress(data)
class ASCIIHexDecode(object):
    """Stream filter for data written as pairs of hexadecimal digits."""

    @staticmethod
    def decode(data, decodeParms=None):
        """Decode hexadecimal text up to the ``>`` end-of-data marker.

        :param data: the encoded text; byte strings are accepted as well.
        :param decodeParms: unused, accepted for interface parity with the
            other filters.
        :return: a string with one character per decoded byte.
        :raises PdfReadError: when the data ends without a ``>`` marker.
        """
        # Python 3 byte strings iterate as ints, which have no isspace() and
        # never equal ">"; turn them into text before scanning.
        if isinstance(data, (bytes, bytearray)) and not isinstance(data, str):
            data = data.decode("latin-1")

        decoded = []
        pending = ""
        for c in data:
            if c == ">":
                break
            if c.isspace():
                continue
            pending += c
            if len(pending) == 2:
                decoded.append(chr(int(pending, base=16)))
                pending = ""
        else:
            # Ran off the end of the data without seeing the EOD marker.
            raise PdfReadError("Missing EOD marker '>' in ASCIIHexDecode data")

        if pending:
            # An odd number of digits is decoded as if a trailing 0 followed.
            decoded.append(chr(int(pending + "0", base=16)))
        return "".join(decoded)
class LZWDecode(object):
    """Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """

    class decoder(object):
        """Stateful LZW decoder working on one block of encoded data."""

        def __init__(self, data):
            """Prepare the code table and read position for *data*."""
            self.STOP = 257
            self.CLEARDICT = 256
            self.data = data
            self.bytepos = 0
            self.bitpos = 0
            self.dict = [""] * 4096
            # Codes 0..255 stand for the single characters themselves.
            for i in range(256):
                self.dict[i] = chr(i)
            self.resetDict()

        def resetDict(self):
            """Forget all learned entries and go back to 9-bit codes."""
            self.dictlen = 258
            self.bitspercode = 9

        def nextCode(self):
            """Return the next code from the data, or -1 when it runs out."""
            fillbits = self.bitspercode
            value = 0
            while fillbits > 0:
                if self.bytepos >= len(self.data):
                    return -1
                raw = self.data[self.bytepos]
                # Byte strings yield ints on Python 3 and 1-char strings on
                # Python 2; ord() alone fails on the former.
                nextbits = raw if isinstance(raw, int) else ord(raw)
                bitsfromhere = 8 - self.bitpos
                if bitsfromhere > fillbits:
                    bitsfromhere = fillbits
                # Take ``bitsfromhere`` bits of the current byte, starting at
                # ``bitpos`` from its high end, and place them into the
                # still-unfilled high part of the code.
                value |= (((nextbits >> (8 - self.bitpos - bitsfromhere)) &
                           (0xff >> (8 - bitsfromhere))) <<
                          (fillbits - bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >= 8:
                    self.bitpos = 0
                    self.bytepos = self.bytepos + 1
            return value

        def decode(self):
            """ algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference

            :return: the decoded data as a string.
            :raises PdfReadError: when the data ends before the stop code.
            """
            cW = self.CLEARDICT
            pieces = []  # joined once at the end instead of repeated +=
            while True:
                pW = cW
                cW = self.nextCode()
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break
                elif cW == self.CLEARDICT:
                    self.resetDict()
                elif pW == self.CLEARDICT:
                    # First code after a reset: emit it, nothing to learn yet.
                    pieces.append(self.dict[cW])
                else:
                    if cW < self.dictlen:
                        pieces.append(self.dict[cW])
                        p = self.dict[pW] + self.dict[cW][0]
                        self.dict[self.dictlen] = p
                        self.dictlen += 1
                    else:
                        # Code not in the table yet: it must be the previous
                        # string extended by its own first character.
                        p = self.dict[pW] + self.dict[pW][0]
                        pieces.append(p)
                        self.dict[self.dictlen] = p
                        self.dictlen += 1
                    # Widen the code size once the table is about to outgrow
                    # the current width, up to 12 bits.
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                            self.bitspercode < 12):
                        self.bitspercode += 1
            return "".join(pieces)

    @staticmethod
    def decode(data, decodeParams=None):
        """Decode LZW-encoded *data*; *decodeParams* is accepted but unused."""
        return LZWDecode.decoder(data).decode()
class ASCII85Decode(object):
    """Stream filter for ASCII base-85 encoded data."""

    @staticmethod
    def decode(data, decodeParms=None):
        """Decode base-85 text that is terminated by ``~>``.

        An optional leading ``<~`` is skipped on both Python 2 and Python 3.

        :param data: the encoded data (text, or bytes on Python 3).
        :param decodeParms: unused, accepted for interface parity with the
            other filters.
        :return: the decoded data; ``str`` on Python 2, ``bytes`` on Python 3.
        """
        if version_info < (3, 0):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x + 1] == "~":
                    x += 2
                    continue
                elif c == 'z':
                    # 'z' abbreviates a group of four zero bytes.
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x + 1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        group += [85, 85, 85]
                        # hitEod doubles as the number of bytes to keep from
                        # the padded final group.
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [c]
                if len(group) >= 5:
                    b = group[0] * (85 ** 4) + \
                        group[1] * (85 ** 3) + \
                        group[2] * (85 ** 2) + \
                        group[3] * 85 + \
                        group[4]
                    # 0xFFFFFFFF itself is a valid 32-bit group value.
                    assert b <= (2 ** 32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        retval = retval[:-4 + hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            # "<" is itself a valid base-85 digit, so an optional "<~" prefix
            # has to be removed up front or its "~" would end decoding.
            stripped = data.lstrip()
            if stripped[:2] == b"<~":
                data = stripped[2:]
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b * 85 + (c - 33)
                    if n == 5:
                        out += struct.pack(b'>L', b)
                        n = b = 0
                elif c == ord('z'):
                    # 'z' abbreviates a group of four zero bytes.
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        # Pad the partial group with the highest digit and
                        # keep only the bytes that were actually encoded.
                        for _ in range(5 - n):
                            b = b * 85 + 84
                        out += struct.pack(b'>L', b)[:n - 1]
                    break
            return bytes(out)
def decodeStreamData(stream):
    """Run the stream's raw data through every filter it declares.

    :param stream: object providing ``get()`` for its dictionary entries and
        the raw content in ``_data``.
    :return: the decoded data; the raw data is returned untouched when it is
        empty or no ``/Filter`` is present.
    :raises NotImplementedError: for an unknown filter, or for a ``/Crypt``
        filter whose decode parameters carry ``/Name`` or ``/Type``.
    """
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/Crypt":
                # The entry is spelled "/DecodeParms", as in the branches
                # above; looking up "/DecodeParams" always produced the empty
                # default and made the check below a no-op.
                decodeParams = stream.get("/DecodeParms", {})
                if "/Name" not in decodeParams and "/Type" not in decodeParams:
                    pass
                else:
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data

File diff suppressed because it is too large Load Diff

@ -1,553 +0,0 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from .generic import *
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
StreamIO = StringIO
else:
from io import BytesIO
from io import FileIO as file
StreamIO = BytesIO
class _MergedPage(object):
"""
_MergedPage is used internally by PdfFileMerger to collect necessary
information on each page that is being merged.
"""
def __init__(self, pagedata, src, id):
self.src = src
self.pagedata = pagedata
self.out_pagedata = None
self.id = id
class PdfFileMerger(object):
"""
Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
into a single PDF. It can concatenate, slice, insert, or any combination
of the above.
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
and :meth:`write()<write>` for usage information.
:param bool strict: Determines whether user should be warned of all
problems and also causes some correctable problems to be fatal.
Defaults to ``True``.
"""
def __init__(self, strict=True):
    """Start with no inputs, pages, bookmarks or named destinations.

    :param bool strict: stored on the instance and handed to every
        PdfFileReader the merger creates.
    """
    self.strict = strict
    self.id_count = 0
    # Collections that the merge and write steps fill in.
    self.inputs = []
    self.pages = []
    self.bookmarks = []
    self.named_dests = []
    self.output = PdfFileWriter()
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.
    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.
    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.
    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.
    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.
    :raises TypeError: if *pages* is neither ``None``, a PageRange nor a tuple.
    """
    # This parameter is passed to self.inputs.append and means
    # that the stream used was created in this method.
    my_file = False

    # If the fileobj parameter is a string, assume it is a path
    # and create a file object at that location. If it is a file,
    # copy the file's contents into a BytesIO (or StreamIO) stream object; if
    # it is a PdfFileReader, copy that reader's stream into a
    # BytesIO (or StreamIO) stream.
    # If fileobj is none of the above types, it is not modified
    decryption_key = None
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif isinstance(fileobj, file):
        fileobj.seek(0)
        filecontent = fileobj.read()
        fileobj = StreamIO(filecontent)
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Pick up the key while fileobj is still the reader; once it is
        # rebound to the in-memory stream below the attribute is gone.
        if hasattr(fileobj, '_decryption_key'):
            decryption_key = fileobj._decryption_key
        orig_tell = fileobj.stream.tell()
        fileobj.stream.seek(0)
        filecontent = StreamIO(fileobj.stream.read())
        fileobj.stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = filecontent
        my_file = True

    # Create a new PdfFileReader instance using the stream
    # (either file or BytesIO or StringIO) created above
    pdfr = PdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    srcpages = []
    if bookmark:
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    if bookmark:
        # The imported outline is appended as one nested list right after
        # the new bookmark.
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    for i in range(*pages):
        pg = pdfr.getPage(i)

        page_id = self.id_count
        self.id_count += 1

        mp = _MergedPage(pg, pdfr, page_id)

        srcpages.append(mp)

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Concatenate a document onto the end of the merged output.

    This is :meth:`merge()<merge>` with the insert position fixed at the
    current number of pages, so the new pages always go last.

    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.
    :param str bookmark: Optional text of a bookmark to be applied at the
        beginning of the included file.
    :param pages: a :ref:`Page Range <page-range>` or a
        ``(start, stop[, step])`` tuple selecting which source pages to
        merge into the output document.
    :param bool import_bookmarks: pass ``False`` to keep the source
        document's bookmarks from being imported.
    """
    end_position = len(self.pages)
    self.merge(end_position, fileobj, bookmark, pages, import_bookmarks)
def write(self, fileobj):
    """
    Writes all data that has been merged to the given output file.

    :param fileobj: Output file. Can be a filename or any kind of
        file-like object. When a filename is given, the file is opened
        here and closed again before returning, even if writing fails.
    """
    my_file = False
    if isString(fileobj):
        # ``file()`` exists only on Python 2; ``open()`` works on both.
        fileobj = open(fileobj, 'wb')
        my_file = True

    try:
        # Add pages to the PdfFileWriter.
        # The reference is looked up through the writer's page tree (instead
        # of building an IndirectObject by index) so that PdfFileMerger
        # keeps working with PyPdf 1.13.
        for page in self.pages:
            self.output.addPage(page.pagedata)
            page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())

        # Once all pages are added, create bookmarks to point at those pages
        self._write_dests()
        self._write_bookmarks()

        # Write the output to the file
        self.output.write(fileobj)
    finally:
        # Only close handles this method opened itself.
        if my_file:
            fileobj.close()
def close(self):
    """
    Release every input stream this merger owns and drop all references
    to pages, inputs and the output writer.
    """
    self.pages = []
    for stream, _reader, owned in self.inputs:
        # Streams supplied by the caller (owned is falsy) are left open.
        if not owned:
            continue
        stream.close()
    self.inputs = []
    self.output = None
def addMetadata(self, infos):
    """
    Forward custom metadata to the output writer.

    :param dict infos: a Python dictionary where each key is a field
        and each value is your new metadata.
        Example: ``{u'/Title': u'My title'}``
    """
    writer = self.output
    writer.addMetadata(infos)
def setPageLayout(self, layout):
    """
    Forward the requested page layout to the output writer.

    :param str layout: The page layout to be used

    Valid layouts are:
         /NoLayout        Layout explicitly not specified
         /SinglePage      Show one page at a time
         /OneColumn       Show one column at a time
         /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
         /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
         /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
         /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
    """
    writer = self.output
    writer.setPageLayout(layout)
def setPageMode(self, mode):
    """
    Forward the requested page mode to the output writer.

    :param str mode: The page mode to use.

    Valid modes are:
        /UseNone         Do not show outlines or thumbnails panels
        /UseOutlines     Show outlines (aka bookmarks) panel
        /UseThumbs       Show page thumbnails panel
        /FullScreen      Fullscreen view
        /UseOC           Show Optional Content Group (OCG) panel
        /UseAttachments  Show attachments panel
    """
    writer = self.output
    writer.setPageMode(mode)
def _trim_dests(self, pdf, dests, pages):
    """
    Removes any named destinations that are not a part of the specified
    page set.

    :param pdf: reader whose ``getPage(i)`` supplies the candidate pages.
    :param dests: mapping of destination name to destination dictionary.
    :param pages: arguments for ``range()`` selecting the page indices to keep.
    :return: list of the destinations that point at one of the selected
        pages; each kept destination has its ``/Page`` entry replaced by
        the resolved page object.
    """
    new_dests = []
    for k, o in list(dests.items()):
        for j in range(*pages):
            if pdf.getPage(j).getObject() == o['/Page'].getObject():
                o[NameObject('/Page')] = o['/Page'].getObject()
                # Sanity check: the mapping key is expected to equal the title.
                assert str_(k) == str_(o['/Title'])
                new_dests.append(o)
                break
    return new_dests
def _trim_outline(self, pdf, outline, pages):
    """
    Build a copy of ``outline`` limited to entries whose page is in the
    selected page set.

    Nested lists are trimmed recursively. When a nested list keeps at least
    one entry but the entry just before it was dropped, that preceding
    entry is re-added ahead of the nested list.

    :param pdf: reader whose ``getPage(i)`` supplies the candidate pages.
    :param outline: list of outline entries and nested lists of entries.
    :param pages: arguments for ``range()`` selecting the page indices to keep.
    :return: the trimmed outline list.
    """
    kept = []
    header_kept = True
    for index, entry in enumerate(outline):
        if isinstance(entry, list):
            children = self._trim_outline(pdf, entry, pages)
            if children:
                if not header_kept:
                    kept.append(outline[index - 1])
                kept.append(children)
            continue

        header_kept = False
        for page_index in range(*pages):
            if pdf.getPage(page_index).getObject() == entry['/Page'].getObject():
                # Store the resolved page object on the entry itself.
                entry[NameObject('/Page')] = entry['/Page'].getObject()
                kept.append(entry)
                header_kept = True
                break
    return kept
def _write_dests(self):
    """
    Add every named destination whose ``/Page`` id matches a merged page
    to the output, pointing it at that page's output reference.

    Destinations without a ``/Page`` entry, or whose id matches no merged
    page, are not written.
    """
    for dest in self.named_dests:
        if '/Page' not in dest:
            continue
        for page in self.pages:
            if page.id == dest['/Page']:
                dest[NameObject('/Page')] = page.out_pagedata
                self.output.addNamedDestinationObject(dest)
                break
def _write_bookmarks(self, bookmarks=None, parent=None):
    """
    Add the collected bookmarks to the output writer.

    Nested lists are written recursively, using the bookmark most recently
    added at the current level as their parent. For each bookmark whose
    ``/Page`` id matches a merged page, a ``/GoTo`` action is built from
    the bookmark's fit type and coordinates, the temporary ``/Page``,
    ``/Type`` and coordinate keys are removed, and the bookmark is handed
    to ``self.output.addBookmarkDict``.

    :param bookmarks: list of bookmarks (and nested lists) to write;
        defaults to ``self.bookmarks``.
    :param parent: parent passed to ``addBookmarkDict`` for this level.
    """
    # Coordinate keys that each fit type contributes to the destination
    # array, in the order they are appended. Types not listed here
    # (e.g. /Fit, /FitB) contribute none.
    fit_coordinates = {
        '/FitH': ('/Top',),
        '/FitBH': ('/Top',),
        '/FitV': ('/Left',),
        '/FitBV': ('/Left',),
        '/XYZ': ('/Left', '/Top', '/Zoom'),
        '/FitR': ('/Left', '/Bottom', '/Right', '/Top'),
    }

    if bookmarks is None:
        bookmarks = self.bookmarks

    last_added = None
    for b in bookmarks:
        if isinstance(b, list):
            self._write_bookmarks(b, last_added)
            continue

        if '/Page' not in b:
            continue

        for p in self.pages:
            if p.id != b['/Page']:
                continue

            args = [NumberObject(p.id), NameObject(b['/Type'])]
            for key in fit_coordinates.get(b['/Type'], ()):
                # Missing or null coordinates are written as 0.
                if key in b and not isinstance(b[key], NullObject):
                    args.append(FloatObject(b[key]))
                else:
                    args.append(FloatObject(0))
                # Guarded delete: the key may legitimately be absent.
                if key in b:
                    del b[key]

            b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

            del b['/Page'], b['/Type']
            last_added = self.output.addBookmarkDict(b, parent)
            break
def _associate_dests_to_pages(self, pages):
    """
    Replace each named destination's ``/Page`` object with the id of the
    merged page that wraps the same page object.

    Destinations whose ``/Page`` is already a ``NumberObject`` are left
    alone. If several pages match, the last one wins.

    :param pages: merged pages to search.
    :raises ValueError: if a destination matches none of ``pages``.
    """
    for dest in self.named_dests:
        target = dest['/Page']
        if isinstance(target, NumberObject):
            continue

        page_id = None
        for merged in pages:
            if target.getObject() == merged.pagedata.getObject():
                page_id = merged.id

        if page_id == None:
            raise ValueError("Unresolved named destination '%s'" % (dest['/Title'],))
        dest[NameObject('/Page')] = NumberObject(page_id)
def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
    """
    Replace each bookmark's ``/Page`` object with the id of the merged
    page that wraps the same page object, recursing into nested lists.

    Bookmarks whose ``/Page`` is already a ``NumberObject`` are left alone.
    If several pages match, the last one wins.

    :param pages: merged pages to search.
    :param bookmarks: list to process; defaults to ``self.bookmarks``.
    :raises ValueError: if a bookmark matches none of ``pages``.
    """
    if bookmarks == None:
        bookmarks = self.bookmarks

    for entry in bookmarks:
        if isinstance(entry, list):
            self._associate_bookmarks_to_pages(pages, entry)
            continue

        target = entry['/Page']
        if isinstance(target, NumberObject):
            continue

        page_id = None
        for merged in pages:
            if target.getObject() == merged.pagedata.getObject():
                page_id = merged.id

        if page_id == None:
            raise ValueError("Unresolved bookmark '%s'" % (entry['/Title'],))
        entry[NameObject('/Page')] = NumberObject(page_id)
def findBookmark(self, bookmark, root=None):
    """
    Locate a bookmark in the (possibly nested) bookmark list.

    :param bookmark: the bookmark itself, or the value of its ``/Title``.
    :param root: list to search; defaults to ``self.bookmarks``.
    :return: list of indices leading to the first match, one index per
        nesting level, or ``None`` when nothing matches.
    """
    if root is None:
        root = self.bookmarks

    for position, entry in enumerate(root):
        if isinstance(entry, list):
            nested_path = self.findBookmark(bookmark, entry)
            if nested_path:
                return [position] + nested_path
            continue
        if entry == bookmark or entry['/Title'] == bookmark:
            return [position]

    return None
def addBookmark(self, title, pagenum, parent=None):
    """
    Add a bookmark to this PDF file.

    :param str title: Title to use for this bookmark.
    :param int pagenum: Page number this bookmark will point to.
    :param parent: A reference to a parent bookmark to create nested
        bookmarks. May be a bookmark (or its title), or a list of indices
        as returned by :meth:`findBookmark`.
    :return: the newly created bookmark.
    :raises ValueError: if ``parent`` is given but cannot be found.
    """
    dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

    if parent is None:
        # Top-level bookmark: no location lookup needed.
        self.bookmarks.append(dest)
        return dest

    if isinstance(parent, list):
        iloc = parent
    else:
        iloc = self.findBookmark(parent)
        if iloc is None:
            # Fail with a clear message instead of a TypeError on None[:-1].
            raise ValueError("Parent bookmark %r not found" % (parent,))

    # Walk down to the list that contains the parent bookmark.
    bmparent = self.bookmarks
    for i in iloc[:-1]:
        bmparent = bmparent[i]

    # Children live in a list placed directly after their parent.
    npos = iloc[-1] + 1
    if npos < len(bmparent) and isinstance(bmparent[npos], list):
        bmparent[npos].append(dest)
    else:
        bmparent.insert(npos, [dest])
    return dest
def addNamedDestination(self, title, pagenum):
    """
    Queue a named destination for the output.

    The destination is created with the ``/FitH`` fit type and a top
    coordinate of 826, and appended to ``self.named_dests``.

    :param str title: Title to use
    :param int pagenum: Page number this destination points at.
    """
    fit_type = NameObject('/FitH')
    top = NumberObject(826)
    self.named_dests.append(
        Destination(TextStringObject(title), NumberObject(pagenum), fit_type, top)
    )
class OutlinesObject(list):
    """
    A list of outline entries kept in step with an outline tree object.

    ``tree`` receives the ``addChild`` / ``removeChild`` calls; ``pdf`` is
    used to look up page references and to register new objects.
    """

    def __init__(self, pdf, tree, parent=None):
        list.__init__(self)
        self.tree = tree
        self.pdf = pdf
        self.parent = parent

    def remove(self, index):
        """Delete the entry at ``index`` from this list and from the tree."""
        entry = self[index]
        del self[index]
        self.tree.removeChild(entry)

    def add(self, title, pagenum):
        """
        Create a bookmark titled ``title`` that jumps to page ``pagenum``
        and attach it to the tree.
        """
        pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]

        action = DictionaryObject()
        action.update({
            NameObject('/D'): ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S'): NameObject('/GoTo')
        })
        actionRef = self.pdf._addObject(action)

        bookmark = TreeObject()
        bookmark.update({
            NameObject('/A'): actionRef,
            NameObject('/Title'): createStringObject(title),
        })

        self.pdf._addObject(bookmark)
        self.tree.addChild(bookmark)

    def removeAll(self):
        """Detach every child from the tree, popping one list entry per child."""
        # Snapshot the children first: the tree is modified while looping.
        children = list(self.tree.children())
        for child in children:
            self.tree.removeChild(child)
            self.pop()

@ -1,152 +0,0 @@
#!/usr/bin/env python
"""
Representation and utils for ranges of PDF file pages.
Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
"""
import re
from .utils import isString
# Pattern for one decimal integer. It is a single capturing group, so each
# use inside PAGE_RANGE_RE adds one group.
_INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
# Whole-string pattern for either a lone int or a slice-like
# "[start]:[stop][:[step]]" expression.
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
# Capturing groups: 1 = whole expression, 2 = lone int, 3 = slice form,
# 4 = start, 5 = ":stop..." part, 6 = stop, 7 = ":step" part, 8 = step.
class ParseError(Exception):
    """Raised when a value cannot be parsed as a page range."""
# Human-readable summary of the page range syntax. It is substituted into
# the PageRange.__init__ docstring through the {page_range_help} placeholder.
PAGE_RANGE_HELP = """Remember, page indices start with zero.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
"""
class PageRange(object):
    """
    A slice-like representation of a range of page indices,
    i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # For -1 the stop would be 0, which selects nothing; use an
            # open-ended stop so the last page is selected.
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 hold start, stop and step; empty means None.
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Just formatting this when there is __doc__ for __init__
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice) or \
               isinstance(input, PageRange) or \
               (isString(input)
                and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step is None:
            # A one-page range prints as the bare page index.
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)
            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i is None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)
PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
def parse_filename_page_ranges(args):
    """
    Given a sequence of filenames and page ranges, return a list of
    (filename, page_range) pairs.

    First arg must be a filename; other args are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.

    :param args: any iterable of arguments (list, tuple, generator, ...).
    :raises ValueError: if a page range appears before any filename.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    # list() lets tuples and other iterables be concatenated with the
    # trailing None, which acts as a sentinel that flushes the final filename.
    for arg in list(args) + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append( (pdf_filename, PageRange(arg)) )
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append( (pdf_filename, PAGE_RANGE_ALL) )

            pdf_filename = arg
            did_page_range = False
    return pairs

3004
vendor/PyPDF2/pdf.py vendored

File diff suppressed because it is too large Load Diff

@ -1,295 +0,0 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Utility functions for PDF library.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import sys
try:
import __builtin__ as builtins
except ImportError: # Py3
import builtins
# Python 2/3 compatibility aliases. Each getattr() picks the Python 2 name
# from ``builtins`` when it exists and otherwise falls back to the given
# Python 3 builtin.
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)
bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
string_type = getattr(builtins, "unicode", str)
# ``long`` is only evaluated on Python 2, where it exists.
int_types = (int, long) if sys.version_info[0] < 3 else (int,)
# Make basic type tests more consistent
def isString(s):
    """Return True when ``s`` is an instance of the module's base string type (Python 2 and 3)."""
    is_string = isinstance(s, _basestring)
    return is_string
def isInt(n):
    """Return True when ``n`` is an instance of one of the module's integer types (Python 2 and 3)."""
    is_int = isinstance(n, int_types)
    return is_int
def isBytes(b):
    """Return True when ``b`` is an instance of the module's bytes type (Python 2 and 3)."""
    is_bytes = isinstance(b, bytes_type)
    return is_bytes
#custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
def readUntilWhitespace(stream, maxchars=None):
    """
    Read one character at a time and return the non-whitespace run.

    Reading stops at the first whitespace character (which is consumed but
    not returned), at end of stream, or once ``maxchars`` characters have
    been collected.
    """
    collected = b_("")
    while True:
        char = stream.read(1)
        if not char or char.isspace():
            break
        collected += char
        if len(collected) == maxchars:
            break
    return collected
def readNonWhitespace(stream):
    """
    Consume characters until one that is not in ``WHITESPACES`` is read,
    and return that character.
    """
    while True:
        char = stream.read(1)
        if char not in WHITESPACES:
            return char
def skipOverWhitespace(stream):
    """
    Consume characters up to and including the first one that is not in
    ``WHITESPACES``.

    :return: True when more than one character was read, i.e. at least
        one whitespace character came before the terminating character.
    """
    reads = 0
    while True:
        char = stream.read(1)
        reads += 1
        if char not in WHITESPACES:
            break
    return reads > 1
def skipOverComment(stream):
    """
    Skip a ``%`` comment if the stream is positioned at one.

    Peeks at the next character. When it is ``%``, reads up to and
    including the first line-ending character (``\\n`` or ``\\r``), or to
    the end of the stream if the comment is not terminated. Otherwise the
    stream position is left unchanged.
    """
    tok = stream.read(1)
    if not tok:
        # End of stream: nothing was consumed, so there is nothing to rewind.
        return
    stream.seek(-1, 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)
            if not tok:
                # The comment runs to end of stream; stop instead of
                # spinning forever on empty reads.
                break
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Read from ``stream`` until ``regex`` matches, and return the data
    before the match. The stream is left positioned at the start of the
    match.

    The stream is read in 16-character chunks and each chunk is searched
    on its own.
    NOTE(review): a match that straddles two chunks appears not to be
    detected -- confirm against callers.

    :param bool ignore_eof: when exactly ``True``, return what was read so
        far if the stream ends before a match.
    :raises PdfStreamError: on premature end of stream otherwise.
    """
    collected = b_('')
    while True:
        chunk = stream.read(16)
        if not chunk:
            # stream has truncated prematurely
            if ignore_eof == True:
                return collected
            raise PdfStreamError("Stream has ended unexpectedly")
        match = regex.search(chunk)
        if match is None:
            collected += chunk
            continue
        collected += chunk[:match.start()]
        # Rewind so the next read starts at the beginning of the match.
        stream.seek(match.start() - len(chunk), 1)
        return collected
class ConvertFunctionsToVirtualList(object):
    """
    Read-only sequence view backed by two callables: one that returns the
    length and one that returns the item at a non-negative index.
    """

    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            # A slice yields another virtual list that maps its own
            # positions back onto this one.
            selected = xrange_fn(*index.indices(len(self)))
            view_type = type(self)
            return view_type(selected.__len__, lambda idx: self[selected[idx]])

        if not isInt(index):
            raise TypeError("sequence indices must be integers")

        size = len(self)
        if index < 0:
            # support negative indexes
            index += size
        if not (0 <= index < size):
            raise IndexError("sequence index out of range")
        return self.getFunction(index)
def RC4_encrypt(key, plaintext):
    """
    Apply the RC4 stream cipher to ``plaintext`` using ``key``.

    :param key: key material; indexed per element and passed through ``ord_``.
    :param plaintext: data to process; each element is passed through ``ord_``.
    :return: the result, built from ``b_`` values.
    """
    # Key-scheduling: permute the state array under control of the key.
    S = list(range(256))
    j = 0
    key_length = len(key)
    for i in range(256):
        j = (j + S[i] + ord_(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]

    # Keystream generation, XORed with the input one element at a time.
    # Pieces are collected and joined once to avoid quadratic ``+=``.
    i, j = 0, 0
    pieces = []
    for char in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        pieces.append(b_(chr(ord_(char) ^ t)))
    return b_("").join(pieces)
def matrixMultiply(a, b):
    """
    Return the matrix product of ``a`` and ``b`` as a list of rows.

    Both matrices are sequences of rows; every element is converted with
    ``float()`` before multiplying.
    """
    columns = list(zip(*b))
    product = []
    for row in a:
        product.append([
            sum(float(x) * float(y) for x, y in zip(row, column))
            for column in columns
        ])
    return product
def markLocation(stream):
    """
    Creates a file showing the current location in context.

    Writes up to ``RADIUS`` units before and after the current stream
    position to ``PyPDF2_pdfLocation.txt`` in the working directory, with
    the marker ``HERE`` at the position itself, then restores the position.
    """
    # Mainly for debugging
    RADIUS = 5000
    stream.seek(-RADIUS, 1)
    # Binary mode: assumes ``stream`` yields bytes, as PDF streams do --
    # TODO confirm no caller passes a text stream. The ``with`` block
    # closes the file even if a read or write fails.
    with open('PyPDF2_pdfLocation.txt', 'wb') as outputDoc:
        outputDoc.write(stream.read(RADIUS))
        outputDoc.write(b_('HERE'))
        outputDoc.write(stream.read(RADIUS))
    stream.seek(-RADIUS, 1)
class PyPdfError(Exception):
    """Base class for the exceptions defined in this module."""


class PdfReadError(PyPdfError):
    """PyPdfError subclass for read errors."""


class PageSizeNotDefinedError(PyPdfError):
    """PyPdfError subclass for a page size that is not defined."""


class PdfReadWarning(UserWarning):
    """Warning category for read problems."""


class PdfStreamError(PdfReadError):
    """PdfReadError subclass for stream errors such as an unexpected end of stream."""
if sys.version_info[0] < 3:
    def b_(s):
        """Return ``s`` unchanged (Python 2 ``str`` is already bytes)."""
        return s
else:
    # Cache of encoded results for strings shorter than two characters.
    B_CACHE = {}

    def b_(s):
        """
        Return ``s`` as bytes.

        Bytes input is returned unchanged; text is encoded as latin-1.
        """
        # Check for bytes before touching the cache: the cache is keyed by
        # str, so looking bytes up in it can only trigger str/bytes
        # comparisons and never hits.
        if isinstance(s, bytes):
            return s
        bc = B_CACHE
        if s in bc:
            return bc[s]
        r = s.encode('latin-1')
        if len(s) < 2:
            bc[s] = r
        return r
def u_(s):
    """
    Return ``s`` as text: unchanged on Python 3, decoded with the
    ``unicode_escape`` codec on Python 2.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
def str_(b):
    """
    Return ``b`` as a native string: on Python 3, bytes are decoded as
    latin-1; anything else (and everything on Python 2) is returned as is.
    """
    if sys.version_info[0] >= 3 and type(b) == bytes:
        return b.decode('latin-1')
    return b
def ord_(b):
    """
    Return the integer value of ``b``: ``ord(b)`` on Python 2 or when
    ``b`` is a ``str``; otherwise ``b`` itself is returned unchanged.
    """
    needs_ord = sys.version_info[0] < 3 or type(b) == str
    return ord(b) if needs_ord else b
def chr_(c):
    """Return ``c`` unchanged on Python 2, and ``chr(c)`` on Python 3."""
    if sys.version_info[0] >= 3:
        return chr(c)
    return c
def barray(b):
    """Return ``b`` unchanged on Python 2, and ``bytearray(b)`` on Python 3."""
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b
def hexencode(b):
    """
    Return the hexadecimal encoding of ``b``, using ``str.encode('hex')``
    on Python 2 and the ``hex_codec`` encoder on Python 3.
    """
    if sys.version_info[0] >= 3:
        import codecs
        encode_hex = codecs.getencoder('hex_codec')
        encoded, _consumed = encode_hex(b)
        return encoded
    return b.encode('hex')
def hexStr(num):
    """Return ``hex(num)`` without the ``L`` suffix Python 2 adds to longs."""
    text = hex(num)
    return text.rstrip('L')
WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]

358
vendor/PyPDF2/xmp.py vendored

@ -1,358 +0,0 @@
import re
import datetime
import decimal
from .generic import PdfObject
from xml.dom import getDOMImplementation
from xml.dom.minidom import parseString
from .utils import u_
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
# What is the PDFX namespace, you might ask? I might ask that too. It's
# a completely undocumented namespace used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning. Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value. The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character. A key like "my car" is therefore "my\u21820020car".
#
# \u2182, in case you're wondering, is the unicode character
# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for
# escaping characters.
#
# Intentional users of the pdfx namespace should be shot on sight. A
# custom data schema and sensical XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP (under "Extensibility of
# Schemas").
#
# Information presented here on the /pdfx/ schema is a result of limited
# reverse engineering, and does not constitute a full specification.
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
# Pattern for the ISO 8601 date subset handled here: a year, optionally
# followed by month, day and a time with a timezone designator. A raw
# string is used so the decimal point in fractional seconds can be escaped
# and matched literally rather than as "any character".
iso8601 = re.compile(r"""
        (?P<year>[0-9]{4})
        (-
            (?P<month>[0-9]{2})
            (-
                (?P<day>[0-9]+)
                (T
                    (?P<hour>[0-9]{2}):
                    (?P<minute>[0-9]{2})
                    (:(?P<second>[0-9]{2}(\.[0-9]+)?))?
                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
                )?
            )?
        )?
        """, re.VERBOSE)
class XmpInformation(PdfObject):
    """
    An object that represents Adobe XMP metadata.
    Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
    """

    def __init__(self, stream):
        self.stream = stream
        docRoot = parseString(self.stream.getData())
        self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0]
        # Parsed property values, keyed by namespace and then by name.
        self.cache = {}

    def writeToStream(self, stream, encryption_key):
        """Write the underlying metadata stream object to ``stream``."""
        self.stream.writeToStream(stream, encryption_key)

    def getElement(self, aboutUri, namespace, name):
        """
        Yield every attribute node and element called ``name`` in
        ``namespace`` found on rdf:Description nodes whose rdf:about
        equals ``aboutUri``.
        """
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                for element in desc.getElementsByTagNameNS(namespace, name):
                    yield element

    def getNodesInNamespace(self, aboutUri, namespace):
        """
        Yield every attribute and direct child in ``namespace`` found on
        rdf:Description nodes whose rdf:about equals ``aboutUri``.
        """
        for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _getText(self, element):
        """Return the concatenated data of the direct text children of ``element``."""
        return "".join(
            child.data
            for child in element.childNodes
            if child.nodeType == child.TEXT_NODE
        )

    # The converters and getter factories below are plain functions used
    # while the class body executes; they take no ``self``.

    def _converter_string(value):
        return value

    def _converter_date(value):
        """
        Convert an ISO 8601 string into a naive ``datetime`` in UTC.

        Missing month/day default to 1, missing time fields to 0 and a
        missing timezone designator to ``Z``.
        """
        m = iso8601.match(value)
        year = int(m.group("year"))
        month = int(m.group("month") or "1")
        day = int(m.group("day") or "1")
        hour = int(m.group("hour") or "0")
        minute = int(m.group("minute") or "0")
        second = decimal.Decimal(m.group("second") or "0")
        whole_seconds = second.to_integral(decimal.ROUND_FLOOR)
        microseconds = (second - whole_seconds) * 1000000
        tzd = m.group("tzd") or "Z"
        # datetime() accepts only integers, so the Decimals are converted.
        dt = datetime.datetime(year, month, day, hour, minute,
                               int(whole_seconds), int(microseconds))
        if tzd != "Z":
            # tzd looks like "+HH:MM" or "-HH:MM". The sign is read from
            # the first character so that offsets with a zero hour part
            # (e.g. "+00:30") are still applied in the right direction.
            tzd_hours, tzd_minutes = [int(x) for x in tzd[1:].split(":")]
            offset = datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
            if tzd[0] == "-":
                dt = dt + offset
            else:
                dt = dt - offset
        return dt
    _test_converter_date = staticmethod(_converter_date)

    def _getter_bag(namespace, name, converter):
        """Build a getter returning the converted ``rdf:li`` items of every ``rdf:Bag``."""
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag")
                if len(bags):
                    for bag in bags:
                        for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_seq(namespace, name, converter):
        """
        Build a getter returning the converted ``rdf:li`` items of every
        ``rdf:Seq``; an element without one contributes its own text.
        """
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = []
            for element in self.getElement("", namespace, name):
                seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq")
                if len(seqs):
                    for seq in seqs:
                        for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval.append(value)
                else:
                    value = converter(self._getText(element))
                    retval.append(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_langalt(namespace, name, converter):
        """
        Build a getter returning a dict of the ``rdf:Alt`` items keyed by
        their ``xml:lang``; an element without one is stored under
        ``"x-default"``.
        """
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            retval = {}
            for element in self.getElement("", namespace, name):
                alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
                if len(alts):
                    for alt in alts:
                        for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                            value = self._getText(item)
                            value = converter(value)
                            retval[item.getAttribute("xml:lang")] = value
                else:
                    retval["x-default"] = converter(self._getText(element))
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = retval
            return retval
        return get

    def _getter_single(namespace, name, converter):
        """
        Build a getter returning the converted value of the first matching
        attribute or element, or ``None`` when there is none.
        """
        def get(self):
            cached = self.cache.get(namespace, {}).get(name)
            if cached is not None:
                return cached
            value = None
            for element in self.getElement("", namespace, name):
                if element.nodeType == element.ATTRIBUTE_NODE:
                    value = element.nodeValue
                else:
                    value = self._getText(element)
                break
            if value is not None:
                value = converter(value)
            ns_cache = self.cache.setdefault(namespace, {})
            ns_cache[name] = value
            return value
        return get

    dc_contributor = property(
        _getter_bag(DC_NAMESPACE, "contributor", _converter_string),
        doc="""
    Contributors to the resource (other than the authors). An unsorted
    array of names.
    """)

    dc_coverage = property(
        _getter_single(DC_NAMESPACE, "coverage", _converter_string),
        doc="""
    Text describing the extent or scope of the resource.
    """)

    dc_creator = property(
        _getter_seq(DC_NAMESPACE, "creator", _converter_string),
        doc="""
    A sorted array of names of the authors of the resource, listed in order
    of precedence.
    """)

    dc_date = property(
        _getter_seq(DC_NAMESPACE, "date", _converter_date),
        doc="""
    A sorted array of dates (datetime.datetime instances) of significance to
    the resource.  The dates and times are in UTC.
    """)

    dc_description = property(
        _getter_langalt(DC_NAMESPACE, "description", _converter_string),
        doc="""
    A language-keyed dictionary of textual descriptions of the content of the
    resource.
    """)

    dc_format = property(
        _getter_single(DC_NAMESPACE, "format", _converter_string),
        doc="""
    The mime-type of the resource.
    """)

    dc_identifier = property(
        _getter_single(DC_NAMESPACE, "identifier", _converter_string),
        doc="""
    Unique identifier of the resource.
    """)

    dc_language = property(
        _getter_bag(DC_NAMESPACE, "language", _converter_string),
        doc="""
    An unordered array specifying the languages used in the resource.
    """)

    dc_publisher = property(
        _getter_bag(DC_NAMESPACE, "publisher", _converter_string),
        doc="""
    An unordered array of publisher names.
    """)

    dc_relation = property(
        _getter_bag(DC_NAMESPACE, "relation", _converter_string),
        doc="""
    An unordered array of text descriptions of relationships to other
    documents.
    """)

    dc_rights = property(
        _getter_langalt(DC_NAMESPACE, "rights", _converter_string),
        doc="""
    A language-keyed dictionary of textual descriptions of the rights the
    user has to this resource.
    """)

    dc_source = property(
        _getter_single(DC_NAMESPACE, "source", _converter_string),
        doc="""
    Unique identifier of the work from which this resource was derived.
    """)

    dc_subject = property(
        _getter_bag(DC_NAMESPACE, "subject", _converter_string),
        doc="""
    An unordered array of descriptive phrases or keywords that specify the
    topic of the content of the resource.
    """)

    dc_title = property(
        _getter_langalt(DC_NAMESPACE, "title", _converter_string),
        doc="""
    A language-keyed dictionary of the title of the resource.
    """)

    dc_type = property(
        _getter_bag(DC_NAMESPACE, "type", _converter_string),
        doc="""
    An unordered array of textual descriptions of the document type.
    """)

    pdf_keywords = property(
        _getter_single(PDF_NAMESPACE, "Keywords", _converter_string),
        doc="""
    An unformatted text string representing document keywords.
    """)

    pdf_pdfversion = property(
        _getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string),
        doc="""
    The PDF file version, for example 1.0, 1.3.
    """)

    pdf_producer = property(
        _getter_single(PDF_NAMESPACE, "Producer", _converter_string),
        doc="""
    The name of the tool that created the PDF document.
    """)

    xmp_createDate = property(
        _getter_single(XMP_NAMESPACE, "CreateDate", _converter_date),
        doc="""
    The date and time the resource was originally created.  The date and
    time are returned as a UTC datetime.datetime object.
    """)

    xmp_modifyDate = property(
        _getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date),
        doc="""
    The date and time the resource was last modified.  The date and time
    are returned as a UTC datetime.datetime object.
    """)

    xmp_metadataDate = property(
        _getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date),
        doc="""
    The date and time that any metadata for this resource was last
    changed.  The date and time are returned as a UTC datetime.datetime
    object.
    """)

    xmp_creatorTool = property(
        _getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string),
        doc="""
    The name of the first known tool used to create the resource.
    """)

    xmpmm_documentId = property(
        _getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string),
        doc="""
    The common identifier for all versions and renditions of this resource.
    """)

    xmpmm_instanceId = property(
        _getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string),
        doc="""
    An identifier for a specific incarnation of a document, updated each
    time a file is saved.
    """)

    @property
    def custom_properties(self):
        """
        Retrieves custom metadata properties defined in the undocumented pdfx
        metadata schema.

        :return: a dictionary of key/value items for custom metadata properties.
        :rtype: dict
        """
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.getNodesInNamespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # Each escaped character is U+2182 followed by four hex
                    # digits; replace it with the character it encodes.
                    idx = key.find(u_("\u2182"))
                    if idx == -1:
                        break
                    key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:]
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._getText(node)
                self._custom_properties[key] = value
        return self._custom_properties

@ -1 +0,0 @@
__version__ = '5.0.6'

@ -1,28 +0,0 @@
Babel is written and maintained by the Babel team and various contributors:
Maintainer and Current Project Lead:
- Armin Ronacher <armin.ronacher@active-4.com>
Contributors:
- Christopher Lenz <cmlenz@gmail.com>
- Alex Morega <alex@grep.ro>
- Felix Schwarz <felix.schwarz@oss.schwarz.eu>
- Pedro Algarvio <pedro@algarvio.me>
- Jeroen Ruigrok van der Werven <asmodai@in-nomine.org>
- Philip Jenvey <pjenvey@underboss.org>
- Tobias Bieniek <Tobias.Bieniek@gmx.de>
- Jonas Borgström <jonas@edgewall.org>
- Daniel Neuhäuser <dasdasich@gmail.com>
- Nick Retallack <nick@bitcasa.com>
- Thomas Waldmann <tw@waldmann-edv.de>
- Lennart Regebro <regebro@gmail.com>
Babel was previously developed under the Copyright of Edgewall Software. The
following copyright notice holds true for releases before 2013: "Copyright (c)
2007 - 2011 by Edgewall Software"
In addition to the regular contributions Babel includes a fork of Lennart
Regebro's tzlocal that originally was licensed under the CC0 license. The
original copyright of that project is "Copyright 2013 by Lennart Regebro".

@ -1,29 +0,0 @@
Copyright (C) 2013 by the Babel Team, see AUTHORS for more information.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. The name of the author may not be used to endorse or promote
products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,24 +0,0 @@
# -*- coding: utf-8 -*-
"""
babel
~~~~~
Integrated collection of utilities that assist in internationalizing and
localizing applications.
This package is basically composed of two major parts:
* tools to build and work with ``gettext`` message catalogs
* a Python interface to the CLDR (Common Locale Data Repository), providing
access to various locale display names, localized number and date
formatting, etc.
:copyright: (c) 2013 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
from babel.core import UnknownLocaleError, Locale, default_locale, \
negotiate_locale, parse_locale, get_locale_identifier
__version__ = '1.3'

@ -1,51 +0,0 @@
import sys

# True when the running interpreter is Python 2.
PY2 = sys.version_info[0] == 2

_identity = lambda x: x

# Bind one set of names for both major versions so that the rest of the
# package can import them from here without checking the interpreter.
if PY2:
    text_type = unicode
    string_types = (str, unicode)
    integer_types = (int, long)

    text_to_native = lambda s, enc: s.encode(enc)
    unichr = unichr

    iterkeys = lambda d: d.iterkeys()
    itervalues = lambda d: d.itervalues()
    iteritems = lambda d: d.iteritems()

    from cStringIO import StringIO as BytesIO
    from StringIO import StringIO
    import cPickle as pickle

    from itertools import izip, imap
    range_type = xrange

    cmp = cmp
else:
    text_type = str
    string_types = (str,)
    integer_types = (int, )
    unichr = chr

    text_to_native = lambda s, enc: s

    iterkeys = lambda d: iter(d.keys())
    itervalues = lambda d: iter(d.values())
    iteritems = lambda d: iter(d.items())

    from io import StringIO, BytesIO
    import pickle

    izip = zip
    imap = map
    range_type = range

    # Python 3 has no built-in cmp(); rebuild the -1/0/1 contract.
    cmp = lambda a, b: (a > b) - (a < b)

number_types = integer_types + (float,)

941
vendor/babel/core.py vendored

@ -1,941 +0,0 @@
# -*- coding: utf-8 -*-
"""
babel.core
~~~~~~~~~~
Core locale representation and locale data access.
:copyright: (c) 2013 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
import os
from babel import localedata
from babel._compat import pickle, string_types
__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
'parse_locale']
_global_data = None
def _raise_no_data_error():
raise RuntimeError('The babel data files are not available. '
'This usually happens because you are using '
'a source checkout from Babel and you did '
'not build the data files. Just make sure '
'to run "python setup.py import_cldr" before '
'installing the library.')
def get_global(key):
    """Return the dictionary for the given key in the global data.

    The global data is stored in the ``babel/global.dat`` file and contains
    information independent of individual locales.

    >>> get_global('zone_aliases')['UTC']
    u'Etc/GMT'
    >>> get_global('zone_territories')['Europe/Berlin']
    u'DE'

    .. versionadded:: 0.9

    :param key: the data key
    :return: the value stored under `key`, or an empty dict if it is absent
    """
    global _global_data
    # The pickle is read once, on first use, and kept in the module global.
    if _global_data is None:
        data_path = os.path.join(os.path.dirname(__file__), 'global.dat')
        if not os.path.isfile(data_path):
            _raise_no_data_error()
        with open(data_path, 'rb') as handle:
            _global_data = pickle.load(handle)
    return _global_data.get(key, {})
# Maps a bare language code to a full ``language_TERRITORY`` identifier.
# It is the default value of the ``aliases`` parameter of `Locale.default`,
# `Locale.negotiate`, `default_locale` and `negotiate_locale` in this module.
LOCALE_ALIASES = {
'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ',
'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES',
'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES',
'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT',
'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV',
'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL',
'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI',
'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA'
}
class UnknownLocaleError(Exception):
    """Raised when locale data is requested for a locale that has none."""

    def __init__(self, identifier):
        """Build the exception for the given locale identifier.

        :param identifier: the identifier string of the unsupported locale
        """
        message = 'unknown locale %r' % identifier
        super(UnknownLocaleError, self).__init__(message)

        #: The identifier of the locale that could not be found.
        self.identifier = identifier
class Locale(object):
    """Representation of a specific locale.

    >>> locale = Locale('en', 'US')
    >>> repr(locale)
    "Locale('en', territory='US')"
    >>> locale.display_name
    u'English (United States)'

    A `Locale` object can also be instantiated from a raw locale string:

    >>> locale = Locale.parse('en-US', sep='-')
    >>> repr(locale)
    "Locale('en', territory='US')"

    `Locale` objects provide access to a collection of locale data, such as
    territory and language names, number and date format patterns, and more:

    >>> locale.number_symbols['decimal']
    u'.'

    If a locale is requested for which no locale data is available, an
    `UnknownLocaleError` is raised:

    >>> Locale.parse('en_DE')
    Traceback (most recent call last):
        ...
    UnknownLocaleError: unknown locale 'en_DE'

    For more information see :rfc:`3066`.
    """

    def __init__(self, language, territory=None, script=None, variant=None):
        """Initialize the locale object from the given identifier components.

        >>> locale = Locale('en', 'US')
        >>> locale.language
        'en'
        >>> locale.territory
        'US'

        :param language: the language code
        :param territory: the territory (country or region) code
        :param script: the script code
        :param variant: the variant code
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        #: the language code
        self.language = language
        #: the territory (country or region) code
        self.territory = territory
        #: the script code
        self.script = script
        #: the variant code
        self.variant = variant
        # Locale data is loaded lazily by the `_data` property.
        self.__data = None

        identifier = str(self)
        if not localedata.exists(identifier):
            raise UnknownLocaleError(identifier)

    @classmethod
    def default(cls, category=None, aliases=LOCALE_ALIASES):
        """Return the system default locale for the specified category.

        >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']:
        ...     os.environ[name] = ''
        >>> os.environ['LANG'] = 'fr_FR.UTF-8'
        >>> Locale.default('LC_MESSAGES')
        Locale('fr', territory='FR')

        The following fallbacks to the variable are always considered:

        - ``LANGUAGE``
        - ``LC_ALL``
        - ``LC_CTYPE``
        - ``LANG``

        :param category: one of the ``LC_XXX`` environment variable names
        :param aliases: a dictionary of aliases for locale identifiers
        """
        # XXX: use likely subtag expansion here instead of the
        # aliases dictionary.
        locale_string = default_locale(category, aliases=aliases)
        return cls.parse(locale_string)

    @classmethod
    def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES):
        """Find the best match between available and requested locale strings.

        >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
        Locale('de', territory='DE')
        >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
        Locale('de')
        >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])

        You can specify the character used in the locale identifiers to
        separate the different components. This separator is applied to both
        lists. Also, case is ignored in the comparison:

        >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
        Locale('de', territory='DE')

        :param preferred: the list of locale identifiers preferred by the user
        :param available: the list of locale identifiers available
        :param sep: character that separates the parts of the identifiers
        :param aliases: a dictionary of aliases for locale identifiers
        :return: the matching `Locale`, or `None` if nothing matches
        """
        identifier = negotiate_locale(preferred, available, sep=sep,
                                      aliases=aliases)
        if identifier:
            return Locale.parse(identifier, sep=sep)
        return None

    @classmethod
    def parse(cls, identifier, sep='_', resolve_likely_subtags=True):
        """Create a `Locale` instance for the given locale identifier.

        >>> l = Locale.parse('de-DE', sep='-')
        >>> l.display_name
        u'Deutsch (Deutschland)'

        If the `identifier` parameter is not a string, but actually a `Locale`
        object, that object is returned:

        >>> Locale.parse(l)
        Locale('de', territory='DE')

        This also can perform resolving of likely subtags which it does
        by default.  This is for instance useful to figure out the most
        likely locale for a territory you can use ``'und'`` as the
        language tag:

        >>> Locale.parse('und_AT')
        Locale('de', territory='AT')

        :param identifier: the locale identifier string
        :param sep: optional component separator
        :param resolve_likely_subtags: if this is specified then a locale will
                                       have its likely subtag resolved if the
                                       locale otherwise does not exist.  For
                                       instance ``zh_TW`` by itself is not a
                                       locale that exists but Babel can
                                       automatically expand it to the full
                                       form of ``zh_hant_TW``.  Note that this
                                       expansion is only taking place if no
                                       locale exists otherwise.  For instance
                                       there is a locale ``en`` that can exist
                                       by itself.
        :raise `TypeError`: if the identifier is neither `None`, a `Locale`
                            nor a string
        :raise `ValueError`: if the string does not appear to be a valid locale
                             identifier
        :raise `UnknownLocaleError`: if no locale data is available for the
                                     requested locale
        """
        if identifier is None:
            return None
        elif isinstance(identifier, Locale):
            return identifier
        elif not isinstance(identifier, string_types):
            raise TypeError('Unexpected value for identifier: %r' % (identifier,))

        parts = parse_locale(identifier, sep=sep)
        input_id = get_locale_identifier(parts)

        def _try_load(parts):
            # Instantiate the locale, mapping "no data" to None.
            try:
                return cls(*parts)
            except UnknownLocaleError:
                return None

        def _try_load_reducing(parts):
            # Success on first hit, return it.
            locale = _try_load(parts)
            if locale is not None:
                return locale

            # Now try without script and variant
            locale = _try_load(parts[:2])
            if locale is not None:
                return locale
            return None

        locale = _try_load(parts)
        if locale is not None:
            return locale
        if not resolve_likely_subtags:
            raise UnknownLocaleError(input_id)

        # From here onwards is some very bad likely subtag resolving.  This
        # whole logic is not entirely correct but good enough (tm) for the
        # time being.  This has been added so that zh_TW does not cause
        # errors for people when they upgrade.  Later we should properly
        # implement ICU like fuzzy locale objects and provide a way to
        # maximize and minimize locale tags.

        language, territory, script, variant = parts
        language = get_global('language_aliases').get(language, language)
        territory = get_global('territory_aliases').get(territory, territory)
        script = get_global('script_aliases').get(script, script)
        variant = get_global('variant_aliases').get(variant, variant)

        if territory == 'ZZ':
            territory = None
        if script == 'Zzzz':
            script = None

        parts = language, territory, script, variant

        # First match: try the whole identifier
        new_id = get_locale_identifier(parts)
        likely_subtag = get_global('likely_subtags').get(new_id)
        if likely_subtag is not None:
            locale = _try_load_reducing(parse_locale(likely_subtag))
            if locale is not None:
                return locale

        # If we did not find anything so far, try again with a
        # simplified identifier that is just the language
        likely_subtag = get_global('likely_subtags').get(language)
        if likely_subtag is not None:
            language2, _, script2, variant2 = parse_locale(likely_subtag)
            locale = _try_load_reducing((language2, territory, script2, variant2))
            if locale is not None:
                return locale

        raise UnknownLocaleError(input_id)

    def __eq__(self, other):
        for key in ('language', 'territory', 'script', 'variant'):
            if not hasattr(other, key):
                return False
        return (self.language == other.language) and \
            (self.territory == other.territory) and \
            (self.script == other.script) and \
            (self.variant == other.variant)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ without __hash__ makes instances unhashable on
        # Python 3; hash the same four fields that __eq__ compares.
        return hash((self.language, self.territory, self.script, self.variant))

    def __repr__(self):
        # The leading empty string makes join() emit the ", " that separates
        # the language from the first keyword parameter.
        parameters = ['']
        for key in ('territory', 'script', 'variant'):
            value = getattr(self, key)
            if value is not None:
                parameters.append('%s=%r' % (key, value))
        parameter_string = '%r' % self.language + ', '.join(parameters)
        return 'Locale(%s)' % parameter_string

    def __str__(self):
        return get_locale_identifier((self.language, self.territory,
                                      self.script, self.variant))

    @property
    def _data(self):
        # Load and wrap the locale data on first access only.
        if self.__data is None:
            self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
        return self.__data

    def get_display_name(self, locale=None):
        """Return the display name of the locale using the given locale.

        The display name will include the language, territory, script, and
        variant, if those are specified.

        >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
        u'Chinese (Simplified, China)'

        :param locale: the locale to use
        :return: the display name, or `None` if the language has no name in
                 the given locale
        """
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        retval = locale.languages.get(self.language)
        # Without a language name there is nothing to append details to.
        if retval is not None and (self.territory or self.script or self.variant):
            details = []
            if self.script:
                details.append(locale.scripts.get(self.script))
            if self.territory:
                details.append(locale.territories.get(self.territory))
            if self.variant:
                details.append(locale.variants.get(self.variant))
            # Build a real list: a filter() object is always truthy on
            # Python 3, which would append an empty " ()" suffix.
            details = [detail for detail in details if detail]
            if details:
                retval += ' (%s)' % u', '.join(details)
        return retval

    display_name = property(get_display_name, doc="""\
        The localized display name of the locale.

        >>> Locale('en').display_name
        u'English'
        >>> Locale('en', 'US').display_name
        u'English (United States)'
        >>> Locale('sv').display_name
        u'svenska'

        :type: `unicode`
        """)

    def get_language_name(self, locale=None):
        """Return the language of this locale in the given locale.

        >>> Locale('zh', 'CN', script='Hans').get_language_name('de')
        u'Chinesisch'

        .. versionadded:: 1.0

        :param locale: the locale to use
        """
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        return locale.languages.get(self.language)

    language_name = property(get_language_name, doc="""\
        The localized language name of the locale.

        >>> Locale('en', 'US').language_name
        u'English'
        """)

    def get_territory_name(self, locale=None):
        """Return the territory name in the given locale."""
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        return locale.territories.get(self.territory)

    territory_name = property(get_territory_name, doc="""\
        The localized territory name of the locale if available.

        >>> Locale('de', 'DE').territory_name
        u'Deutschland'
        """)

    def get_script_name(self, locale=None):
        """Return the script name in the given locale."""
        if locale is None:
            locale = self
        locale = Locale.parse(locale)
        return locale.scripts.get(self.script)

    script_name = property(get_script_name, doc="""\
        The localized script name of the locale if available.

        >>> Locale('ms', 'SG', script='Latn').script_name
        u'Latin'
        """)

    @property
    def english_name(self):
        """The english display name of the locale.

        >>> Locale('de').english_name
        u'German'
        >>> Locale('de', 'DE').english_name
        u'German (Germany)'

        :type: `unicode`"""
        return self.get_display_name(Locale('en'))

    #{ General Locale Display Names

    @property
    def languages(self):
        """Mapping of language codes to translated language names.

        >>> Locale('de', 'DE').languages['ja']
        u'Japanisch'

        See `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_ for
        more information.
        """
        return self._data['languages']

    @property
    def scripts(self):
        """Mapping of script codes to translated script names.

        >>> Locale('en', 'US').scripts['Hira']
        u'Hiragana'

        See `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
        for more information.
        """
        return self._data['scripts']

    @property
    def territories(self):
        """Mapping of territory codes to translated territory names.

        >>> Locale('es', 'CO').territories['DE']
        u'Alemania'

        See `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
        for more information.
        """
        return self._data['territories']

    @property
    def variants(self):
        """Mapping of variant codes to translated variant names.

        >>> Locale('de', 'DE').variants['1901']
        u'Alte deutsche Rechtschreibung'
        """
        return self._data['variants']

    #{ Number Formatting

    @property
    def currencies(self):
        """Mapping of currency codes to translated currency names.  This
        only returns the generic form of the currency name, not the count
        specific one.  If an actual number is requested use the
        :func:`babel.numbers.get_currency_name` function.

        >>> Locale('en').currencies['COP']
        u'Colombian Peso'
        >>> Locale('de', 'DE').currencies['COP']
        u'Kolumbianischer Peso'
        """
        return self._data['currency_names']

    @property
    def currency_symbols(self):
        """Mapping of currency codes to symbols.

        >>> Locale('en', 'US').currency_symbols['USD']
        u'$'
        >>> Locale('es', 'CO').currency_symbols['USD']
        u'US$'
        """
        return self._data['currency_symbols']

    @property
    def number_symbols(self):
        """Symbols used in number formatting.

        >>> Locale('fr', 'FR').number_symbols['decimal']
        u','
        """
        return self._data['number_symbols']

    @property
    def decimal_formats(self):
        """Locale patterns for decimal number formatting.

        >>> Locale('en', 'US').decimal_formats[None]
        <NumberPattern u'#,##0.###'>
        """
        return self._data['decimal_formats']

    @property
    def currency_formats(self):
        """Locale patterns for currency number formatting.

        >>> print(Locale('en', 'US').currency_formats[None])
        <NumberPattern u'\\xa4#,##0.00'>
        """
        return self._data['currency_formats']

    @property
    def percent_formats(self):
        """Locale patterns for percent number formatting.

        >>> Locale('en', 'US').percent_formats[None]
        <NumberPattern u'#,##0%'>
        """
        return self._data['percent_formats']

    @property
    def scientific_formats(self):
        """Locale patterns for scientific number formatting.

        >>> Locale('en', 'US').scientific_formats[None]
        <NumberPattern u'#E0'>
        """
        return self._data['scientific_formats']

    #{ Calendar Information and Date Formatting

    @property
    def periods(self):
        """Locale display names for day periods (AM/PM).

        >>> Locale('en', 'US').periods['am']
        u'AM'
        """
        return self._data['periods']

    @property
    def days(self):
        """Locale display names for weekdays.

        >>> Locale('de', 'DE').days['format']['wide'][3]
        u'Donnerstag'
        """
        return self._data['days']

    @property
    def months(self):
        """Locale display names for months.

        >>> Locale('de', 'DE').months['format']['wide'][10]
        u'Oktober'
        """
        return self._data['months']

    @property
    def quarters(self):
        """Locale display names for quarters.

        >>> Locale('de', 'DE').quarters['format']['wide'][1]
        u'1. Quartal'
        """
        return self._data['quarters']

    @property
    def eras(self):
        """Locale display names for eras.

        >>> Locale('en', 'US').eras['wide'][1]
        u'Anno Domini'
        >>> Locale('en', 'US').eras['abbreviated'][0]
        u'BC'
        """
        return self._data['eras']

    @property
    def time_zones(self):
        """Locale display names for time zones.

        >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
        u'British Summer Time'
        >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
        u'St. John\u2019s'
        """
        return self._data['time_zones']

    @property
    def meta_zones(self):
        """Locale display names for meta time zones.

        Meta time zones are basically groups of different Olson time zones that
        have the same GMT offset and daylight savings time.

        >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
        u'Central European Summer Time'

        .. versionadded:: 0.9
        """
        return self._data['meta_zones']

    @property
    def zone_formats(self):
        """Patterns related to the formatting of time zones.

        >>> Locale('en', 'US').zone_formats['fallback']
        u'%(1)s (%(0)s)'
        >>> Locale('pt', 'BR').zone_formats['region']
        u'Hor\\xe1rio %s'

        .. versionadded:: 0.9
        """
        return self._data['zone_formats']

    @property
    def first_week_day(self):
        """The first day of a week, with 0 being Monday.

        >>> Locale('de', 'DE').first_week_day
        0
        >>> Locale('en', 'US').first_week_day
        6
        """
        return self._data['week_data']['first_day']

    @property
    def weekend_start(self):
        """The day the weekend starts, with 0 being Monday.

        >>> Locale('de', 'DE').weekend_start
        5
        """
        return self._data['week_data']['weekend_start']

    @property
    def weekend_end(self):
        """The day the weekend ends, with 0 being Monday.

        >>> Locale('de', 'DE').weekend_end
        6
        """
        return self._data['week_data']['weekend_end']

    @property
    def min_week_days(self):
        """The minimum number of days in a week so that the week is counted as
        the first week of a year or month.

        >>> Locale('de', 'DE').min_week_days
        4
        """
        return self._data['week_data']['min_days']

    @property
    def date_formats(self):
        """Locale patterns for date formatting.

        >>> Locale('en', 'US').date_formats['short']
        <DateTimePattern u'M/d/yy'>
        >>> Locale('fr', 'FR').date_formats['long']
        <DateTimePattern u'd MMMM y'>
        """
        return self._data['date_formats']

    @property
    def time_formats(self):
        """Locale patterns for time formatting.

        >>> Locale('en', 'US').time_formats['short']
        <DateTimePattern u'h:mm a'>
        >>> Locale('fr', 'FR').time_formats['long']
        <DateTimePattern u'HH:mm:ss z'>
        """
        return self._data['time_formats']

    @property
    def datetime_formats(self):
        """Locale patterns for datetime formatting.

        >>> Locale('en').datetime_formats['full']
        u"{1} 'at' {0}"
        >>> Locale('th').datetime_formats['medium']
        u'{1}, {0}'
        """
        return self._data['datetime_formats']

    @property
    def plural_form(self):
        """Plural rules for the locale.

        >>> Locale('en').plural_form(1)
        'one'
        >>> Locale('en').plural_form(0)
        'other'
        >>> Locale('fr').plural_form(0)
        'one'
        >>> Locale('ru').plural_form(100)
        'many'
        """
        return self._data['plural_form']
def default_locale(category=None, aliases=LOCALE_ALIASES):
    """Return the system default locale for a category, read from the
    environment.

    >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
    ...     os.environ[name] = ''
    >>> os.environ['LANG'] = 'fr_FR.UTF-8'
    >>> default_locale('LC_MESSAGES')
    'fr_FR'

    The "C" or "POSIX" pseudo-locales are treated as aliases for the
    "en_US_POSIX" locale:

    >>> os.environ['LC_MESSAGES'] = 'POSIX'
    >>> default_locale('LC_MESSAGES')
    'en_US_POSIX'

    The following fallbacks to the variable are always considered:

    - ``LANGUAGE``
    - ``LC_ALL``
    - ``LC_CTYPE``
    - ``LANG``

    :param category: one of the ``LC_XXX`` environment variable names
    :param aliases: a dictionary of aliases for locale identifiers
    :return: the first identifier that parses, or `None` if none does
    """
    candidates = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
    for name in candidates:
        # A missing category leaves a falsy entry at the front; skip it.
        if not name:
            continue
        value = os.getenv(name)
        if not value:
            continue
        if name == 'LANGUAGE' and ':' in value:
            # the LANGUAGE variable may contain a colon-separated list of
            # language codes; only the first entry is used
            value = value.split(':')[0]
        if value in ('C', 'POSIX'):
            value = 'en_US_POSIX'
        elif aliases and value in aliases:
            value = aliases[value]
        try:
            return get_locale_identifier(parse_locale(value))
        except ValueError:
            # Unparseable value: fall through to the next variable.
            pass
    return None
def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES):
    """Find the best match between available and requested locale strings.

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
    'de_DE'
    >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
    'de'

    Case is ignored by the algorithm, the result uses the case of the
    preferred locale identifier:

    >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
    'de_DE'

    By default, some web browsers unfortunately do not include the territory
    in the locale identifier for many locales, and some don't even allow the
    user to easily add the territory. So while you may prefer using qualified
    locale identifiers in your web-application, they would not normally match
    the language-only locale sent by such browsers. To work around that, this
    function uses a default mapping of commonly used language-only locale
    identifiers to identifiers including the territory:

    >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
    'ja_JP'

    Some browsers even use an incorrect or outdated language code, such as
    "no" for Norwegian, where the correct locale identifier would actually be
    "nb_NO" (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take
    care of such cases, too:

    >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
    'nb_NO'

    You can override this default mapping by passing a different `aliases`
    dictionary to this function, or you can bypass the behavior altogether by
    setting the `aliases` parameter to `None`.

    :param preferred: the list of locale strings preferred by the user
    :param available: the list of locale strings available
    :param sep: character that separates the different parts of the locale
                strings
    :param aliases: a dictionary of aliases for locale identifiers
    :return: the matching identifier, or `None` if nothing matches
    """
    known = [entry.lower() for entry in available if entry]
    for wanted in preferred:
        lowered = wanted.lower()
        # 1. exact (case-insensitive) match
        if lowered in known:
            return wanted
        # 2. match through the alias table
        if aliases:
            mapped = aliases.get(lowered)
            if mapped:
                mapped = mapped.replace('_', sep)
                if mapped.lower() in known:
                    return mapped
        # 3. fall back to the bare language of a qualified identifier
        pieces = wanted.split(sep)
        if len(pieces) > 1 and pieces[0].lower() in known:
            return pieces[0]
    return None
def parse_locale(identifier, sep='_'):
    """Split a locale identifier into ``(language, territory, script,
    variant)``.

    >>> parse_locale('zh_CN')
    ('zh', 'CN', None, None)
    >>> parse_locale('zh_Hans_CN')
    ('zh', 'CN', 'Hans', None)

    The default component separator is "_", but a different separator can be
    specified using the `sep` parameter:

    >>> parse_locale('zh-CN', sep='-')
    ('zh', 'CN', None, None)

    If the identifier cannot be parsed into a locale, a `ValueError` exception
    is raised:

    >>> parse_locale('not_a_LOCALE_String')
    Traceback (most recent call last):
      ...
    ValueError: 'not_a_LOCALE_String' is not a valid locale identifier

    Encoding information and locale modifiers are removed from the identifier:

    >>> parse_locale('it_IT@euro')
    ('it', 'IT', None, None)
    >>> parse_locale('en_US.UTF-8')
    ('en', 'US', None, None)
    >>> parse_locale('de_DE.iso885915@euro')
    ('de', 'DE', None, None)

    See :rfc:`4646` for more information.

    :param identifier: the locale identifier string
    :param sep: character that separates the different components of the locale
                identifier
    :raise `ValueError`: if the string does not appear to be a valid locale
                         identifier
    """
    # Drop a trailing charset/encoding (".UTF-8") and then a trailing
    # modifier ("@euro"); neither is part of the result.
    if '.' in identifier:
        identifier = identifier.split('.', 1)[0]
    if '@' in identifier:
        identifier = identifier.split('@', 1)[0]

    parts = identifier.split(sep)
    lang = parts.pop(0).lower()
    if not lang.isalpha():
        raise ValueError('expected only letters, got %r' % lang)

    script = territory = variant = None

    # Script: exactly four letters, stored title-cased.
    if parts and len(parts[0]) == 4 and parts[0].isalpha():
        script = parts.pop(0).title()

    # Territory: two letters (upper-cased) or three digits.
    if parts:
        head = parts[0]
        if len(head) == 2 and head.isalpha():
            territory = parts.pop(0).upper()
        elif len(head) == 3 and head.isdigit():
            territory = parts.pop(0)

    # Variant: four characters starting with a digit, or five or more
    # characters starting with a letter.
    if parts:
        head = parts[0]
        starts_digit = len(head) == 4 and head[0].isdigit()
        if starts_digit or (len(head) >= 5 and head[0].isalpha()):
            variant = parts.pop()

    # Anything still unconsumed makes the identifier invalid.
    if parts:
        raise ValueError('%r is not a valid locale identifier' % identifier)

    return lang, territory, script, variant
def get_locale_identifier(tup, sep='_'):
    r"""The reverse of :func:`parse_locale`.  It creates a locale identifier out
    of a ``(language, territory, script, variant)`` tuple.  Items can be set to
    ``None`` and trailing ``None``\s can also be left out of the tuple.

    >>> get_locale_identifier(('de', 'DE', None, '1999'))
    'de_DE_1999'

    .. versionadded:: 1.0

    :param tup: the tuple as returned by :func:`parse_locale`.
    :param sep: the separator for the identifier.
    :return: the non-empty components joined by `sep`, in the order
             language, script, territory, variant
    """
    # The docstring is raw because it contains a literal ``\s``, which is an
    # invalid escape sequence in a normal string literal.
    tup = tuple(tup[:4])
    # Pad a short tuple with None so it always unpacks into four fields.
    lang, territory, script, variant = tup + (None,) * (4 - len(tup))
    # The input tuple has territory before script; the identifier has
    # script before territory.
    return sep.join(filter(None, (lang, script, territory, variant)))

1181
vendor/babel/dates.py vendored

File diff suppressed because it is too large Load Diff

Binary file not shown.

@ -1,209 +0,0 @@
# -*- coding: utf-8 -*-
"""
babel.localedata
~~~~~~~~~~~~~~~~
Low-level locale data access.
:note: The `Locale` class, which uses this module under the hood, provides a
more convenient interface for accessing the locale data.
:copyright: (c) 2013 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
import os
import threading
from collections import MutableMapping
from babel._compat import pickle
_cache = {}
_cache_lock = threading.RLock()
_dirname = os.path.join(os.path.dirname(__file__), 'localedata')
def exists(name):
    """Check whether locale data is available for the given locale.  The
    return value is `True` if it exists, `False` otherwise.

    Directory components in `name` are ignored, so only files directly
    inside the locale data directory are ever checked.

    :param name: the locale identifier string
    """
    if name in _cache:
        return True
    # Keep only the final path component: a name such as "../x/evil" must
    # not be able to probe for files outside the data directory.
    filename = os.path.basename('%s.dat' % name)
    return os.path.exists(os.path.join(_dirname, filename))
def locale_identifiers():
    """List every locale identifier that has a data file.

    .. versionadded:: 0.8.1

    :return: a list of locale identifiers (strings)
    """
    identifiers = []
    for filename in os.listdir(_dirname):
        stem, extension = os.path.splitext(filename)
        # Only ``*.dat`` files count, and "root" is not reported.
        if extension == '.dat' and stem != 'root':
            identifiers.append(stem)
    return identifiers
def load(name, merge_inherited=True):
    """Load the locale data for the given locale.

    The locale data is a dictionary that contains much of the data defined by
    the Common Locale Data Repository (CLDR).  This data is stored as a
    collection of pickle files inside the ``babel`` package.

    >>> d = load('en_US')
    >>> d['languages']['sv']
    u'Swedish'

    Note that the results are cached, and subsequent requests for the same
    locale return the same dictionary:

    >>> d1 = load('en_US')
    >>> d2 = load('en_US')
    >>> d1 is d2
    True

    Directory components in `name` are discarded, so only files directly
    inside the locale data directory can be loaded.

    :param name: the locale identifier string (or "root")
    :param merge_inherited: whether the inherited data should be merged into
                            the data of the requested locale
    :raise `IOError`: if no locale data file is found for the given locale
                      identifier, or one of the locales it inherits from
    """
    # SECURITY: the file is unpickled, so a name such as "../x/evil" must
    # never select a file outside the data directory.  Keep only the final
    # path component.
    name = os.path.basename(name)
    with _cache_lock:
        # NOTE: the cache is keyed by name only, not by `merge_inherited`.
        data = _cache.get(name)
        if not data:
            # Load inherited data
            if name == 'root' or not merge_inherited:
                data = {}
            else:
                # The parent is the identifier minus its last component;
                # a single-component identifier inherits from "root".
                parts = name.split('_')
                if len(parts) == 1:
                    parent = 'root'
                else:
                    parent = '_'.join(parts[:-1])
                data = load(parent).copy()
            filename = os.path.join(_dirname, '%s.dat' % name)
            with open(filename, 'rb') as fileobj:
                if name != 'root' and merge_inherited:
                    merge(data, pickle.load(fileobj))
                else:
                    data = pickle.load(fileobj)
                _cache[name] = data
        return data
def merge(dict1, dict2):
    """Update `dict1` in place with the contents of `dict2`, copying nested
    dictionaries instead of sharing them.

    Entries of `dict2` whose value is `None` are skipped.

    >>> d = {1: 'foo', 3: 'baz'}
    >>> merge(d, {1: 'Foo', 2: 'Bar'})
    >>> sorted(d.items())
    [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]

    :param dict1: the dictionary to merge into
    :param dict2: the dictionary containing the data that should be merged
    """
    for key, incoming in dict2.items():
        if incoming is None:
            continue
        current = dict1.get(key)
        if not isinstance(incoming, dict):
            # Plain values simply replace whatever was stored before.
            dict1[key] = incoming
            continue
        if current is None:
            current = {}
        if isinstance(current, Alias):
            # Keep the alias and remember the overriding entries next to it.
            combined = (current, incoming)
        elif isinstance(current, tuple):
            # Already an (alias, overrides) pair: extend a copy of the
            # overrides with the incoming entries.
            alias, overrides = current
            overrides = overrides.copy()
            merge(overrides, incoming)
            combined = (alias, overrides)
        else:
            # Nested dictionary: merge into a copy, never into the original.
            combined = current.copy()
            merge(combined, incoming)
        dict1[key] = combined
class Alias(object):
    """Pointer to another location inside the locale data.

    The location is described by `keys`: a sequence of dictionary keys that
    is followed one after the other, starting at the top of the data.
    """

    def __init__(self, keys):
        self.keys = tuple(keys)

    def __repr__(self):
        class_name = type(self).__name__
        return '<%s %r>' % (class_name, self.keys)

    def resolve(self, data):
        """Follow the alias through the given data and return the target.

        Resolution is recursive: when the target is itself an alias, or an
        ``(alias, others)`` pair, that alias is resolved against the same
        data as well.

        :param data: the locale data
        :type data: `dict`
        """
        target = data
        for step in self.keys:
            target = target[step]
        if isinstance(target, Alias):
            return target.resolve(data)
        if isinstance(target, tuple):
            # Only the alias half of the pair is followed here.
            alias, _others = target
            return alias.resolve(data)
        return target
class LocaleDataDict(MutableMapping):
    """Mapping around locale data that transparently replaces aliases with
    the values they point to whenever an item is read.
    """

    def __init__(self, data, base=None):
        self._data = data
        # Aliases are resolved against `base`; it defaults to the data itself.
        self.base = data if base is None else base

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __getitem__(self, key):
        stored = self._data[key]
        value = stored
        if isinstance(value, Alias):
            # Plain alias: look up the value it refers to.
            value = value.resolve(self.base)
        if isinstance(value, tuple):
            # (alias, overrides) pair: resolve, then apply the overrides to
            # a copy of the resolved value.
            alias, overrides = value
            value = alias.resolve(self.base).copy()
            merge(value, overrides)
        if type(value) is dict:
            # Wrap plain dictionaries so nested lookups resolve aliases too.
            value = LocaleDataDict(value, base=self.base)
        if value is not stored:
            # Store the processed value back so the work is done only once.
            self._data[key] = value
        return value

    def __setitem__(self, key, value):
        self._data[key] = value

    def __delitem__(self, key):
        del self._data[key]

    def copy(self):
        """Return a new wrapper around a shallow copy of the data that
        shares the same `base`.
        """
        duplicate = self._data.copy()
        return LocaleDataDict(duplicate, base=self.base)

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (Umin_daysqKU weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq }q!Uvariantsq"}q#Ucurrency_namesq$}q%U unit_patternsq&}q'u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (Umin_daysqKU weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq }q!Uvariantsq"}q#Ucurrency_namesq$}q%U unit_patternsq&}q'u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (Umin_daysqKU weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq }q!Uvariantsq"}q#Ucurrency_namesq$}q%U unit_patternsq&}q'u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}q Uvariantsq!}q"Ucurrency_namesq#}q$U unit_patternsq%}q&u.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}q Uvariantsq!}q"Ucurrency_namesq#}q$U unit_patternsq%}q&u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (Umin_daysqKU weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq }q!Uvariantsq"}q#Ucurrency_namesq$}q%U unit_patternsq&}q'u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}q Uvariantsq!}q"Ucurrency_namesq#}q$U unit_patternsq%}q&u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}q Uvariantsq!}q"Ucurrency_namesq#}q$U unit_patternsq%}q&u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}q Uvariantsq!}q"Ucurrency_namesq#}q$U unit_patternsq%}q&u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (Umin_daysqKU weekend_startqKU first_dayqKU weekend_endqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq }q!Uvariantsq"}q#Ucurrency_namesq$}q%U unit_patternsq&}q'u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q U zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}qUvariantsq}qUcurrency_namesq }q!U unit_patternsq"}q#u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q (U weekend_startqKU first_dayqKuU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}qUvariantsq }q!Ucurrency_namesq"}q#U unit_patternsq$}q%u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q U first_dayqKsU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}qUvariantsq}q Ucurrency_namesq!}q"U unit_patternsq#}q$u.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q U first_dayqKsU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}qUvariantsq}q Ucurrency_namesq!}q"U unit_patternsq#}q$u.

Binary file not shown.

Binary file not shown.

@ -1,4 +0,0 @@
}q(Ucurrency_symbolsq}qUscientific_formatsq}qUpercent_formatsq}qUnumber_symbolsq}q Ucurrency_names_pluralq
}q U week_dataq }q U first_dayqKsU zone_formatsq}qUcurrency_formatsq}qU_versionqM5 U languagesq}qU territoriesq}U
time_zonesq}qUscriptsq}qUdecimal_formatsq}qU
meta_zonesq}qUvariantsq}q Ucurrency_namesq!}q"U unit_patternsq#}q$u.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save