bo-graduation/venv/lib/python3.7/site-packages/joblib/numpy_pickle_utils.py

"""Utilities for fast persistence of big data, with optional compression."""

# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Copyright (c) 2009 Gael Varoquaux
# License: BSD Style, 3 clauses.

import pickle
import io
import warnings
import contextlib
from contextlib import closing

from ._compat import PY3_OR_LATER, PY27
from .compressor import _ZFILE_PREFIX
from .compressor import _COMPRESSORS

if PY3_OR_LATER:
    Unpickler = pickle._Unpickler
    Pickler = pickle._Pickler
    xrange = range
else:
    Unpickler = pickle.Unpickler
    Pickler = pickle.Pickler

try:
    import numpy as np
except ImportError:
    np = None

try:
    # The python standard library can be built without bz2 so we make bz2
    # usage optional.
    # see https://github.com/scikit-learn/scikit-learn/issues/7526 for more
    # details.
    import bz2
except ImportError:
    bz2 = None

# Buffer size used in io.BufferedReader and io.BufferedWriter
_IO_BUFFER_SIZE = 1024 ** 2


def _is_raw_file(fileobj):
    """Check if fileobj is a raw file object, e.g created with open."""
    if PY3_OR_LATER:
        fileobj = getattr(fileobj, 'raw', fileobj)
        return isinstance(fileobj, io.FileIO)
    else:
        return isinstance(fileobj, file)  # noqa


def _get_prefixes_max_len():
    # Compute the max prefix len of registered compressors.
    prefixes = [len(compressor.prefix) for compressor in _COMPRESSORS.values()]
    prefixes += [len(_ZFILE_PREFIX)]
    return max(prefixes)


###############################################################################
# Cache file utilities
def _detect_compressor(fileobj):
    """Return the compressor matching fileobj.

    Parameters
    ----------
    fileobj: file object

    Returns
    -------
    str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat', 'not-compressed'}
    """
    # Read the magic number in the first bytes of the file.
    max_prefix_len = _get_prefixes_max_len()
    if hasattr(fileobj, 'peek'):
        # Peek allows to read those bytes without moving the cursor in the
        # file whic.
        first_bytes = fileobj.peek(max_prefix_len)
    else:
        # Fallback to seek if the fileobject is not peekable.
        first_bytes = fileobj.read(max_prefix_len)
        fileobj.seek(0)

    if first_bytes.startswith(_ZFILE_PREFIX):
        return "compat"
    else:
        for name, compressor in _COMPRESSORS.items():
            if first_bytes.startswith(compressor.prefix):
                return name

    return "not-compressed"


def _buffered_read_file(fobj):
    """Return a buffered version of a read file object."""
    if PY27 and bz2 is not None and isinstance(fobj, bz2.BZ2File):
        # Python 2.7 doesn't work with BZ2File through a buffer: "no
        # attribute 'readable'" error.
        return fobj
    else:
        return io.BufferedReader(fobj, buffer_size=_IO_BUFFER_SIZE)


def _buffered_write_file(fobj):
    """Return a buffered version of a write file object."""
    if PY27 and bz2 is not None and isinstance(fobj, bz2.BZ2File):
        # Python 2.7 doesn't work with BZ2File through a buffer: no attribute
        # 'writable'.
        # BZ2File doesn't implement the file object context manager in python 2
        # so we wrap the fileobj using `closing`.
        return closing(fobj)
    else:
        return io.BufferedWriter(fobj, buffer_size=_IO_BUFFER_SIZE)


@contextlib.contextmanager
def _read_fileobject(fileobj, filename, mmap_mode=None):
    """Utility function opening the right fileobject from a filename.

    The magic number is used to choose between the type of file object to open:
    * regular file object (default)
    * zlib file object
    * gzip file object
    * bz2 file object
    * lzma file object (for xz and lzma compressor)

    Parameters
    ----------
    fileobj: file object
    compressor: str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat',
                        'not-compressed'}
    filename: str
        filename path corresponding to the fileobj parameter.
    mmap_mode: str
        memory map mode that should be used to open the pickle file. This
        parameter is useful to verify that the user is not trying to one with
        compression. Default: None.

    Returns
    -------
        a file like object

    """
    # Detect if the fileobj contains compressed data.
    compressor = _detect_compressor(fileobj)

    if compressor == 'compat':
        # Compatibility with old pickle mode: simply return the input
        # filename "as-is" and let the compatibility function be called by the
        # caller.
        warnings.warn("The file '%s' has been generated with a joblib "
                      "version less than 0.10. "
                      "Please regenerate this pickle file." % filename,
                      DeprecationWarning, stacklevel=2)
        yield filename
    else:
        if compressor in _COMPRESSORS:
            # based on the compressor detected in the file, we open the
            # correct decompressor file object, wrapped in a buffer.
            compressor_wrapper = _COMPRESSORS[compressor]
            inst = compressor_wrapper.decompressor_file(fileobj)
            fileobj = _buffered_read_file(inst)

        # Checking if incompatible load parameters with the type of file:
        # mmap_mode cannot be used with compressed file or in memory buffers
        # such as io.BytesIO.
        if mmap_mode is not None:
            if isinstance(fileobj, io.BytesIO):
                warnings.warn('In memory persistence is not compatible with '
                              'mmap_mode "%(mmap_mode)s" flag passed. '
                              'mmap_mode option will be ignored.'
                              % locals(), stacklevel=2)
            elif compressor != 'not-compressed':
                warnings.warn('mmap_mode "%(mmap_mode)s" is not compatible '
                              'with compressed file %(filename)s. '
                              '"%(mmap_mode)s" flag will be ignored.'
                              % locals(), stacklevel=2)
            elif not _is_raw_file(fileobj):
                warnings.warn('"%(fileobj)r" is not a raw file, mmap_mode '
                              '"%(mmap_mode)s" flag will be ignored.'
                              % locals(), stacklevel=2)

        yield fileobj


def _write_fileobject(filename, compress=("zlib", 3)):
    """Return the right compressor file object in write mode."""
    compressmethod = compress[0]
    compresslevel = compress[1]

    if compressmethod in _COMPRESSORS.keys():
        file_instance = _COMPRESSORS[compressmethod].compressor_file(
            filename, compresslevel=compresslevel)
        return _buffered_write_file(file_instance)
    else:
        file_instance = _COMPRESSORS['zlib'].compressor_file(
            filename, compresslevel=compresslevel)
        return _buffered_write_file(file_instance)


# Utility functions/variables from numpy required for writing arrays.
# We need at least the functions introduced in version 1.9 of numpy. Here,
# we use the ones from numpy 1.10.2.
BUFFER_SIZE = 2 ** 18  # size of buffer for reading npz files in bytes


def _read_bytes(fp, size, error_template="ran out of data"):
    """Read from file-like object until size bytes are read.

    Raises ValueError if not EOF is encountered before size bytes are read.
    Non-blocking objects only supported if they derive from io objects.

    Required as e.g. ZipExtFile in python 2.6 can return less data than
    requested.

    This function was taken from numpy/lib/format.py in version 1.10.2.

    Parameters
    ----------
    fp: file-like object
    size: int
    error_template: str

    Returns
    -------
    a bytes object
        The data read in bytes.

    """
    data = bytes()
    while True:
        # io files (default in python3) return None or raise on
        # would-block, python2 file will truncate, probably nothing can be
        # done about that.  note that regular files can't be non-blocking
        try:
            r = fp.read(size - len(data))
            data += r
            if len(r) == 0 or len(data) == size:
                break
        except io.BlockingIOError:
            pass
    if len(data) != size:
        msg = "EOF: reading %s, expected %d bytes got %d"
        raise ValueError(msg % (error_template, size, len(data)))
    else:
        return data
add tag_comparison_v3.py 5 years ago			`"""Utilities for fast persistence of big data, with optional compression."""`

			`# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>`
			`# Copyright (c) 2009 Gael Varoquaux`
			`# License: BSD Style, 3 clauses.`

			`import pickle`
			`import io`
			`import warnings`
			`import contextlib`
			`from contextlib import closing`

			`from ._compat import PY3_OR_LATER, PY27`
			`from .compressor import _ZFILE_PREFIX`
			`from .compressor import _COMPRESSORS`

			`if PY3_OR_LATER:`
			`Unpickler = pickle._Unpickler`
			`Pickler = pickle._Pickler`
			`xrange = range`
			`else:`
			`Unpickler = pickle.Unpickler`
			`Pickler = pickle.Pickler`

			`try:`
			`import numpy as np`
			`except ImportError:`
			`np = None`

			`try:`
			`# The python standard library can be built without bz2 so we make bz2`
			`# usage optional.`
			`# see https://github.com/scikit-learn/scikit-learn/issues/7526 for more`
			`# details.`
			`import bz2`
			`except ImportError:`
			`bz2 = None`

			`# Buffer size used in io.BufferedReader and io.BufferedWriter`
			`_IO_BUFFER_SIZE = 1024 ** 2`


			`def _is_raw_file(fileobj):`
			`"""Check if fileobj is a raw file object, e.g created with open."""`
			`if PY3_OR_LATER:`
			`fileobj = getattr(fileobj, 'raw', fileobj)`
			`return isinstance(fileobj, io.FileIO)`
			`else:`
			`return isinstance(fileobj, file) # noqa`


			`def _get_prefixes_max_len():`
			`# Compute the max prefix len of registered compressors.`
			`prefixes = [len(compressor.prefix) for compressor in _COMPRESSORS.values()]`
			`prefixes += [len(_ZFILE_PREFIX)]`
			`return max(prefixes)`


			`###############################################################################`
			`# Cache file utilities`
			`def _detect_compressor(fileobj):`
			`"""Return the compressor matching fileobj.`

			`Parameters`
			`----------`
			`fileobj: file object`

			`Returns`
			`-------`
			`str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat', 'not-compressed'}`
			`"""`
			`# Read the magic number in the first bytes of the file.`
			`max_prefix_len = _get_prefixes_max_len()`
			`if hasattr(fileobj, 'peek'):`
			`# Peek allows to read those bytes without moving the cursor in the`
			`# file whic.`
			`first_bytes = fileobj.peek(max_prefix_len)`
			`else:`
			`# Fallback to seek if the fileobject is not peekable.`
			`first_bytes = fileobj.read(max_prefix_len)`
			`fileobj.seek(0)`

			`if first_bytes.startswith(_ZFILE_PREFIX):`
			`return "compat"`
			`else:`
			`for name, compressor in _COMPRESSORS.items():`
			`if first_bytes.startswith(compressor.prefix):`
			`return name`

			`return "not-compressed"`


			`def _buffered_read_file(fobj):`
			`"""Return a buffered version of a read file object."""`
			`if PY27 and bz2 is not None and isinstance(fobj, bz2.BZ2File):`
			`# Python 2.7 doesn't work with BZ2File through a buffer: "no`
			`# attribute 'readable'" error.`
			`return fobj`
			`else:`
			`return io.BufferedReader(fobj, buffer_size=_IO_BUFFER_SIZE)`


			`def _buffered_write_file(fobj):`
			`"""Return a buffered version of a write file object."""`
			`if PY27 and bz2 is not None and isinstance(fobj, bz2.BZ2File):`
			`# Python 2.7 doesn't work with BZ2File through a buffer: no attribute`
			`# 'writable'.`
			`# BZ2File doesn't implement the file object context manager in python 2`
			# so we wrap the fileobj using `closing`.
			`return closing(fobj)`
			`else:`
			`return io.BufferedWriter(fobj, buffer_size=_IO_BUFFER_SIZE)`


			`@contextlib.contextmanager`
			`def _read_fileobject(fileobj, filename, mmap_mode=None):`
			`"""Utility function opening the right fileobject from a filename.`

			`The magic number is used to choose between the type of file object to open:`
			`* regular file object (default)`
			`* zlib file object`
			`* gzip file object`
			`* bz2 file object`
			`* lzma file object (for xz and lzma compressor)`

			`Parameters`
			`----------`
			`fileobj: file object`
			`compressor: str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat',`
			`'not-compressed'}`
			`filename: str`
			`filename path corresponding to the fileobj parameter.`
			`mmap_mode: str`
			`memory map mode that should be used to open the pickle file. This`
			`parameter is useful to verify that the user is not trying to one with`
			`compression. Default: None.`

			`Returns`
			`-------`
			`a file like object`

			`"""`
			`# Detect if the fileobj contains compressed data.`
			`compressor = _detect_compressor(fileobj)`

			`if compressor == 'compat':`
			`# Compatibility with old pickle mode: simply return the input`
			`# filename "as-is" and let the compatibility function be called by the`
			`# caller.`
			`warnings.warn("The file '%s' has been generated with a joblib "`
			`"version less than 0.10. "`
			`"Please regenerate this pickle file." % filename,`
			`DeprecationWarning, stacklevel=2)`
			`yield filename`
			`else:`
			`if compressor in _COMPRESSORS:`
			`# based on the compressor detected in the file, we open the`
			`# correct decompressor file object, wrapped in a buffer.`
			`compressor_wrapper = _COMPRESSORS[compressor]`
			`inst = compressor_wrapper.decompressor_file(fileobj)`
			`fileobj = _buffered_read_file(inst)`

			`# Checking if incompatible load parameters with the type of file:`
			`# mmap_mode cannot be used with compressed file or in memory buffers`
			`# such as io.BytesIO.`
			`if mmap_mode is not None:`
			`if isinstance(fileobj, io.BytesIO):`
			`warnings.warn('In memory persistence is not compatible with '`
			`'mmap_mode "%(mmap_mode)s" flag passed. '`
			`'mmap_mode option will be ignored.'`
			`% locals(), stacklevel=2)`
			`elif compressor != 'not-compressed':`
			`warnings.warn('mmap_mode "%(mmap_mode)s" is not compatible '`
			`'with compressed file %(filename)s. '`
			`'"%(mmap_mode)s" flag will be ignored.'`
			`% locals(), stacklevel=2)`
			`elif not _is_raw_file(fileobj):`
			`warnings.warn('"%(fileobj)r" is not a raw file, mmap_mode '`
			`'"%(mmap_mode)s" flag will be ignored.'`
			`% locals(), stacklevel=2)`

			`yield fileobj`


			`def _write_fileobject(filename, compress=("zlib", 3)):`
			`"""Return the right compressor file object in write mode."""`
			`compressmethod = compress[0]`
			`compresslevel = compress[1]`

			`if compressmethod in _COMPRESSORS.keys():`
			`file_instance = _COMPRESSORS[compressmethod].compressor_file(`
			`filename, compresslevel=compresslevel)`
			`return _buffered_write_file(file_instance)`
			`else:`
			`file_instance = _COMPRESSORS['zlib'].compressor_file(`
			`filename, compresslevel=compresslevel)`
			`return _buffered_write_file(file_instance)`


			`# Utility functions/variables from numpy required for writing arrays.`
			`# We need at least the functions introduced in version 1.9 of numpy. Here,`
			`# we use the ones from numpy 1.10.2.`
			`BUFFER_SIZE = 2 ** 18 # size of buffer for reading npz files in bytes`


			`def _read_bytes(fp, size, error_template="ran out of data"):`
			`"""Read from file-like object until size bytes are read.`

			`Raises ValueError if not EOF is encountered before size bytes are read.`
			`Non-blocking objects only supported if they derive from io objects.`

			`Required as e.g. ZipExtFile in python 2.6 can return less data than`
			`requested.`

			`This function was taken from numpy/lib/format.py in version 1.10.2.`

			`Parameters`
			`----------`
			`fp: file-like object`
			`size: int`
			`error_template: str`

			`Returns`
			`-------`
			`a bytes object`
			`The data read in bytes.`

			`"""`
			`data = bytes()`
			`while True:`
			`# io files (default in python3) return None or raise on`
			`# would-block, python2 file will truncate, probably nothing can be`
			`# done about that. note that regular files can't be non-blocking`
			`try:`
			`r = fp.read(size - len(data))`
			`data += r`
			`if len(r) == 0 or len(data) == size:`
			`break`
			`except io.BlockingIOError:`
			`pass`
			`if len(data) != size:`
			`msg = "EOF: reading %s, expected %d bytes got %d"`
			`raise ValueError(msg % (error_template, size, len(data)))`
			`else:`
			`return data`