from __future__ import division, print_function, absolute_import

import functools
import operator
import sys
import warnings
import numbers
from collections import namedtuple
from multiprocessing import Pool
import inspect

import numpy as np


def _valarray(shape, value=np.nan, typecode=None):
    """Return an array of shape `shape` with every element set to `value`.

    Parameters
    ----------
    shape : int or tuple of ints
        Shape of the returned array.
    value : scalar, optional
        Fill value. Default is ``np.nan``.
    typecode : dtype or type code, optional
        If given, the result is cast to this type with ``astype``.

    Returns
    -------
    out : ndarray
        Always an ``ndarray``, including for a zero-dimensional `shape`.
    """
    out = np.ones(shape, dtype=bool) * value
    if typecode is not None:
        out = out.astype(typecode)
    # Multiplying a 0-d array yields a numpy scalar; wrap it back up so that
    # callers can rely on getting an ndarray (e.g. for np.place).
    if not isinstance(out, np.ndarray):
        out = np.asarray(out)
    return out


def _lazywhere(cond, arrays, f, fillvalue=None, f2=None):
    """
    np.where(cond, x, fillvalue) always evaluates x even where cond is False.
    This one only evaluates f(arr1[cond], arr2[cond], ...).
    For example,

    >>> a, b = np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8])
    >>> def f(a, b):
    ...     return a*b
    >>> _lazywhere(a > 2, (a, b), f, np.nan)
    array([ nan,  nan,  21.,  32.])

    `cond` and all `arrays` are broadcast against each other, so they must
    either share a shape or be broadcastable together.

    Parameters
    ----------
    cond : array_like
        Condition; it is interpreted as boolean.
    arrays : sequence of array_like
        Arguments handed (after masking with `cond`) to `f` and `f2`.
    f : callable
        Evaluated only on the elements where `cond` is true.
    fillvalue : scalar, optional
        Value used where `cond` is false. Exclusive with `f2`.
    f2 : callable, optional
        Evaluated only on the elements where `cond` is false. Exclusive
        with `fillvalue`.

    Returns
    -------
    out : ndarray
        Array of the broadcast shape, with a dtype chosen by
        ``np.mintypecode`` from the dtypes of `arrays`.

    Raises
    ------
    ValueError
        If neither or both of `fillvalue` and `f2` are given.
    """
    if fillvalue is None:
        if f2 is None:
            raise ValueError("One of (fillvalue, f2) must be given.")
        else:
            # Placeholder only: every element is overwritten by f or f2.
            fillvalue = np.nan
    else:
        if f2 is not None:
            raise ValueError("Only one of (fillvalue, f2) can be given.")

    # Broadcast the condition together with the arguments. Coercing to bool
    # first guarantees that `~cond` below is a logical (not bitwise) negation.
    broadcasted = np.broadcast_arrays(np.asarray(cond, dtype=bool), *arrays)
    cond, arrays = broadcasted[0], broadcasted[1:]

    temp = tuple(np.extract(cond, arr) for arr in arrays)
    tcode = np.mintypecode([a.dtype.char for a in arrays])
    out = _valarray(np.shape(arrays[0]), value=fillvalue, typecode=tcode)
    np.place(out, cond, f(*temp))
    if f2 is not None:
        temp = tuple(np.extract(~cond, arr) for arr in arrays)
        np.place(out, ~cond, f2(*temp))

    return out


def _lazyselect(condlist, choicelist, arrays, default=0):
    """
    Mimic `np.select(condlist, choicelist)`.

    Notice it assumes that all `arrays` are of the same shape, or can be
    broadcasted together.

    All functions in `choicelist` must accept array arguments in the order
    given in `arrays` and must return an array of the same shape as broadcasted
    `arrays`.

    Each function is evaluated only on the elements selected by its
    condition, and not at all when its condition selects nothing. The
    conditions are applied in order, each writing over the output, so where
    several conditions hold the last one determines the result.

    Examples
    --------
    >>> x = np.arange(6)
    >>> np.select([x <3, x > 3], [x**2, x**3], default=0)
    array([  0,   1,   4,   0,  64, 125])

    >>> _lazyselect([x < 3, x > 3], [lambda x: x**2, lambda x: x**3], (x,))
    array([   0.,    1.,    4.,   0.,   64.,  125.])

    >>> a = -np.ones_like(x)
    >>> _lazyselect([x < 3, x > 3],
    ...             [lambda x, a: x**2, lambda x, a: a * x**3],
    ...             (x, a), default=np.nan)
    array([   0.,    1.,    4.,   nan,  -64., -125.])

    """
    arrays = np.broadcast_arrays(*arrays)
    tcode = np.mintypecode([a.dtype.char for a in arrays])
    out = _valarray(np.shape(arrays[0]), value=default, typecode=tcode)
    for index, cond in enumerate(condlist):
        func = choicelist[index]
        # Nothing selected: skip, so that `func` is never handed empty input.
        if not np.any(cond):
            continue
        cond, _ = np.broadcast_arrays(cond, arrays[0])
        temp = tuple(np.extract(cond, arr) for arr in arrays)
        np.place(out, cond, func(*temp))
    return out


def _aligned_zeros(shape, dtype=float, order="C", align=None):
    """Allocate a new ndarray with aligned memory.

    Primary use case for this currently is working around a f2py issue
    in Numpy 1.9.1, where dtype.alignment is such that np.zeros() does
    not necessarily create arrays aligned up to it.

    Parameters
    ----------
    shape : int or tuple of ints
        Shape of the array; ``()`` gives a zero-dimensional array.
    dtype : dtype, optional
        Element type. Default is ``float``.
    order : {"C", "F"}, optional
        Memory layout passed on to ``np.ndarray``.
    align : int, optional
        Required alignment in bytes. Defaults to ``dtype.alignment``.

    Returns
    -------
    data : ndarray
        Zero-filled array whose data pointer is a multiple of `align`.
    """
    dtype = np.dtype(dtype)
    if align is None:
        align = dtype.alignment
    if not hasattr(shape, '__len__'):
        shape = (shape,)
    # The initial value 1 makes the product well defined for shape == ().
    size = functools.reduce(operator.mul, shape, 1) * dtype.itemsize
    # Over-allocate so that an aligned window of `size` bytes always fits.
    buf = np.empty(size + align + 1, np.uint8)
    offset = buf.__array_interface__['data'][0] % align
    if offset != 0:
        offset = align - offset
    # Note: slices producing 0-size arrays do not necessarily change
    # data pointer --- so we use and allocate size+1
    buf = buf[offset:offset+size+1][:-1]
    data = np.ndarray(shape, dtype, buf, order=order)
    data.fill(0)
    return data


def _prune_array(array):
    """Return an array equivalent to the input array. If the input
    array is a view of a much larger array, copy its contents to a
    newly allocated array. Otherwise, return the input unchanged.

    "Much larger" means that the base array has more than roughly twice as
    many elements as `array`; copying then lets the base be released.
    """
    if array.base is not None and array.size < array.base.size // 2:
        return array.copy()
    return array


class DeprecatedImport(object):
    """
    Deprecated import, with redirection + warning.

    Examples
    --------
    Suppose you previously had in some module::

        from foo import spam

    If this has to be deprecated, do::

        spam = DeprecatedImport("foo.spam", "baz")

    to redirect users to use "baz" module instead.

    """

    def __init__(self, old_module_name, new_module_name):
        self._old_name = old_module_name
        self._new_name = new_module_name
        # Import the replacement eagerly and keep a handle on it; attribute
        # access is forwarded to this module.
        __import__(self._new_name)
        self._mod = sys.modules[self._new_name]

    def __dir__(self):
        return dir(self._mod)

    def __getattr__(self, name):
        # Only reached for names not found on the instance itself, i.e. for
        # everything that is looked up on the wrapped module.
        warnings.warn("Module %s is deprecated, use %s instead"
                      % (self._old_name, self._new_name),
                      DeprecationWarning)
        return getattr(self._mod, name)


# copy-pasted from scikit-learn utils/validation.py
def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance

    If seed is None (or np.random), return the RandomState singleton used
    by np.random.
    If seed is an int, return a new RandomState instance seeded with seed.
    If seed is already a RandomState instance, return it.
    Otherwise raise ValueError.
    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (numbers.Integral, np.integer)):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
                     ' instance' % seed)


def _asarray_validated(a, check_finite=True,
                       sparse_ok=False, objects_ok=False, mask_ok=False,
                       as_inexact=False):
    """
    Helper function for scipy argument validation.

    Many scipy linear algebra functions accept array-like input, but not
    every kind of it. Examples of commonly unsupported inputs include
    matrices containing inf/nan, sparse matrix representations, and
    matrices with complicated elements.

    Parameters
    ----------
    a : array_like
        The array-like input.
    check_finite : bool, optional
        Whether to check that the input matrices contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default: True
    sparse_ok : bool, optional
        True if scipy sparse matrices are allowed.
    objects_ok : bool, optional
        True if arrays with dtype('O') are allowed.
    mask_ok : bool, optional
        True if masked arrays are allowed.
    as_inexact : bool, optional
        True to convert the input array to a np.inexact dtype.

    Returns
    -------
    ret : ndarray
        The converted validated array.

    Raises
    ------
    ValueError
        If `a` is of a kind that has not been allowed (sparse, masked,
        object dtype), or if `check_finite` is true and `a` contains
        infs or NaNs.

    """
    if not sparse_ok:
        # Imported lazily so that scipy.sparse is only needed for this check.
        import scipy.sparse
        if scipy.sparse.issparse(a):
            msg = ('Sparse matrices are not supported by this function. '
                   'Perhaps one of the scipy.sparse.linalg functions '
                   'would work instead.')
            raise ValueError(msg)
    if not mask_ok:
        if np.ma.isMaskedArray(a):
            raise ValueError('masked arrays are not supported')
    toarray = np.asarray_chkfinite if check_finite else np.asarray
    a = toarray(a)
    if not objects_ok:
        if a.dtype == np.dtype('O'):
            raise ValueError('object arrays are not supported')
    if as_inexact:
        if not np.issubdtype(a.dtype, np.inexact):
            # np.float64 rather than the np.float_ alias, which NumPy 2.0
            # no longer provides.
            a = toarray(a, dtype=np.float64)
    return a


# Add a replacement for inspect.getargspec() which is deprecated in python 3.5
# The version below is borrowed from Django,
# https://github.com/django/django/pull/4846

# Note an inconsistency between inspect.getargspec(func) and
# inspect.signature(func). If `func` is a bound method, the latter does *not*
# list `self` as a first argument, while the former *does*.
# Hence cook up a common ground replacement: `getargspec_no_self` which
# mimics `inspect.getargspec` but does not list `self`.
#
# This way, the caller code does not need to know whether it uses a legacy
# .getargspec or bright and shiny .signature.

try:
    # is it python 3.3 or higher?
    inspect.signature

    # Apparently, yes. Wrap inspect.signature

    ArgSpec = namedtuple('ArgSpec', ['args', 'varargs', 'keywords', 'defaults'])

    def getargspec_no_self(func):
        """inspect.getargspec replacement using inspect.signature.

        inspect.getargspec is deprecated in python 3. This is a replacement
        based on the (new in python 3.3) `inspect.signature`.

        Parameters
        ----------
        func : callable
            A callable to inspect

        Returns
        -------
        argspec : ArgSpec(args, varargs, varkw, defaults)
            This is similar to the result of inspect.getargspec(func) under
            python 2.x.
            `args` and `defaults` are lists (`defaults` is None when there
            are no defaults); only positional-or-keyword parameters are
            reported in them.
            NOTE: if `func` is a bound method, `self` is *not* included in
            argspec.args, because `inspect.signature` does not list it.
            This is done for consistency between inspect.getargspec() under
            python 2.x, and inspect.signature() under python 3.x.
        """
        sig = inspect.signature(func)
        args = [
            p.name for p in sig.parameters.values()
            if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
        ]
        varargs = [
            p.name for p in sig.parameters.values()
            if p.kind == inspect.Parameter.VAR_POSITIONAL
        ]
        varargs = varargs[0] if varargs else None
        varkw = [
            p.name for p in sig.parameters.values()
            if p.kind == inspect.Parameter.VAR_KEYWORD
        ]
        varkw = varkw[0] if varkw else None
        defaults = [
            p.default for p in sig.parameters.values()
            if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and
                p.default is not p.empty)
        ] or None
        return ArgSpec(args, varargs, varkw, defaults)

except AttributeError:
    # python 2.x
    def getargspec_no_self(func):
        """inspect.getargspec replacement for compatibility with python 3.x.

        inspect.getargspec is deprecated in python 3. This wraps it, and
        *removes* `self` from the argument list of `func`, if present.
        This is done for forward compatibility with python 3.

        Parameters
        ----------
        func : callable
            A callable to inspect

        Returns
        -------
        argspec : ArgSpec(args, varargs, varkw, defaults)
            This is similar to the result of inspect.getargspec(func) under
            python 2.x.
            NOTE: if the first argument of `func` is self, it is *not*, I repeat
            *not* included in argspec.args.
            This is done for consistency between inspect.getargspec() under
            python 2.x, and inspect.signature() under python 3.x.
        """
        argspec = inspect.getargspec(func)
        # Guard against callables that take no positional arguments at all.
        if argspec.args and argspec.args[0] == 'self':
            argspec.args.pop(0)
        return argspec


class MapWrapper(object):
    """
    Parallelisation wrapper for working with map-like callables, such as
    `multiprocessing.Pool.map`.

    Parameters
    ----------
    pool : int or map-like callable
        If `pool` is an integer, then it specifies the number of worker
        processes to use for parallelization. If ``int(pool) == 1``, then no
        parallel processing is used and the map builtin is used.
        If ``pool == -1``, then the pool will utilise all available CPUs.
        If `pool` is a map-like callable that follows the same
        calling sequence as the built-in map function, then this callable is
        used for parallelisation.

    Raises
    ------
    RuntimeError
        If `pool` is a number other than -1 or a positive integer.

    Notes
    -----
    A `multiprocessing.Pool` is created, and later shut down, by the wrapper
    only when `pool` is given as a number. A user supplied callable is never
    closed or terminated by the wrapper.
    """
    def __init__(self, pool=1):
        self.pool = None
        self._mapfunc = map
        # True only when the wrapper created the Pool itself and is therefore
        # responsible for shutting it down.
        self._own_pool = False

        if callable(pool):
            self.pool = pool
            self._mapfunc = self.pool
        else:
            # user supplies a number
            workers = int(pool)
            if workers == -1:
                # use as many processors as possible
                self.pool = Pool()
                self._mapfunc = self.pool.map
                self._own_pool = True
            elif workers == 1:
                # serial execution with the builtin map
                pass
            elif workers > 1:
                # use the number of processors requested
                self.pool = Pool(processes=workers)
                self._mapfunc = self.pool.map
                self._own_pool = True
            else:
                raise RuntimeError("Number of workers specified must be -1,"
                                   " an int >= 1, or an object with a 'map' method")

    def __enter__(self):
        return self

    def __del__(self):
        # Do not leave the shutdown of an owned pool to garbage collection:
        # stop accepting work, then stop the worker processes.
        self.close()
        self.terminate()

    def terminate(self):
        """Terminate the pool, if it is owned by the wrapper."""
        if self._own_pool:
            self.pool.terminate()

    def join(self):
        """Wait for the workers of an owned pool to exit."""
        if self._own_pool:
            self.pool.join()

    def close(self):
        """Stop an owned pool from accepting further work."""
        if self._own_pool:
            self.pool.close()

    def __exit__(self, exc_type, exc_value, traceback):
        if self._own_pool:
            if exc_type is None:
                # Clean exit: let the workers finish and wait for them.
                self.pool.close()
                self.pool.join()
            else:
                self.pool.terminate()

    def __call__(self, func, iterable):
        """Apply `func` to every item of `iterable` with the wrapped map.

        Raises
        ------
        TypeError
            If the call to the map-like callable raises a TypeError.
        """
        # only accept one iterable because that's all Pool.map accepts
        try:
            return self._mapfunc(func, iterable)
        except TypeError:
            # wrong number of arguments
            raise TypeError("The map-like callable must be of the"
                            " form f(func, iterable)")