r""" Nonlinear solvers ----------------- .. currentmodule:: scipy.optimize This is a collection of general-purpose nonlinear multidimensional solvers. These solvers find *x* for which *F(x) = 0*. Both *x* and *F* can be multidimensional. Routines ~~~~~~~~ Large-scale nonlinear solvers: .. autosummary:: newton_krylov anderson General nonlinear solvers: .. autosummary:: broyden1 broyden2 Simple iterations: .. autosummary:: excitingmixing linearmixing diagbroyden Examples ~~~~~~~~ **Small problem** >>> def F(x): ... return np.cos(x) + x[::-1] - [1, 2, 3, 4] >>> import scipy.optimize >>> x = scipy.optimize.broyden1(F, [1,1,1,1], f_tol=1e-14) >>> x array([ 4.04674914, 3.91158389, 2.71791677, 1.61756251]) >>> np.cos(x) + x[::-1] array([ 1., 2., 3., 4.]) **Large problem** Suppose that we needed to solve the following integrodifferential equation on the square :math:`[0,1]\times[0,1]`: .. math:: \nabla^2 P = 10 \left(\int_0^1\int_0^1\cosh(P)\,dx\,dy\right)^2 with :math:`P(x,1) = 1` and :math:`P=0` elsewhere on the boundary of the square. The solution can be found using the `newton_krylov` solver: .. plot:: import numpy as np from scipy.optimize import newton_krylov from numpy import cosh, zeros_like, mgrid, zeros # parameters nx, ny = 75, 75 hx, hy = 1./(nx-1), 1./(ny-1) P_left, P_right = 0, 0 P_top, P_bottom = 1, 0 def residual(P): d2x = zeros_like(P) d2y = zeros_like(P) d2x[1:-1] = (P[2:] - 2*P[1:-1] + P[:-2]) / hx/hx d2x[0] = (P[1] - 2*P[0] + P_left)/hx/hx d2x[-1] = (P_right - 2*P[-1] + P[-2])/hx/hx d2y[:,1:-1] = (P[:,2:] - 2*P[:,1:-1] + P[:,:-2])/hy/hy d2y[:,0] = (P[:,1] - 2*P[:,0] + P_bottom)/hy/hy d2y[:,-1] = (P_top - 2*P[:,-1] + P[:,-2])/hy/hy return d2x + d2y - 10*cosh(P).mean()**2 # solve guess = zeros((nx, ny), float) sol = newton_krylov(residual, guess, method='lgmres', verbose=1) print('Residual: %g' % abs(residual(sol)).max()) # visualize import matplotlib.pyplot as plt x, y = mgrid[0:1:(nx*1j), 0:1:(ny*1j)] plt.pcolor(x, y, sol) plt.colorbar() plt.show() """ # Copyright (C) 2009, Pauli Virtanen # Distributed under the same license as Scipy. from __future__ import division, print_function, absolute_import import sys import numpy as np from scipy._lib.six import callable, exec_, xrange from scipy.linalg import norm, solve, inv, qr, svd, LinAlgError from numpy import asarray, dot, vdot import scipy.sparse.linalg import scipy.sparse from scipy.linalg import get_blas_funcs import inspect from scipy._lib._util import getargspec_no_self as _getargspec from .linesearch import scalar_search_wolfe1, scalar_search_armijo __all__ = [ 'broyden1', 'broyden2', 'anderson', 'linearmixing', 'diagbroyden', 'excitingmixing', 'newton_krylov'] #------------------------------------------------------------------------------ # Utility functions #------------------------------------------------------------------------------ class NoConvergence(Exception): pass def maxnorm(x): return np.absolute(x).max() def _as_inexact(x): """Return `x` as an array, of either floats or complex floats""" x = asarray(x) if not np.issubdtype(x.dtype, np.inexact): return asarray(x, dtype=np.float_) return x def _array_like(x, x0): """Return ndarray `x` as same array subclass and shape as `x0`""" x = np.reshape(x, np.shape(x0)) wrap = getattr(x0, '__array_wrap__', x.__array_wrap__) return wrap(x) def _safe_norm(v): if not np.isfinite(v).all(): return np.array(np.inf) return norm(v) #------------------------------------------------------------------------------ # Generic nonlinear solver machinery #------------------------------------------------------------------------------ _doc_parts = dict( params_basic=""" F : function(x) -> f Function whose root to find; should take and return an array-like object. xin : array_like Initial guess for the solution """.strip(), params_extra=""" iter : int, optional Number of iterations to make. If omitted (default), make as many as required to meet tolerances. verbose : bool, optional Print status to stdout on every iteration. maxiter : int, optional Maximum number of iterations to make. If more are needed to meet convergence, `NoConvergence` is raised. f_tol : float, optional Absolute tolerance (in max-norm) for the residual. If omitted, default is 6e-6. f_rtol : float, optional Relative tolerance for the residual. If omitted, not used. x_tol : float, optional Absolute minimum step size, as determined from the Jacobian approximation. If the step size is smaller than this, optimization is terminated as successful. If omitted, not used. x_rtol : float, optional Relative minimum step size. If omitted, not used. tol_norm : function(vector) -> scalar, optional Norm to use in convergence check. Default is the maximum norm. line_search : {None, 'armijo' (default), 'wolfe'}, optional Which type of a line search to use to determine the step size in the direction given by the Jacobian approximation. Defaults to 'armijo'. callback : function, optional Optional callback function. It is called on every iteration as ``callback(x, f)`` where `x` is the current solution and `f` the corresponding residual. Returns ------- sol : ndarray An array (of similar array type as `x0`) containing the final solution. Raises ------ NoConvergence When a solution was not found. """.strip() ) def _set_doc(obj): if obj.__doc__: obj.__doc__ = obj.__doc__ % _doc_parts def nonlin_solve(F, x0, jacobian='krylov', iter=None, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, full_output=False, raise_exception=True): """ Find a root of a function, in a way suitable for large-scale problems. Parameters ---------- %(params_basic)s jacobian : Jacobian A Jacobian approximation: `Jacobian` object or something that `asjacobian` can transform to one. Alternatively, a string specifying which of the builtin Jacobian approximations to use: krylov, broyden1, broyden2, anderson diagbroyden, linearmixing, excitingmixing %(params_extra)s full_output : bool If true, returns a dictionary `info` containing convergence information. raise_exception : bool If True, a `NoConvergence` exception is raise if no solution is found. See Also -------- asjacobian, Jacobian Notes ----- This algorithm implements the inexact Newton method, with backtracking or full line searches. Several Jacobian approximations are available, including Krylov and Quasi-Newton methods. References ---------- .. [KIM] C. T. Kelley, \"Iterative Methods for Linear and Nonlinear Equations\". Society for Industrial and Applied Mathematics. (1995) https://archive.siam.org/books/kelley/fr16/ """ # Can't use default parameters because it's being explicitly passed as None # from the calling function, so we need to set it here. tol_norm = maxnorm if tol_norm is None else tol_norm condition = TerminationCondition(f_tol=f_tol, f_rtol=f_rtol, x_tol=x_tol, x_rtol=x_rtol, iter=iter, norm=tol_norm) x0 = _as_inexact(x0) func = lambda z: _as_inexact(F(_array_like(z, x0))).flatten() x = x0.flatten() dx = np.inf Fx = func(x) Fx_norm = norm(Fx) jacobian = asjacobian(jacobian) jacobian.setup(x.copy(), Fx, func) if maxiter is None: if iter is not None: maxiter = iter + 1 else: maxiter = 100*(x.size+1) if line_search is True: line_search = 'armijo' elif line_search is False: line_search = None if line_search not in (None, 'armijo', 'wolfe'): raise ValueError("Invalid line search") # Solver tolerance selection gamma = 0.9 eta_max = 0.9999 eta_treshold = 0.1 eta = 1e-3 for n in xrange(maxiter): status = condition.check(Fx, x, dx) if status: break # The tolerance, as computed for scipy.sparse.linalg.* routines tol = min(eta, eta*Fx_norm) dx = -jacobian.solve(Fx, tol=tol) if norm(dx) == 0: raise ValueError("Jacobian inversion yielded zero vector. " "This indicates a bug in the Jacobian " "approximation.") # Line search, or Newton step if line_search: s, x, Fx, Fx_norm_new = _nonlin_line_search(func, x, Fx, dx, line_search) else: s = 1.0 x = x + dx Fx = func(x) Fx_norm_new = norm(Fx) jacobian.update(x.copy(), Fx) if callback: callback(x, Fx) # Adjust forcing parameters for inexact methods eta_A = gamma * Fx_norm_new**2 / Fx_norm**2 if gamma * eta**2 < eta_treshold: eta = min(eta_max, eta_A) else: eta = min(eta_max, max(eta_A, gamma*eta**2)) Fx_norm = Fx_norm_new # Print status if verbose: sys.stdout.write("%d: |F(x)| = %g; step %g\n" % ( n, tol_norm(Fx), s)) sys.stdout.flush() else: if raise_exception: raise NoConvergence(_array_like(x, x0)) else: status = 2 if full_output: info = {'nit': condition.iteration, 'fun': Fx, 'status': status, 'success': status == 1, 'message': {1: 'A solution was found at the specified ' 'tolerance.', 2: 'The maximum number of iterations allowed ' 'has been reached.' }[status] } return _array_like(x, x0), info else: return _array_like(x, x0) _set_doc(nonlin_solve) def _nonlin_line_search(func, x, Fx, dx, search_type='armijo', rdiff=1e-8, smin=1e-2): tmp_s = [0] tmp_Fx = [Fx] tmp_phi = [norm(Fx)**2] s_norm = norm(x) / norm(dx) def phi(s, store=True): if s == tmp_s[0]: return tmp_phi[0] xt = x + s*dx v = func(xt) p = _safe_norm(v)**2 if store: tmp_s[0] = s tmp_phi[0] = p tmp_Fx[0] = v return p def derphi(s): ds = (abs(s) + s_norm + 1) * rdiff return (phi(s+ds, store=False) - phi(s)) / ds if search_type == 'wolfe': s, phi1, phi0 = scalar_search_wolfe1(phi, derphi, tmp_phi[0], xtol=1e-2, amin=smin) elif search_type == 'armijo': s, phi1 = scalar_search_armijo(phi, tmp_phi[0], -tmp_phi[0], amin=smin) if s is None: # XXX: No suitable step length found. Take the full Newton step, # and hope for the best. s = 1.0 x = x + s*dx if s == tmp_s[0]: Fx = tmp_Fx[0] else: Fx = func(x) Fx_norm = norm(Fx) return s, x, Fx, Fx_norm class TerminationCondition(object): """ Termination condition for an iteration. It is terminated if - |F| < f_rtol*|F_0|, AND - |F| < f_tol AND - |dx| < x_rtol*|x|, AND - |dx| < x_tol """ def __init__(self, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, iter=None, norm=maxnorm): if f_tol is None: f_tol = np.finfo(np.float_).eps ** (1./3) if f_rtol is None: f_rtol = np.inf if x_tol is None: x_tol = np.inf if x_rtol is None: x_rtol = np.inf self.x_tol = x_tol self.x_rtol = x_rtol self.f_tol = f_tol self.f_rtol = f_rtol self.norm = norm self.iter = iter self.f0_norm = None self.iteration = 0 def check(self, f, x, dx): self.iteration += 1 f_norm = self.norm(f) x_norm = self.norm(x) dx_norm = self.norm(dx) if self.f0_norm is None: self.f0_norm = f_norm if f_norm == 0: return 1 if self.iter is not None: # backwards compatibility with Scipy 0.6.0 return 2 * (self.iteration > self.iter) # NB: condition must succeed for rtol=inf even if norm == 0 return int((f_norm <= self.f_tol and f_norm/self.f_rtol <= self.f0_norm) and (dx_norm <= self.x_tol and dx_norm/self.x_rtol <= x_norm)) #------------------------------------------------------------------------------ # Generic Jacobian approximation #------------------------------------------------------------------------------ class Jacobian(object): """ Common interface for Jacobians or Jacobian approximations. The optional methods come useful when implementing trust region etc. algorithms that often require evaluating transposes of the Jacobian. Methods ------- solve Returns J^-1 * v update Updates Jacobian to point `x` (where the function has residual `Fx`) matvec : optional Returns J * v rmatvec : optional Returns A^H * v rsolve : optional Returns A^-H * v matmat : optional Returns A * V, where V is a dense matrix with dimensions (N,K). todense : optional Form the dense Jacobian matrix. Necessary for dense trust region algorithms, and useful for testing. Attributes ---------- shape Matrix dimensions (M, N) dtype Data type of the matrix. func : callable, optional Function the Jacobian corresponds to """ def __init__(self, **kw): names = ["solve", "update", "matvec", "rmatvec", "rsolve", "matmat", "todense", "shape", "dtype"] for name, value in kw.items(): if name not in names: raise ValueError("Unknown keyword argument %s" % name) if value is not None: setattr(self, name, kw[name]) if hasattr(self, 'todense'): self.__array__ = lambda: self.todense() def aspreconditioner(self): return InverseJacobian(self) def solve(self, v, tol=0): raise NotImplementedError def update(self, x, F): pass def setup(self, x, F, func): self.func = func self.shape = (F.size, x.size) self.dtype = F.dtype if self.__class__.setup is Jacobian.setup: # Call on the first point unless overridden self.update(x, F) class InverseJacobian(object): def __init__(self, jacobian): self.jacobian = jacobian self.matvec = jacobian.solve self.update = jacobian.update if hasattr(jacobian, 'setup'): self.setup = jacobian.setup if hasattr(jacobian, 'rsolve'): self.rmatvec = jacobian.rsolve @property def shape(self): return self.jacobian.shape @property def dtype(self): return self.jacobian.dtype def asjacobian(J): """ Convert given object to one suitable for use as a Jacobian. """ spsolve = scipy.sparse.linalg.spsolve if isinstance(J, Jacobian): return J elif inspect.isclass(J) and issubclass(J, Jacobian): return J() elif isinstance(J, np.ndarray): if J.ndim > 2: raise ValueError('array must have rank <= 2') J = np.atleast_2d(np.asarray(J)) if J.shape[0] != J.shape[1]: raise ValueError('array must be square') return Jacobian(matvec=lambda v: dot(J, v), rmatvec=lambda v: dot(J.conj().T, v), solve=lambda v: solve(J, v), rsolve=lambda v: solve(J.conj().T, v), dtype=J.dtype, shape=J.shape) elif scipy.sparse.isspmatrix(J): if J.shape[0] != J.shape[1]: raise ValueError('matrix must be square') return Jacobian(matvec=lambda v: J*v, rmatvec=lambda v: J.conj().T * v, solve=lambda v: spsolve(J, v), rsolve=lambda v: spsolve(J.conj().T, v), dtype=J.dtype, shape=J.shape) elif hasattr(J, 'shape') and hasattr(J, 'dtype') and hasattr(J, 'solve'): return Jacobian(matvec=getattr(J, 'matvec'), rmatvec=getattr(J, 'rmatvec'), solve=J.solve, rsolve=getattr(J, 'rsolve'), update=getattr(J, 'update'), setup=getattr(J, 'setup'), dtype=J.dtype, shape=J.shape) elif callable(J): # Assume it's a function J(x) that returns the Jacobian class Jac(Jacobian): def update(self, x, F): self.x = x def solve(self, v, tol=0): m = J(self.x) if isinstance(m, np.ndarray): return solve(m, v) elif scipy.sparse.isspmatrix(m): return spsolve(m, v) else: raise ValueError("Unknown matrix type") def matvec(self, v): m = J(self.x) if isinstance(m, np.ndarray): return dot(m, v) elif scipy.sparse.isspmatrix(m): return m*v else: raise ValueError("Unknown matrix type") def rsolve(self, v, tol=0): m = J(self.x) if isinstance(m, np.ndarray): return solve(m.conj().T, v) elif scipy.sparse.isspmatrix(m): return spsolve(m.conj().T, v) else: raise ValueError("Unknown matrix type") def rmatvec(self, v): m = J(self.x) if isinstance(m, np.ndarray): return dot(m.conj().T, v) elif scipy.sparse.isspmatrix(m): return m.conj().T * v else: raise ValueError("Unknown matrix type") return Jac() elif isinstance(J, str): return dict(broyden1=BroydenFirst, broyden2=BroydenSecond, anderson=Anderson, diagbroyden=DiagBroyden, linearmixing=LinearMixing, excitingmixing=ExcitingMixing, krylov=KrylovJacobian)[J]() else: raise TypeError('Cannot convert object to a Jacobian') #------------------------------------------------------------------------------ # Broyden #------------------------------------------------------------------------------ class GenericBroyden(Jacobian): def setup(self, x0, f0, func): Jacobian.setup(self, x0, f0, func) self.last_f = f0 self.last_x = x0 if hasattr(self, 'alpha') and self.alpha is None: # Autoscale the initial Jacobian parameter # unless we have already guessed the solution. normf0 = norm(f0) if normf0: self.alpha = 0.5*max(norm(x0), 1) / normf0 else: self.alpha = 1.0 def _update(self, x, f, dx, df, dx_norm, df_norm): raise NotImplementedError def update(self, x, f): df = f - self.last_f dx = x - self.last_x self._update(x, f, dx, df, norm(dx), norm(df)) self.last_f = f self.last_x = x class LowRankMatrix(object): r""" A matrix represented as .. math:: \alpha I + \sum_{n=0}^{n=M} c_n d_n^\dagger However, if the rank of the matrix reaches the dimension of the vectors, full matrix representation will be used thereon. """ def __init__(self, alpha, n, dtype): self.alpha = alpha self.cs = [] self.ds = [] self.n = n self.dtype = dtype self.collapsed = None @staticmethod def _matvec(v, alpha, cs, ds): axpy, scal, dotc = get_blas_funcs(['axpy', 'scal', 'dotc'], cs[:1] + [v]) w = alpha * v for c, d in zip(cs, ds): a = dotc(d, v) w = axpy(c, w, w.size, a) return w @staticmethod def _solve(v, alpha, cs, ds): """Evaluate w = M^-1 v""" if len(cs) == 0: return v/alpha # (B + C D^H)^-1 = B^-1 - B^-1 C (I + D^H B^-1 C)^-1 D^H B^-1 axpy, dotc = get_blas_funcs(['axpy', 'dotc'], cs[:1] + [v]) c0 = cs[0] A = alpha * np.identity(len(cs), dtype=c0.dtype) for i, d in enumerate(ds): for j, c in enumerate(cs): A[i,j] += dotc(d, c) q = np.zeros(len(cs), dtype=c0.dtype) for j, d in enumerate(ds): q[j] = dotc(d, v) q /= alpha q = solve(A, q) w = v/alpha for c, qc in zip(cs, q): w = axpy(c, w, w.size, -qc) return w def matvec(self, v): """Evaluate w = M v""" if self.collapsed is not None: return np.dot(self.collapsed, v) return LowRankMatrix._matvec(v, self.alpha, self.cs, self.ds) def rmatvec(self, v): """Evaluate w = M^H v""" if self.collapsed is not None: return np.dot(self.collapsed.T.conj(), v) return LowRankMatrix._matvec(v, np.conj(self.alpha), self.ds, self.cs) def solve(self, v, tol=0): """Evaluate w = M^-1 v""" if self.collapsed is not None: return solve(self.collapsed, v) return LowRankMatrix._solve(v, self.alpha, self.cs, self.ds) def rsolve(self, v, tol=0): """Evaluate w = M^-H v""" if self.collapsed is not None: return solve(self.collapsed.T.conj(), v) return LowRankMatrix._solve(v, np.conj(self.alpha), self.ds, self.cs) def append(self, c, d): if self.collapsed is not None: self.collapsed += c[:,None] * d[None,:].conj() return self.cs.append(c) self.ds.append(d) if len(self.cs) > c.size: self.collapse() def __array__(self): if self.collapsed is not None: return self.collapsed Gm = self.alpha*np.identity(self.n, dtype=self.dtype) for c, d in zip(self.cs, self.ds): Gm += c[:,None]*d[None,:].conj() return Gm def collapse(self): """Collapse the low-rank matrix to a full-rank one.""" self.collapsed = np.array(self) self.cs = None self.ds = None self.alpha = None def restart_reduce(self, rank): """ Reduce the rank of the matrix by dropping all vectors. """ if self.collapsed is not None: return assert rank > 0 if len(self.cs) > rank: del self.cs[:] del self.ds[:] def simple_reduce(self, rank): """ Reduce the rank of the matrix by dropping oldest vectors. """ if self.collapsed is not None: return assert rank > 0 while len(self.cs) > rank: del self.cs[0] del self.ds[0] def svd_reduce(self, max_rank, to_retain=None): """ Reduce the rank of the matrix by retaining some SVD components. This corresponds to the \"Broyden Rank Reduction Inverse\" algorithm described in [1]_. Note that the SVD decomposition can be done by solving only a problem whose size is the effective rank of this matrix, which is viable even for large problems. Parameters ---------- max_rank : int Maximum rank of this matrix after reduction. to_retain : int, optional Number of SVD components to retain when reduction is done (ie. rank > max_rank). Default is ``max_rank - 2``. References ---------- .. [1] B.A. van der Rotten, PhD thesis, \"A limited memory Broyden method to solve high-dimensional systems of nonlinear equations\". Mathematisch Instituut, Universiteit Leiden, The Netherlands (2003). https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf """ if self.collapsed is not None: return p = max_rank if to_retain is not None: q = to_retain else: q = p - 2 if self.cs: p = min(p, len(self.cs[0])) q = max(0, min(q, p-1)) m = len(self.cs) if m < p: # nothing to do return C = np.array(self.cs).T D = np.array(self.ds).T D, R = qr(D, mode='economic') C = dot(C, R.T.conj()) U, S, WH = svd(C, full_matrices=False, compute_uv=True) C = dot(C, inv(WH)) D = dot(D, WH.T.conj()) for k in xrange(q): self.cs[k] = C[:,k].copy() self.ds[k] = D[:,k].copy() del self.cs[q:] del self.ds[q:] _doc_parts['broyden_params'] = """ alpha : float, optional Initial guess for the Jacobian is ``(-1/alpha)``. reduction_method : str or tuple, optional Method used in ensuring that the rank of the Broyden matrix stays low. Can either be a string giving the name of the method, or a tuple of the form ``(method, param1, param2, ...)`` that gives the name of the method and values for additional parameters. Methods available: - ``restart``: drop all matrix columns. Has no extra parameters. - ``simple``: drop oldest matrix column. Has no extra parameters. - ``svd``: keep only the most significant SVD components. Takes an extra parameter, ``to_retain``, which determines the number of SVD components to retain when rank reduction is done. Default is ``max_rank - 2``. max_rank : int, optional Maximum rank for the Broyden matrix. Default is infinity (ie., no rank reduction). """.strip() class BroydenFirst(GenericBroyden): r""" Find a root of a function, using Broyden's first Jacobian approximation. This method is also known as \"Broyden's good method\". Parameters ---------- %(params_basic)s %(broyden_params)s %(params_extra)s Notes ----- This algorithm implements the inverse Jacobian Quasi-Newton update .. math:: H_+ = H + (dx - H df) dx^\dagger H / ( dx^\dagger H df) which corresponds to Broyden's first Jacobian update .. math:: J_+ = J + (df - J dx) dx^\dagger / dx^\dagger dx References ---------- .. [1] B.A. van der Rotten, PhD thesis, \"A limited memory Broyden method to solve high-dimensional systems of nonlinear equations\". Mathematisch Instituut, Universiteit Leiden, The Netherlands (2003). https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf """ def __init__(self, alpha=None, reduction_method='restart', max_rank=None): GenericBroyden.__init__(self) self.alpha = alpha self.Gm = None if max_rank is None: max_rank = np.inf self.max_rank = max_rank if isinstance(reduction_method, str): reduce_params = () else: reduce_params = reduction_method[1:] reduction_method = reduction_method[0] reduce_params = (max_rank - 1,) + reduce_params if reduction_method == 'svd': self._reduce = lambda: self.Gm.svd_reduce(*reduce_params) elif reduction_method == 'simple': self._reduce = lambda: self.Gm.simple_reduce(*reduce_params) elif reduction_method == 'restart': self._reduce = lambda: self.Gm.restart_reduce(*reduce_params) else: raise ValueError("Unknown rank reduction method '%s'" % reduction_method) def setup(self, x, F, func): GenericBroyden.setup(self, x, F, func) self.Gm = LowRankMatrix(-self.alpha, self.shape[0], self.dtype) def todense(self): return inv(self.Gm) def solve(self, f, tol=0): r = self.Gm.matvec(f) if not np.isfinite(r).all(): # singular; reset the Jacobian approximation self.setup(self.last_x, self.last_f, self.func) return self.Gm.matvec(f) def matvec(self, f): return self.Gm.solve(f) def rsolve(self, f, tol=0): return self.Gm.rmatvec(f) def rmatvec(self, f): return self.Gm.rsolve(f) def _update(self, x, f, dx, df, dx_norm, df_norm): self._reduce() # reduce first to preserve secant condition v = self.Gm.rmatvec(dx) c = dx - self.Gm.matvec(df) d = v / vdot(df, v) self.Gm.append(c, d) class BroydenSecond(BroydenFirst): """ Find a root of a function, using Broyden\'s second Jacobian approximation. This method is also known as \"Broyden's bad method\". Parameters ---------- %(params_basic)s %(broyden_params)s %(params_extra)s Notes ----- This algorithm implements the inverse Jacobian Quasi-Newton update .. math:: H_+ = H + (dx - H df) df^\\dagger / ( df^\\dagger df) corresponding to Broyden's second method. References ---------- .. [1] B.A. van der Rotten, PhD thesis, \"A limited memory Broyden method to solve high-dimensional systems of nonlinear equations\". Mathematisch Instituut, Universiteit Leiden, The Netherlands (2003). https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf """ def _update(self, x, f, dx, df, dx_norm, df_norm): self._reduce() # reduce first to preserve secant condition v = df c = dx - self.Gm.matvec(df) d = v / df_norm**2 self.Gm.append(c, d) #------------------------------------------------------------------------------ # Broyden-like (restricted memory) #------------------------------------------------------------------------------ class Anderson(GenericBroyden): """ Find a root of a function, using (extended) Anderson mixing. The Jacobian is formed by for a 'best' solution in the space spanned by last `M` vectors. As a result, only a MxM matrix inversions and MxN multiplications are required. [Ey]_ Parameters ---------- %(params_basic)s alpha : float, optional Initial guess for the Jacobian is (-1/alpha). M : float, optional Number of previous vectors to retain. Defaults to 5. w0 : float, optional Regularization parameter for numerical stability. Compared to unity, good values of the order of 0.01. %(params_extra)s References ---------- .. [Ey] V. Eyert, J. Comp. Phys., 124, 271 (1996). """ # Note: # # Anderson method maintains a rank M approximation of the inverse Jacobian, # # J^-1 v ~ -v*alpha + (dX + alpha dF) A^-1 dF^H v # A = W + dF^H dF # W = w0^2 diag(dF^H dF) # # so that for w0 = 0 the secant condition applies for last M iterates, ie., # # J^-1 df_j = dx_j # # for all j = 0 ... M-1. # # Moreover, (from Sherman-Morrison-Woodbury formula) # # J v ~ [ b I - b^2 C (I + b dF^H A^-1 C)^-1 dF^H ] v # C = (dX + alpha dF) A^-1 # b = -1/alpha # # and after simplification # # J v ~ -v/alpha + (dX/alpha + dF) (dF^H dX - alpha W)^-1 dF^H v # def __init__(self, alpha=None, w0=0.01, M=5): GenericBroyden.__init__(self) self.alpha = alpha self.M = M self.dx = [] self.df = [] self.gamma = None self.w0 = w0 def solve(self, f, tol=0): dx = -self.alpha*f n = len(self.dx) if n == 0: return dx df_f = np.empty(n, dtype=f.dtype) for k in xrange(n): df_f[k] = vdot(self.df[k], f) try: gamma = solve(self.a, df_f) except LinAlgError: # singular; reset the Jacobian approximation del self.dx[:] del self.df[:] return dx for m in xrange(n): dx += gamma[m]*(self.dx[m] + self.alpha*self.df[m]) return dx def matvec(self, f): dx = -f/self.alpha n = len(self.dx) if n == 0: return dx df_f = np.empty(n, dtype=f.dtype) for k in xrange(n): df_f[k] = vdot(self.df[k], f) b = np.empty((n, n), dtype=f.dtype) for i in xrange(n): for j in xrange(n): b[i,j] = vdot(self.df[i], self.dx[j]) if i == j and self.w0 != 0: b[i,j] -= vdot(self.df[i], self.df[i])*self.w0**2*self.alpha gamma = solve(b, df_f) for m in xrange(n): dx += gamma[m]*(self.df[m] + self.dx[m]/self.alpha) return dx def _update(self, x, f, dx, df, dx_norm, df_norm): if self.M == 0: return self.dx.append(dx) self.df.append(df) while len(self.dx) > self.M: self.dx.pop(0) self.df.pop(0) n = len(self.dx) a = np.zeros((n, n), dtype=f.dtype) for i in xrange(n): for j in xrange(i, n): if i == j: wd = self.w0**2 else: wd = 0 a[i,j] = (1+wd)*vdot(self.df[i], self.df[j]) a += np.triu(a, 1).T.conj() self.a = a #------------------------------------------------------------------------------ # Simple iterations #------------------------------------------------------------------------------ class DiagBroyden(GenericBroyden): """ Find a root of a function, using diagonal Broyden Jacobian approximation. The Jacobian approximation is derived from previous iterations, by retaining only the diagonal of Broyden matrices. .. warning:: This algorithm may be useful for specific problems, but whether it will work may depend strongly on the problem. Parameters ---------- %(params_basic)s alpha : float, optional Initial guess for the Jacobian is (-1/alpha). %(params_extra)s """ def __init__(self, alpha=None): GenericBroyden.__init__(self) self.alpha = alpha def setup(self, x, F, func): GenericBroyden.setup(self, x, F, func) self.d = np.ones((self.shape[0],), dtype=self.dtype) / self.alpha def solve(self, f, tol=0): return -f / self.d def matvec(self, f): return -f * self.d def rsolve(self, f, tol=0): return -f / self.d.conj() def rmatvec(self, f): return -f * self.d.conj() def todense(self): return np.diag(-self.d) def _update(self, x, f, dx, df, dx_norm, df_norm): self.d -= (df + self.d*dx)*dx/dx_norm**2 class LinearMixing(GenericBroyden): """ Find a root of a function, using a scalar Jacobian approximation. .. warning:: This algorithm may be useful for specific problems, but whether it will work may depend strongly on the problem. Parameters ---------- %(params_basic)s alpha : float, optional The Jacobian approximation is (-1/alpha). %(params_extra)s """ def __init__(self, alpha=None): GenericBroyden.__init__(self) self.alpha = alpha def solve(self, f, tol=0): return -f*self.alpha def matvec(self, f): return -f/self.alpha def rsolve(self, f, tol=0): return -f*np.conj(self.alpha) def rmatvec(self, f): return -f/np.conj(self.alpha) def todense(self): return np.diag(np.full(self.shape[0], -1/self.alpha)) def _update(self, x, f, dx, df, dx_norm, df_norm): pass class ExcitingMixing(GenericBroyden): """ Find a root of a function, using a tuned diagonal Jacobian approximation. The Jacobian matrix is diagonal and is tuned on each iteration. .. warning:: This algorithm may be useful for specific problems, but whether it will work may depend strongly on the problem. Parameters ---------- %(params_basic)s alpha : float, optional Initial Jacobian approximation is (-1/alpha). alphamax : float, optional The entries of the diagonal Jacobian are kept in the range ``[alpha, alphamax]``. %(params_extra)s """ def __init__(self, alpha=None, alphamax=1.0): GenericBroyden.__init__(self) self.alpha = alpha self.alphamax = alphamax self.beta = None def setup(self, x, F, func): GenericBroyden.setup(self, x, F, func) self.beta = np.full((self.shape[0],), self.alpha, dtype=self.dtype) def solve(self, f, tol=0): return -f*self.beta def matvec(self, f): return -f/self.beta def rsolve(self, f, tol=0): return -f*self.beta.conj() def rmatvec(self, f): return -f/self.beta.conj() def todense(self): return np.diag(-1/self.beta) def _update(self, x, f, dx, df, dx_norm, df_norm): incr = f*self.last_f > 0 self.beta[incr] += self.alpha self.beta[~incr] = self.alpha np.clip(self.beta, 0, self.alphamax, out=self.beta) #------------------------------------------------------------------------------ # Iterative/Krylov approximated Jacobians #------------------------------------------------------------------------------ class KrylovJacobian(Jacobian): r""" Find a root of a function, using Krylov approximation for inverse Jacobian. This method is suitable for solving large-scale problems. Parameters ---------- %(params_basic)s rdiff : float, optional Relative step size to use in numerical differentiation. method : {'lgmres', 'gmres', 'bicgstab', 'cgs', 'minres'} or function Krylov method to use to approximate the Jacobian. Can be a string, or a function implementing the same interface as the iterative solvers in `scipy.sparse.linalg`. The default is `scipy.sparse.linalg.lgmres`. inner_M : LinearOperator or InverseJacobian Preconditioner for the inner Krylov iteration. Note that you can use also inverse Jacobians as (adaptive) preconditioners. For example, >>> from scipy.optimize.nonlin import BroydenFirst, KrylovJacobian >>> from scipy.optimize.nonlin import InverseJacobian >>> jac = BroydenFirst() >>> kjac = KrylovJacobian(inner_M=InverseJacobian(jac)) If the preconditioner has a method named 'update', it will be called as ``update(x, f)`` after each nonlinear step, with ``x`` giving the current point, and ``f`` the current function value. inner_tol, inner_maxiter, ... Parameters to pass on to the \"inner\" Krylov solver. See `scipy.sparse.linalg.gmres` for details. outer_k : int, optional Size of the subspace kept across LGMRES nonlinear iterations. See `scipy.sparse.linalg.lgmres` for details. %(params_extra)s See Also -------- scipy.sparse.linalg.gmres scipy.sparse.linalg.lgmres Notes ----- This function implements a Newton-Krylov solver. The basic idea is to compute the inverse of the Jacobian with an iterative Krylov method. These methods require only evaluating the Jacobian-vector products, which are conveniently approximated by a finite difference: .. math:: J v \approx (f(x + \omega*v/|v|) - f(x)) / \omega Due to the use of iterative matrix inverses, these methods can deal with large nonlinear problems. Scipy's `scipy.sparse.linalg` module offers a selection of Krylov solvers to choose from. The default here is `lgmres`, which is a variant of restarted GMRES iteration that reuses some of the information obtained in the previous Newton steps to invert Jacobians in subsequent steps. For a review on Newton-Krylov methods, see for example [1]_, and for the LGMRES sparse inverse method, see [2]_. References ---------- .. [1] D.A. Knoll and D.E. Keyes, J. Comp. Phys. 193, 357 (2004). :doi:`10.1016/j.jcp.2003.08.010` .. [2] A.H. Baker and E.R. Jessup and T. Manteuffel, SIAM J. Matrix Anal. Appl. 26, 962 (2005). :doi:`10.1137/S0895479803422014` """ def __init__(self, rdiff=None, method='lgmres', inner_maxiter=20, inner_M=None, outer_k=10, **kw): self.preconditioner = inner_M self.rdiff = rdiff self.method = dict( bicgstab=scipy.sparse.linalg.bicgstab, gmres=scipy.sparse.linalg.gmres, lgmres=scipy.sparse.linalg.lgmres, cgs=scipy.sparse.linalg.cgs, minres=scipy.sparse.linalg.minres, ).get(method, method) self.method_kw = dict(maxiter=inner_maxiter, M=self.preconditioner) if self.method is scipy.sparse.linalg.gmres: # Replace GMRES's outer iteration with Newton steps self.method_kw['restrt'] = inner_maxiter self.method_kw['maxiter'] = 1 self.method_kw.setdefault('atol', 0) elif self.method is scipy.sparse.linalg.gcrotmk: self.method_kw.setdefault('atol', 0) elif self.method is scipy.sparse.linalg.lgmres: self.method_kw['outer_k'] = outer_k # Replace LGMRES's outer iteration with Newton steps self.method_kw['maxiter'] = 1 # Carry LGMRES's `outer_v` vectors across nonlinear iterations self.method_kw.setdefault('outer_v', []) self.method_kw.setdefault('prepend_outer_v', True) # But don't carry the corresponding Jacobian*v products, in case # the Jacobian changes a lot in the nonlinear step # # XXX: some trust-region inspired ideas might be more efficient... # See eg. Brown & Saad. But needs to be implemented separately # since it's not an inexact Newton method. self.method_kw.setdefault('store_outer_Av', False) self.method_kw.setdefault('atol', 0) for key, value in kw.items(): if not key.startswith('inner_'): raise ValueError("Unknown parameter %s" % key) self.method_kw[key[6:]] = value def _update_diff_step(self): mx = abs(self.x0).max() mf = abs(self.f0).max() self.omega = self.rdiff * max(1, mx) / max(1, mf) def matvec(self, v): nv = norm(v) if nv == 0: return 0*v sc = self.omega / nv r = (self.func(self.x0 + sc*v) - self.f0) / sc if not np.all(np.isfinite(r)) and np.all(np.isfinite(v)): raise ValueError('Function returned non-finite results') return r def solve(self, rhs, tol=0): if 'tol' in self.method_kw: sol, info = self.method(self.op, rhs, **self.method_kw) else: sol, info = self.method(self.op, rhs, tol=tol, **self.method_kw) return sol def update(self, x, f): self.x0 = x self.f0 = f self._update_diff_step() # Update also the preconditioner, if possible if self.preconditioner is not None: if hasattr(self.preconditioner, 'update'): self.preconditioner.update(x, f) def setup(self, x, f, func): Jacobian.setup(self, x, f, func) self.x0 = x self.f0 = f self.op = scipy.sparse.linalg.aslinearoperator(self) if self.rdiff is None: self.rdiff = np.finfo(x.dtype).eps ** (1./2) self._update_diff_step() # Setup also the preconditioner, if possible if self.preconditioner is not None: if hasattr(self.preconditioner, 'setup'): self.preconditioner.setup(x, f, func) #------------------------------------------------------------------------------ # Wrapper functions #------------------------------------------------------------------------------ def _nonlin_wrapper(name, jac): """ Construct a solver wrapper with given name and jacobian approx. It inspects the keyword arguments of ``jac.__init__``, and allows to use the same arguments in the wrapper function, in addition to the keyword arguments of `nonlin_solve` """ args, varargs, varkw, defaults = _getargspec(jac.__init__) kwargs = list(zip(args[-len(defaults):], defaults)) kw_str = ", ".join(["%s=%r" % (k, v) for k, v in kwargs]) if kw_str: kw_str = ", " + kw_str kwkw_str = ", ".join(["%s=%s" % (k, k) for k, v in kwargs]) if kwkw_str: kwkw_str = kwkw_str + ", " # Construct the wrapper function so that its keyword arguments # are visible in pydoc.help etc. wrapper = """ def %(name)s(F, xin, iter=None %(kw)s, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw): jac = %(jac)s(%(kwkw)s **kw) return nonlin_solve(F, xin, jac, iter, verbose, maxiter, f_tol, f_rtol, x_tol, x_rtol, tol_norm, line_search, callback) """ wrapper = wrapper % dict(name=name, kw=kw_str, jac=jac.__name__, kwkw=kwkw_str) ns = {} ns.update(globals()) exec_(wrapper, ns) func = ns[name] func.__doc__ = jac.__doc__ _set_doc(func) return func broyden1 = _nonlin_wrapper('broyden1', BroydenFirst) broyden2 = _nonlin_wrapper('broyden2', BroydenSecond) anderson = _nonlin_wrapper('anderson', Anderson) linearmixing = _nonlin_wrapper('linearmixing', LinearMixing) diagbroyden = _nonlin_wrapper('diagbroyden', DiagBroyden) excitingmixing = _nonlin_wrapper('excitingmixing', ExcitingMixing) newton_krylov = _nonlin_wrapper('newton_krylov', KrylovJacobian)