You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

267 lines
9.0 KiB
Python

"""Trust-region optimization."""
import math
import numpy as np
import scipy.linalg
from .optimize import (_check_unknown_options, wrap_function, _status_message,
OptimizeResult, _prepare_scalar_function)
__all__ = []
class BaseQuadraticSubproblem(object):
"""
Base/abstract class defining the quadratic model for trust-region
minimization. Child classes must implement the ``solve`` method.
Values of the objective function, Jacobian and Hessian (if provided) at
the current iterate ``x`` are evaluated on demand and then stored as
attributes ``fun``, ``jac``, ``hess``.
"""
def __init__(self, x, fun, jac, hess=None, hessp=None):
self._x = x
self._f = None
self._g = None
self._h = None
self._g_mag = None
self._cauchy_point = None
self._newton_point = None
self._fun = fun
self._jac = jac
self._hess = hess
self._hessp = hessp
def __call__(self, p):
return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))
@property
def fun(self):
"""Value of objective function at current iteration."""
if self._f is None:
self._f = self._fun(self._x)
return self._f
@property
def jac(self):
"""Value of Jacobian of objective function at current iteration."""
if self._g is None:
self._g = self._jac(self._x)
return self._g
@property
def hess(self):
"""Value of Hessian of objective function at current iteration."""
if self._h is None:
self._h = self._hess(self._x)
return self._h
def hessp(self, p):
if self._hessp is not None:
return self._hessp(self._x, p)
else:
return np.dot(self.hess, p)
@property
def jac_mag(self):
"""Magnitude of jacobian of objective function at current iteration."""
if self._g_mag is None:
self._g_mag = scipy.linalg.norm(self.jac)
return self._g_mag
def get_boundaries_intersections(self, z, d, trust_radius):
"""
Solve the scalar quadratic equation ||z + t d|| == trust_radius.
This is like a line-sphere intersection.
Return the two values of t, sorted from low to high.
"""
a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
# The following calculation is mathematically
# equivalent to:
# ta = (-b - sqrt_discriminant) / (2*a)
# tb = (-b + sqrt_discriminant) / (2*a)
# but produce smaller round off errors.
# Look at Matrix Computation p.97
# for a better justification.
aux = b + math.copysign(sqrt_discriminant, b)
ta = -aux / (2*a)
tb = -2*c / aux
return sorted([ta, tb])
def solve(self, trust_radius):
raise NotImplementedError('The solve method should be implemented by '
'the child class')
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
subproblem=None, initial_trust_radius=1.0,
max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
maxiter=None, disp=False, return_all=False,
callback=None, inexact=True, **unknown_options):
"""
Minimization of scalar function of one or more variables using a
trust-region algorithm.
Options for the trust-region algorithm are:
initial_trust_radius : float
Initial trust radius.
max_trust_radius : float
Never propose steps that are longer than this value.
eta : float
Trust region related acceptance stringency for proposed steps.
gtol : float
Gradient norm must be less than `gtol`
before successful termination.
maxiter : int
Maximum number of iterations to perform.
disp : bool
If True, print convergence message.
inexact : bool
Accuracy to solve subproblems. If True requires less nonlinear
iterations, but more vector products. Only effective for method
trust-krylov.
This function is called by the `minimize` function.
It is not supposed to be called directly.
"""
_check_unknown_options(unknown_options)
if jac is None:
raise ValueError('Jacobian is currently required for trust-region '
'methods')
if hess is None and hessp is None:
raise ValueError('Either the Hessian or the Hessian-vector product '
'is currently required for trust-region methods')
if subproblem is None:
raise ValueError('A subproblem solving strategy is required for '
'trust-region methods')
if not (0 <= eta < 0.25):
raise Exception('invalid acceptance stringency')
if max_trust_radius <= 0:
raise Exception('the max trust radius must be positive')
if initial_trust_radius <= 0:
raise ValueError('the initial trust radius must be positive')
if initial_trust_radius >= max_trust_radius:
raise ValueError('the initial trust radius must be less than the '
'max trust radius')
# force the initial guess into a nice format
x0 = np.asarray(x0).flatten()
# A ScalarFunction representing the problem. This caches calls to fun, jac,
# hess.
sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
fun = sf.fun
jac = sf.grad
if hess is not None:
hess = sf.hess
# ScalarFunction doesn't represent hessp
nhessp, hessp = wrap_function(hessp, args)
# limit the number of iterations
if maxiter is None:
maxiter = len(x0)*200
# init the search status
warnflag = 0
# initialize the search
trust_radius = initial_trust_radius
x = x0
if return_all:
allvecs = [x]
m = subproblem(x, fun, jac, hess, hessp)
k = 0
# search for the function min
# do not even start if the gradient is small enough
while m.jac_mag >= gtol:
# Solve the sub-problem.
# This gives us the proposed step relative to the current position
# and it tells us whether the proposed step
# has reached the trust region boundary or not.
try:
p, hits_boundary = m.solve(trust_radius)
except np.linalg.linalg.LinAlgError:
warnflag = 3
break
# calculate the predicted value at the proposed point
predicted_value = m(p)
# define the local approximation at the proposed point
x_proposed = x + p
m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)
# evaluate the ratio defined in equation (4.4)
actual_reduction = m.fun - m_proposed.fun
predicted_reduction = m.fun - predicted_value
if predicted_reduction <= 0:
warnflag = 2
break
rho = actual_reduction / predicted_reduction
# update the trust radius according to the actual/predicted ratio
if rho < 0.25:
trust_radius *= 0.25
elif rho > 0.75 and hits_boundary:
trust_radius = min(2*trust_radius, max_trust_radius)
# if the ratio is high enough then accept the proposed step
if rho > eta:
x = x_proposed
m = m_proposed
# append the best guess, call back, increment the iteration count
if return_all:
allvecs.append(np.copy(x))
if callback is not None:
callback(np.copy(x))
k += 1
# check if the gradient is small enough to stop
if m.jac_mag < gtol:
warnflag = 0
break
# check if we have looked at enough iterations
if k >= maxiter:
warnflag = 1
break
# print some stuff if requested
status_messages = (
_status_message['success'],
_status_message['maxiter'],
'A bad approximation caused failure to predict improvement.',
'A linalg error occurred, such as a non-psd Hessian.',
)
if disp:
if warnflag == 0:
print(status_messages[warnflag])
else:
print('Warning: ' + status_messages[warnflag])
print(" Current function value: %f" % m.fun)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % sf.nfev)
print(" Gradient evaluations: %d" % sf.ngev)
print(" Hessian evaluations: %d" % (sf.nhev + nhessp[0]))
result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
nhev=sf.nhev + nhessp[0], nit=k,
message=status_messages[warnflag])
if hess is not None:
result['hess'] = m.hess
if return_all:
result['allvecs'] = allvecs
return result