You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
4.5 KiB
Python
129 lines
4.5 KiB
Python
"""Newton-CG trust-region optimization."""
|
|
from __future__ import division, print_function, absolute_import
|
|
|
|
import math
|
|
|
|
import numpy as np
|
|
import scipy.linalg
|
|
from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
|
|
|
|
__all__ = []
|
|
|
|
|
|
def _minimize_trust_ncg(fun, x0, args=(), jac=None, hess=None, hessp=None,
|
|
**trust_region_options):
|
|
"""
|
|
Minimization of scalar function of one or more variables using
|
|
the Newton conjugate gradient trust-region algorithm.
|
|
|
|
Options
|
|
-------
|
|
initial_trust_radius : float
|
|
Initial trust-region radius.
|
|
max_trust_radius : float
|
|
Maximum value of the trust-region radius. No steps that are longer
|
|
than this value will be proposed.
|
|
eta : float
|
|
Trust region related acceptance stringency for proposed steps.
|
|
gtol : float
|
|
Gradient norm must be less than `gtol` before successful
|
|
termination.
|
|
|
|
"""
|
|
if jac is None:
|
|
raise ValueError('Jacobian is required for Newton-CG trust-region '
|
|
'minimization')
|
|
if hess is None and hessp is None:
|
|
raise ValueError('Either the Hessian or the Hessian-vector product '
|
|
'is required for Newton-CG trust-region minimization')
|
|
return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
|
|
hessp=hessp, subproblem=CGSteihaugSubproblem,
|
|
**trust_region_options)
|
|
|
|
|
|
class CGSteihaugSubproblem(BaseQuadraticSubproblem):
|
|
"""Quadratic subproblem solved by a conjugate gradient method"""
|
|
def solve(self, trust_radius):
|
|
"""
|
|
Solve the subproblem using a conjugate gradient method.
|
|
|
|
Parameters
|
|
----------
|
|
trust_radius : float
|
|
We are allowed to wander only this far away from the origin.
|
|
|
|
Returns
|
|
-------
|
|
p : ndarray
|
|
The proposed step.
|
|
hits_boundary : bool
|
|
True if the proposed step is on the boundary of the trust region.
|
|
|
|
Notes
|
|
-----
|
|
This is algorithm (7.2) of Nocedal and Wright 2nd edition.
|
|
Only the function that computes the Hessian-vector product is required.
|
|
The Hessian itself is not required, and the Hessian does
|
|
not need to be positive semidefinite.
|
|
"""
|
|
|
|
# get the norm of jacobian and define the origin
|
|
p_origin = np.zeros_like(self.jac)
|
|
|
|
# define a default tolerance
|
|
tolerance = min(0.5, math.sqrt(self.jac_mag)) * self.jac_mag
|
|
|
|
# Stop the method if the search direction
|
|
# is a direction of nonpositive curvature.
|
|
if self.jac_mag < tolerance:
|
|
hits_boundary = False
|
|
return p_origin, hits_boundary
|
|
|
|
# init the state for the first iteration
|
|
z = p_origin
|
|
r = self.jac
|
|
d = -r
|
|
|
|
# Search for the min of the approximation of the objective function.
|
|
while True:
|
|
|
|
# do an iteration
|
|
Bd = self.hessp(d)
|
|
dBd = np.dot(d, Bd)
|
|
if dBd <= 0:
|
|
# Look at the two boundary points.
|
|
# Find both values of t to get the boundary points such that
|
|
# ||z + t d|| == trust_radius
|
|
# and then choose the one with the predicted min value.
|
|
ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
|
|
pa = z + ta * d
|
|
pb = z + tb * d
|
|
if self(pa) < self(pb):
|
|
p_boundary = pa
|
|
else:
|
|
p_boundary = pb
|
|
hits_boundary = True
|
|
return p_boundary, hits_boundary
|
|
r_squared = np.dot(r, r)
|
|
alpha = r_squared / dBd
|
|
z_next = z + alpha * d
|
|
if scipy.linalg.norm(z_next) >= trust_radius:
|
|
# Find t >= 0 to get the boundary point such that
|
|
# ||z + t d|| == trust_radius
|
|
ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
|
|
p_boundary = z + tb * d
|
|
hits_boundary = True
|
|
return p_boundary, hits_boundary
|
|
r_next = r + alpha * Bd
|
|
r_next_squared = np.dot(r_next, r_next)
|
|
if math.sqrt(r_next_squared) < tolerance:
|
|
hits_boundary = False
|
|
return z_next, hits_boundary
|
|
beta_next = r_next_squared / r_squared
|
|
d_next = -r_next + beta_next * d
|
|
|
|
# update the state for the next iteration
|
|
z = z_next
|
|
r = r_next
|
|
d = d_next
|