eaiaiaiaoi/venv/lib/python2.7/site-packages/scipy/optimize/_lsq/dogbox.py

"""
dogleg algorithm with rectangular trust regions for least-squares minimization.

The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to conventional elliptical. The intersection of a trust region and
an initial feasible region is again some rectangle. Thus on each iteration a
bound-constrained quadratic optimization problem is solved.

A quadratic problem is solved by well-known dogleg approach, where the
function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
along this path, and optimization amounts to simply following along this
path as long as a point stays within the bounds. A constrained Cauchy step
(along the anti-gradient) is considered for safety in rank deficient cases,
in this situations the convergence might be slow.

If during iterations some variable hit the initial bound and the component
of anti-gradient points outside the feasible region, then a next dogleg step
won't make any progress. At this state such variables satisfy first-order
optimality conditions and they are excluded before computing a next dogleg
step.

Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for
dense and sparse matrices, or Jacobian being LinearOperator). The second
option allows to solve very large problems (up to couple of millions of
residuals on a regular PC), provided the Jacobian matrix is sufficiently
sparse. But note that dogbox is not very good for solving problems with
large number of constraints, because of variables exclusion-inclusion on each
iteration (a required number of function evaluations might be high or accuracy
of a solution will be poor), thus its large-scale usage is probably limited
to unconstrained problems.

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
"""
from __future__ import division, print_function, absolute_import

import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult
from scipy._lib.six import string_types

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to excluded active variables from computations
    of matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:
            *  0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            *  1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust-region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check if Cauchy step fits into
    # the bounds, and just return it constrained version if not. But in a
    # rectangular trust region it makes sense to try to improve constrained
    # Cauchy step too. Thus we don't distinguish these two cases.

    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute lsmr step in scaled variables and then
            # transform back to normal variables, if lsmr would give exact lsq
            # solution this would be equivalent to not doing any
            # transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
add in project 6 years ago			`"""`
			`dogleg algorithm with rectangular trust regions for least-squares minimization.`

			`The description of the algorithm can be found in [Voglis]_. The algorithm does`
			`trust-region iterations, but the shape of trust regions is rectangular as`
			`opposed to conventional elliptical. The intersection of a trust region and`
			`an initial feasible region is again some rectangle. Thus on each iteration a`
			`bound-constrained quadratic optimization problem is solved.`

			`A quadratic problem is solved by well-known dogleg approach, where the`
			`function is minimized along piecewise-linear "dogleg" path [NumOpt]_,`
			`Chapter 4. If Jacobian is not rank-deficient then the function is decreasing`
			`along this path, and optimization amounts to simply following along this`
			`path as long as a point stays within the bounds. A constrained Cauchy step`
			`(along the anti-gradient) is considered for safety in rank deficient cases,`
			`in this situations the convergence might be slow.`

			`If during iterations some variable hit the initial bound and the component`
			`of anti-gradient points outside the feasible region, then a next dogleg step`
			`won't make any progress. At this state such variables satisfy first-order`
			`optimality conditions and they are excluded before computing a next dogleg`
			`step.`

			Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
			Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for
			`dense and sparse matrices, or Jacobian being LinearOperator). The second`
			`option allows to solve very large problems (up to couple of millions of`
			`residuals on a regular PC), provided the Jacobian matrix is sufficiently`
			`sparse. But note that dogbox is not very good for solving problems with`
			`large number of constraints, because of variables exclusion-inclusion on each`
			`iteration (a required number of function evaluations might be high or accuracy`
			`of a solution will be poor), thus its large-scale usage is probably limited`
			`to unconstrained problems.`

			`References`
			`----------`
			`.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg`
			`Approach for Unconstrained and Bound Constrained Nonlinear`
			`Optimization", WSEAS International Conference on Applied`
			`Mathematics, Corfu, Greece, 2004.`
			`.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".`
			`"""`
			`from __future__ import division, print_function, absolute_import`

			`import numpy as np`
			`from numpy.linalg import lstsq, norm`

			`from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr`
			`from scipy.optimize import OptimizeResult`
			`from scipy._lib.six import string_types`

			`from .common import (`
			`step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,`
			`build_quadratic_1d, minimize_quadratic_1d, compute_grad,`
			`compute_jac_scale, check_termination, scale_for_robust_loss_function,`
			`print_header_nonlinear, print_iteration_nonlinear)`


			`def lsmr_operator(Jop, d, active_set):`
			`"""Compute LinearOperator to use in LSMR by dogbox algorithm.`

			`active_set` mask is used to excluded active variables from computations
			`of matrix-vector products.`
			`"""`
			`m, n = Jop.shape`

			`def matvec(x):`
			`x_free = x.ravel().copy()`
			`x_free[active_set] = 0`
			`return Jop.matvec(x * d)`

			`def rmatvec(x):`
			`r = d * Jop.rmatvec(x)`
			`r[active_set] = 0`
			`return r`

			`return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)`


			`def find_intersection(x, tr_bounds, lb, ub):`
			`"""Find intersection of trust-region bounds and initial bounds.`

			`Returns`
			`-------`
			`lb_total, ub_total : ndarray with shape of x`
			`Lower and upper bounds of the intersection region.`
			`orig_l, orig_u : ndarray of bool with shape of x`
			`True means that an original bound is taken as a corresponding bound`
			`in the intersection region.`
			`tr_l, tr_u : ndarray of bool with shape of x`
			`True means that a trust-region bound is taken as a corresponding bound`
			`in the intersection region.`
			`"""`
			`lb_centered = lb - x`
			`ub_centered = ub - x`

			`lb_total = np.maximum(lb_centered, -tr_bounds)`
			`ub_total = np.minimum(ub_centered, tr_bounds)`

			`orig_l = np.equal(lb_total, lb_centered)`
			`orig_u = np.equal(ub_total, ub_centered)`

			`tr_l = np.equal(lb_total, -tr_bounds)`
			`tr_u = np.equal(ub_total, tr_bounds)`

			`return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u`


			`def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):`
			`"""Find dogleg step in a rectangular region.`

			`Returns`
			`-------`
			`step : ndarray, shape (n,)`
			`Computed dogleg step.`
			`bound_hits : ndarray of int, shape (n,)`
			`Each component shows whether a corresponding variable hits the`
			`initial bound after the step is taken:`
			`* 0 - a variable doesn't hit the bound.`
			`* -1 - lower bound is hit.`
			`* 1 - upper bound is hit.`
			`tr_hit : bool`
			`Whether the step hit the boundary of the trust-region.`
			`"""`
			`lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(`
			`x, tr_bounds, lb, ub`
			`)`
			`bound_hits = np.zeros_like(x, dtype=int)`

			`if in_bounds(newton_step, lb_total, ub_total):`
			`return newton_step, bound_hits, False`

			`to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)`

			`# The classical dogleg algorithm would check if Cauchy step fits into`
			`# the bounds, and just return it constrained version if not. But in a`
			`# rectangular trust region it makes sense to try to improve constrained`
			`# Cauchy step too. Thus we don't distinguish these two cases.`

			`cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g`

			`step_diff = newton_step - cauchy_step`
			`step_size, hits = step_size_to_bound(cauchy_step, step_diff,`
			`lb_total, ub_total)`
			`bound_hits[(hits < 0) & orig_l] = -1`
			`bound_hits[(hits > 0) & orig_u] = 1`
			`tr_hit = np.any((hits < 0) & tr_l \| (hits > 0) & tr_u)`

			`return cauchy_step + step_size * step_diff, bound_hits, tr_hit`


			`def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,`
			`loss_function, tr_solver, tr_options, verbose):`
			`f = f0`
			`f_true = f.copy()`
			`nfev = 1`

			`J = J0`
			`njev = 1`

			`if loss_function is not None:`
			`rho = loss_function(f)`
			`cost = 0.5 * np.sum(rho[0])`
			`J, f = scale_for_robust_loss_function(J, f, rho)`
			`else:`
			`cost = 0.5 * np.dot(f, f)`

			`g = compute_grad(J, f)`

			`jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'`
			`if jac_scale:`
			`scale, scale_inv = compute_jac_scale(J)`
			`else:`
			`scale, scale_inv = x_scale, 1 / x_scale`

			`Delta = norm(x0 * scale_inv, ord=np.inf)`
			`if Delta == 0:`
			`Delta = 1.0`

			`on_bound = np.zeros_like(x0, dtype=int)`
			`on_bound[np.equal(x0, lb)] = -1`
			`on_bound[np.equal(x0, ub)] = 1`

			`x = x0`
			`step = np.empty_like(x0)`

			`if max_nfev is None:`
			`max_nfev = x0.size * 100`

			`termination_status = None`
			`iteration = 0`
			`step_norm = None`
			`actual_reduction = None`

			`if verbose == 2:`
			`print_header_nonlinear()`

			`while True:`
			`active_set = on_bound * g < 0`
			`free_set = ~active_set`

			`g_free = g[free_set]`
			`g_full = g.copy()`
			`g[active_set] = 0`

			`g_norm = norm(g, ord=np.inf)`
			`if g_norm < gtol:`
			`termination_status = 1`

			`if verbose == 2:`
			`print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,`
			`step_norm, g_norm)`

			`if termination_status is not None or nfev == max_nfev:`
			`break`

			`x_free = x[free_set]`
			`lb_free = lb[free_set]`
			`ub_free = ub[free_set]`
			`scale_free = scale[free_set]`

			`# Compute (Gauss-)Newton and build quadratic model for Cauchy step.`
			`if tr_solver == 'exact':`
			`J_free = J[:, free_set]`
			`newton_step = lstsq(J_free, -f, rcond=-1)[0]`

			`# Coefficients for the quadratic model along the anti-gradient.`
			`a, b = build_quadratic_1d(J_free, g_free, -g_free)`
			`elif tr_solver == 'lsmr':`
			`Jop = aslinearoperator(J)`

			`# We compute lsmr step in scaled variables and then`
			`# transform back to normal variables, if lsmr would give exact lsq`
			`# solution this would be equivalent to not doing any`
			`# transformations, but from experience it's better this way.`

			`# We pass active_set to make computations as if we selected`
			`# the free subset of J columns, but without actually doing any`
			`# slicing, which is expensive for sparse matrices and impossible`
			`# for LinearOperator.`

			`lsmr_op = lsmr_operator(Jop, scale, active_set)`
			`newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]`
			`newton_step *= scale_free`

			`# Components of g for active variables were zeroed, so this call`
			`# is correct and equivalent to using J_free and g_free.`
			`a, b = build_quadratic_1d(Jop, g, -g)`

			`actual_reduction = -1.0`
			`while actual_reduction <= 0 and nfev < max_nfev:`
			`tr_bounds = Delta * scale_free`

			`step_free, on_bound_free, tr_hit = dogleg_step(`
			`x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)`

			`step.fill(0.0)`
			`step[free_set] = step_free`

			`if tr_solver == 'exact':`
			`predicted_reduction = -evaluate_quadratic(J_free, g_free,`
			`step_free)`
			`elif tr_solver == 'lsmr':`
			`predicted_reduction = -evaluate_quadratic(Jop, g, step)`

			`x_new = x + step`
			`f_new = fun(x_new)`
			`nfev += 1`

			`step_h_norm = norm(step * scale_inv, ord=np.inf)`

			`if not np.all(np.isfinite(f_new)):`
			`Delta = 0.25 * step_h_norm`
			`continue`

			`# Usual trust-region step quality estimation.`
			`if loss_function is not None:`
			`cost_new = loss_function(f_new, cost_only=True)`
			`else:`
			`cost_new = 0.5 * np.dot(f_new, f_new)`
			`actual_reduction = cost - cost_new`

			`Delta, ratio = update_tr_radius(`
			`Delta, actual_reduction, predicted_reduction,`
			`step_h_norm, tr_hit`
			`)`

			`step_norm = norm(step)`
			`termination_status = check_termination(`
			`actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)`

			`if termination_status is not None:`
			`break`

			`if actual_reduction > 0:`
			`on_bound[free_set] = on_bound_free`

			`x = x_new`
			`# Set variables exactly at the boundary.`
			`mask = on_bound == -1`
			`x[mask] = lb[mask]`
			`mask = on_bound == 1`
			`x[mask] = ub[mask]`

			`f = f_new`
			`f_true = f.copy()`

			`cost = cost_new`

			`J = jac(x, f)`
			`njev += 1`

			`if loss_function is not None:`
			`rho = loss_function(f)`
			`J, f = scale_for_robust_loss_function(J, f, rho)`

			`g = compute_grad(J, f)`

			`if jac_scale:`
			`scale, scale_inv = compute_jac_scale(J, scale_inv)`
			`else:`
			`step_norm = 0`
			`actual_reduction = 0`

			`iteration += 1`

			`if termination_status is None:`
			`termination_status = 0`

			`return OptimizeResult(`
			`x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,`
			`active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)`