"""Functions used by least-squares algorithms. Those functions that involve
large computations are reimplemented in the common_jax.py file using JAX."""
from math import copysign
import numpy as np
from numpy.linalg import norm
from scipy.linalg import LinAlgError, cho_factor, cho_solve
from nlsq.utils.logging import get_logger
logger = get_logger(__name__)
EPS = np.finfo(float).eps
# Functions related to a trust-region problem.
[docs]
def intersect_trust_region(x, s, Delta):
"""Find the intersection of a line with the boundary of a trust region.
This function solves the quadratic equation with respect to t
||(x + s*t)||**2 = Delta**2.
Returns
-------
t_neg, t_pos : tuple of float
Negative and positive roots.
Raises
------
ValueError
If `s` is zero or `x` is not within the trust region.
"""
a = np.dot(s, s)
if a == 0:
raise ValueError("`s` is zero.")
b = np.dot(x, s)
c = np.dot(x, x) - Delta**2
if c >= 0:
raise ValueError("`x` is not within the trust region.")
d = np.sqrt(b * b - a * c) # Root from one fourth of the discriminant.
# Computations below avoid loss of significance, see "Numerical Recipes".
q = -(b + copysign(d, b))
t1 = q / a
t2 = c / q
if t1 < t2:
return t1, t2
else:
return t2, t1
[docs]
def solve_lsq_trust_region(
n, m, uf, s, V, Delta, initial_alpha=None, rtol=0.01, max_iter=10
):
"""Solve a trust-region problem arising in least-squares minimization.
This function implements a method described by J. J. More [12]_ and used
in MINPACK, but it relies on a single SVD of Jacobian instead of series
of Cholesky decompositions. Before running this function, compute:
``U, s, VT = svd(J, full_matrices=False)``.
Parameters
----------
n : int
Number of variables.
m : int
Number of residuals.
uf : ndarray
Computed as U.T.dot(f).
s : ndarray
Singular values of J.
V : ndarray
Transpose of VT.
Delta : float
Radius of a trust region.
initial_alpha : float, optional
Initial guess for alpha, which might be available from a previous
iteration. If None, determined automatically.
rtol : float, optional
Stopping tolerance for the root-finding procedure. Namely, the
solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
max_iter : int, optional
Maximum allowed number of iterations for the root-finding procedure.
Returns
-------
p : ndarray, shape (n,)
Found solution of a trust-region problem.
alpha : float
Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
Sometimes called Levenberg-Marquardt parameter.
n_iter : int
Number of iterations made by root-finding procedure. Zero means
that Gauss-Newton step was selected as the solution.
References
----------
.. [12] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
"""
def phi_and_derivative(alpha, suf, s, Delta):
"""Function of which to find zero.
It is defined as "norm of regularized (by alpha) least-squares
solution minus `Delta`". Refer to [12]_.
"""
denom = s**2 + alpha
p_norm = norm(suf / denom)
phi = p_norm - Delta
safe_p_norm = p_norm if p_norm != 0 else np.finfo(float).tiny
phi_prime = -np.sum(suf**2 / denom**3) / safe_p_norm
return phi, phi_prime
suf = s * uf
# Check if J has full rank and try Gauss-Newton step.
if m >= n:
threshold = EPS * m * s[0]
full_rank = s[-1] > threshold
else:
full_rank = False
if full_rank:
p = -V.dot(uf / s)
if norm(p) <= Delta:
return p, 0.0, 0
alpha_upper = norm(suf) / Delta
if full_rank:
phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
alpha_lower = -phi / phi_prime
else:
alpha_lower = 0.0
if initial_alpha is None or (not full_rank and initial_alpha == 0):
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper) ** 0.5)
else:
alpha = initial_alpha
it = -1
for it in range(max_iter):
if alpha < alpha_lower or alpha > alpha_upper:
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper) ** 0.5)
phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)
if phi < 0:
alpha_upper = alpha
ratio = phi / phi_prime
alpha_lower = max(alpha_lower, alpha - ratio)
alpha -= (phi + Delta) * ratio / Delta
if np.abs(phi) < rtol * Delta:
break
p = -V.dot(suf / (s**2 + alpha))
# Make the norm of p equal to Delta, p is changed only slightly during
# this. It is done to prevent p lie outside the trust region (which can
# cause problems later).
p_norm = norm(p)
if p_norm > 0:
p *= Delta / p_norm
return p, alpha, it + 1
[docs]
def solve_trust_region_2d(B, g, Delta):
"""Solve a general trust-region problem in 2 dimensions.
The problem is reformulated as a 4th order algebraic equation,
the solution of which is found by numpy.roots.
Parameters
----------
B : ndarray, shape (2, 2)
Symmetric matrix, defines a quadratic term of the function.
g : ndarray, shape (2,)
Defines a linear term of the function.
Delta : float
Radius of a trust region.
Returns
-------
p : ndarray, shape (2,)
Found solution.
newton_step : bool
Whether the returned solution is the Newton step which lies within
the trust region.
"""
try:
R, lower = cho_factor(B)
p = -cho_solve((R, lower), g)
if np.dot(p, p) <= Delta**2:
return p, True
except LinAlgError:
pass
a = B[0, 0] * Delta**2
b = B[0, 1] * Delta**2
c = B[1, 1] * Delta**2
d = g[0] * Delta
f = g[1] * Delta
coeffs = np.array([-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
t = np.roots(coeffs) # Can handle leading zeros.
t = np.real(t[np.isreal(t)])
p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
i = np.argmin(value)
p = p[:, i]
return p, False
[docs]
def update_tr_radius(
Delta, actual_reduction, predicted_reduction, step_norm, bound_hit
):
"""Update the radius of a trust region based on the cost reduction.
Returns
-------
Delta : float
New radius.
ratio : float
Ratio between actual and predicted reductions.
"""
if predicted_reduction > 0:
ratio = actual_reduction / predicted_reduction
elif predicted_reduction == actual_reduction == 0:
ratio = 1
else:
ratio = 0
if ratio < 0.25:
Delta = 0.25 * step_norm
elif ratio > 0.75 and bound_hit:
Delta *= 2.0
return Delta, ratio
# Construction and minimization of quadratic functions.
# b = np.dot(g, s)
# if s0 is not None:
# u = J.dot(s0)
# b += np.dot(u, v)
# c = 0.5 * np.dot(u, u) + np.dot(g, s0)
# if diag is not None:
# b += np.dot(s0 * diag, s)
# c += 0.5 * np.dot(s0 * diag, s0)
# return a, b, c
# else:
# return a, b
[docs]
def minimize_quadratic_1d(a, b, lb, ub, c=0):
"""Minimize a 1-D quadratic function subject to bounds.
The free term `c` is 0 by default. Bounds must be finite.
Returns
-------
t : float
Minimum point.
y : float
Minimum value.
"""
t = [lb, ub]
if a != 0:
extremum = -0.5 * b / a
if lb < extremum < ub:
t.append(extremum)
t = np.asarray(t)
y = t * (a * t + b) + c
min_index = np.argmin(y)
return t[min_index], y[min_index]
[docs]
def evaluate_quadratic(J, g, s, diag=None):
"""Compute values of a quadratic function arising in least squares.
The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.
Parameters
----------
J : ndarray, sparse matrix or LinearOperator, shape (m, n)
Jacobian matrix, affects the quadratic term.
g : ndarray, shape (n,)
Gradient, defines the linear term.
s : ndarray, shape (k, n) or (n,)
Array containing steps as rows.
diag : ndarray, shape (n,), optional
Addition diagonal part, affects the quadratic term.
If None, assumed to be 0.
Returns
-------
values : ndarray with shape (k,) or float
Values of the function. If `s` was 2-D, then ndarray is
returned, otherwise, float is returned.
"""
if s.ndim == 1:
Js = J.dot(s)
q = np.dot(Js, Js)
if diag is not None:
q += np.dot(s * diag, s)
else:
Js = J.dot(s.T)
q = np.sum(Js**2, axis=0)
if diag is not None:
q += np.sum(diag * s**2, axis=1)
l = np.dot(s, g)
return 0.5 * q + l
# Utility functions to work with bound constraints.
[docs]
def in_bounds(x, lb, ub):
"""Check if a point lies within bounds."""
return np.all((x >= lb) & (x <= ub))
[docs]
def step_size_to_bound(x, s, lb, ub):
"""Compute a min_step size required to reach a bound.
The function computes a positive scalar t, such that x + s * t is on
the bound.
Returns
-------
step : float
Computed step. Non-negative value.
hits : ndarray of int with shape of x
Each element indicates whether a corresponding variable reaches the
bound:
* 0 - the bound was not hit.
* -1 - the lower bound was hit.
* 1 - the upper bound was hit.
"""
non_zero = np.nonzero(s)
s_non_zero = s[non_zero]
steps = np.empty_like(x)
steps.fill(np.inf)
with np.errstate(over="ignore"):
steps[non_zero] = np.maximum(
(lb - x)[non_zero] / s_non_zero, (ub - x)[non_zero] / s_non_zero
)
min_step = np.min(steps)
return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
[docs]
def find_active_constraints(x, lb, ub, rtol=1e-10):
"""Determine which constraints are active in a given point.
The threshold is computed using `rtol` and the absolute value of the
closest bound.
Returns
-------
active : ndarray of int with shape of x
Each component shows whether the corresponding constraint is active:
* 0 - a constraint is not active.
* -1 - a lower bound is active.
* 1 - a upper bound is active.
"""
active = np.zeros_like(x, dtype=int)
if rtol == 0:
active[x <= lb] = -1
active[x >= ub] = 1
return active
lower_dist = x - lb
upper_dist = ub - x
lower_threshold = rtol * np.maximum(1, np.abs(lb))
upper_threshold = rtol * np.maximum(1, np.abs(ub))
lower_active = np.isfinite(lb) & (
lower_dist <= np.minimum(upper_dist, lower_threshold)
)
active[lower_active] = -1
upper_active = np.isfinite(ub) & (
upper_dist <= np.minimum(lower_dist, upper_threshold)
)
active[upper_active] = 1
return active
[docs]
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
"""Shift a point to the interior of a feasible region.
Each element of the returned vector is at least at a relative distance
`rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used.
"""
# Convert to NumPy array to ensure mutability (JAX arrays are immutable)
x_new = np.array(x, copy=True)
lb = np.asarray(lb)
ub = np.asarray(ub)
active = find_active_constraints(x, lb, ub, rstep)
lower_mask = np.equal(active, -1)
upper_mask = np.equal(active, 1)
if rstep == 0:
x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
else:
x_new[lower_mask] = lb[lower_mask] + rstep * np.maximum(
1, np.abs(lb[lower_mask])
)
x_new[upper_mask] = ub[upper_mask] - rstep * np.maximum(
1, np.abs(ub[upper_mask])
)
tight_bounds = (x_new < lb) | (x_new > ub)
x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])
return x_new
[docs]
def CL_scaling_vector(x, g, lb, ub):
"""Compute Coleman-Li scaling vector and its derivatives.
Components of a vector v are defined as follows::
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
| 1, otherwise
According to this definition v[i] >= 0 for all i. It differs from the
definition in paper [5]_ (eq. (2.2)), where the absolute value of v is
used. Both definitions are equivalent down the line.
Derivatives of v with respect to x take value 1, -1 or 0 depending on a
case.
Returns
-------
v : ndarray with shape of x
Scaling vector.
dv : ndarray with shape of x
Derivatives of v[i] with respect to x[i], diagonal elements of v's
Jacobian.
References
----------
.. [5] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
and Conjugate Gradient Method for Large-Scale Bound-Constrained
Minimization Problems," SIAM Journal on Scientific Computing,
Vol. 21, Number 1, pp 1-23, 1999.
"""
v = np.ones_like(x)
dv = np.zeros_like(x)
mask = (g < 0) & np.isfinite(ub)
v[mask] = ub[mask] - x[mask]
dv[mask] = -1
mask = (g > 0) & np.isfinite(lb)
v[mask] = x[mask] - lb[mask]
dv[mask] = 1
return v, dv
# Functions to display algorithm's progress.
[docs]
def print_iteration_nonlinear(
iteration, nfev, cost, cost_reduction, step_norm, optimality
):
"""Print a single iteration row for nonlinear optimization progress."""
cost_reduction = " " * 15 if cost_reduction is None else f"{cost_reduction:^15.2e}"
step_norm = " " * 15 if step_norm is None else f"{step_norm:^15.2e}"
logger.info(
f"{iteration:^15}{nfev:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}"
)
[docs]
def print_iteration_linear(iteration, cost, cost_reduction, step_norm, optimality):
"""Print a single iteration row for linear optimization progress."""
cost_reduction = " " * 15 if cost_reduction is None else f"{cost_reduction:^15.2e}"
step_norm = " " * 15 if step_norm is None else f"{step_norm:^15.2e}"
logger.info(
f"{iteration:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}"
)
[docs]
def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
"""Check termination condition for nonlinear least squares."""
ftol_satisfied = dF < ftol * F and ratio > 0.25
xtol_satisfied = dx_norm < xtol * (xtol + x_norm)
if ftol_satisfied and xtol_satisfied:
return 4
elif ftol_satisfied:
return 2
elif xtol_satisfied:
return 3
else:
return None