# Copyright (c) 2025, Yichuan Liu and Yingzhou Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Interpolation Surrogate Model
=============================
Maintain two core classes:
1. :class:`~upoqa.utils.model.QuadSurrogate`: Quadratic interpolation surrogate
model using the derivative-free symmetric Broyden update [1]_. Serves as the
element model in UPOQA.
2. :class:`~upoqa.utils.model.OverallSurrogate`: Overall surrogate model formed by
assembling multiple element models.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic
models that satisfy interpolation conditions. *Math. Program.* 100, 1
(May 2004), 183-215.
"""
import numpy as np
import numpy.linalg as LA
from copy import deepcopy
from .interp_set import InterpSet, OverallInterpSet
from typing import Any, Callable, Optional, Union, Tuple, List, Dict
from .params import UPOQAParameterList
__all__ = ["SurrogateLinAlgError", "QuadSurrogate", "OverallSurrogate"]
[docs]
class SurrogateLinAlgError(Exception):
"""
Exception raised for linear algebra errors in the surrogate model
Attributes
----------
ele_idx : int or None
Index of the element function that caused the error.
If the error is not related to a specific element, this is None.
ele_name : Any or None
Name of the element function that caused the error.
If the error is not related to a specific element, this is None.
"""
def __init__(
self,
message: str,
ele_idx: Optional[int] = None,
ele_name: Optional[Any] = None,
) -> None:
super().__init__(message)
self.ele_idx = ele_idx
self.ele_name = ele_name
[docs]
class QuadSurrogate:
"""
Quadratic interpolation surrogate model using the *derivative-free symmetric
Broyden update* [1]_. Serves as the elemental surrogate model in UPOQA.
Parameters
----------
interp_set: :class:`~upoqa.utils.interp_set.InterpSet`
The interpolation set. It must be provided in order for the interpolation
surrogate model to be built based on the interpolation points.
center: ndarray, optional
The initial center of the surrogate model. If provided, it overrides the ``n``
argument. Otherwise, the center will be set as a zero vector.
n : int, optional
Problem dimension. Required if ``center`` is not provided.
Note that either ``n`` or ``center`` must be provided.
ref_surrogate : :class:`~upoqa.utils.model.QuadSurrogate`, optional
A reference surrogate model to help initialize the KKT coefficients
and other cached information.
Attributes
----------
n : int
Problem dimension
interp_set : :class:`~upoqa.utils.interp_set.InterpSet`
The interpolation set used to build the surrogate model
npt : int
Number of interpolation points in the interpolation set
model_center : ndarray, shape (n,)
Center of the surrogate model
model_cons : float
Constant term of the model at the center
model_grad : ndarray, shape (n,)
Gradient of the model at the center
model_hess_explicit : ndarray, shape (n, n)
Explicit Hessian of the model
model_hess_implicit : ndarray, shape (npt,)
Implicit Hessian of the model
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic models that
satisfy interpolation conditions. *Math. Program.* 100, 1 (May 2004), 183-215.
"""
def __init__(
self,
interp_set: InterpSet,
center: Optional[np.ndarray] = None,
n: Optional[int] = None,
ref_surrogate: Optional["QuadSurrogate"] = None,
) -> None:
if center is None:
assert n is not None
center = np.zeros(n)
n = center.size
assert (
interp_set.npt <= (n + 1) * (n + 2) // 2
), "QuadSurrogate requires npt to be <= (n + 1) * (n + 2) / 2."
assert interp_set.npt >= n + 1, "QuadSurrogate requires npt to be >= n + 1."
self.reset(interp_set, center, ref_surrogate)
[docs]
def reset(
self,
interp_set: InterpSet,
center: np.ndarray,
ref_surrogate: Optional["QuadSurrogate"] = None,
) -> None:
"""
Reset the model with the given interpolation set, center point and dimension,
and optionally a reference model.
Parameters
----------
interp_set: :class:`~upoqa.utils.interp_set.InterpSet`
The interpolation set. It must be provided in order for the interpolation
surrogate model to be built based on the interpolation points.
center: ndarray
The initial center of the surrogate model.
ref_surrogate : :class:`~upoqa.utils.model.QuadSurrogate`, optional
A reference surrogate model to help initialize the KKT coefficients
and other cached information.
"""
n = center.size
self.n = n
self.interp_set = interp_set
self.npt = self.interp_set.npt
# surrogate model coeff
self.model_hess_explicit = np.zeros((n, n), dtype=np.float64)
self.model_hess_implicit = np.zeros(interp_set.npt, dtype=np.float64)
self.model_grad = np.zeros(n, dtype=np.float64)
self.model_cons = 0.0
self.model_center = (
center.reshape((n,))
if center is not None
else np.zeros(n, dtype=np.float64)
)
if ref_surrogate is not None:
# if ref_surrogate is provided, initialize KKT coefficients to match ref_surrogate
self._KKT_R: np.ndarray = ref_surrogate._KKT_R.copy()
self._KKT_B: np.ndarray = ref_surrogate._KKT_B.copy()
self.cached_Y_shift: np.ndarray = (
ref_surrogate.cached_Y_shift.copy()
) # interp set points - center
self.cached_x_anchor: np.ndarray = ref_surrogate.cached_x_anchor.copy()
self.cached_x_anchor_idx: int = ref_surrogate.cached_x_anchor_idx
self._negative_s_idx: int = ref_surrogate._negative_s_idx
self._update_surrogate_coeff()
else:
# otherwise, initialize all KKT coefficients to zero and update them later
self._KKT_R = np.zeros((self.npt - self.n - 1, self.npt), dtype=np.float64)
self._KKT_B = np.zeros((self.npt + self.n, self.n), dtype=np.float64)
self.cached_Y_shift = (
self.interp_set.get_interp_set()[0] - self.model_center
)
self.cached_x_anchor = self.interp_set.get_anchor()[0].copy()
self.cached_x_anchor_idx = self.interp_set.x_anchor_idx
self._negative_s_idx = 0
self._init_KKT_and_model_coeff(self.interp_set.last_init_step_size)
def _init_KKT_and_model_coeff(self, step_size: Optional[float] = None) -> None:
"""
Initialize the KKT and model coefficients based on the interpolation set and
the given step size. Detailed explanations can be found around formula (2.10)
of [1]_.
Parameters
----------
step_size : float, optional
The step size used for initializing the KKT and model coefficients. If not
provided, the initialization step size from the interpolation set is used.
References
----------
.. [1] Michael J. D. Powell. 2009. The BOBYQA algorithm for bound constrained
optimization without derivatives. *Cambridge NA Report NA2009/06,
University of Cambridge, Cambridge* 26 (2009): 26-46.
"""
init_step_size = step_size or self.interp_set.init_step_size
self._negative_s_idx == 0
xpt = np.zeros((self.npt, self.n))
for k in range(self.npt):
if 1 <= k <= self.n:
xpt[k, k - 1] = init_step_size
if self.npt <= k + self.n:
self._KKT_B[0, k - 1] = -1.0 / init_step_size
self._KKT_B[k, k - 1] = 1.0 / init_step_size
self._KKT_B[self.npt + k - 1, k - 1] = -0.5 * init_step_size**2.0
elif self.n < k <= 2 * self.n:
xpt[k, k - self.n - 1] = -init_step_size
self._KKT_B[k, k - self.n - 1] = -0.5 / xpt[k - self.n, k - self.n - 1]
self._KKT_B[k - self.n, k - self.n - 1] = (
-self._KKT_B[0, k - self.n - 1] - self._KKT_B[k, k - self.n - 1]
)
self._KKT_R[k - self.n - 1, 0] = -np.sqrt(2.0) / (init_step_size**2.0)
self._KKT_R[k - self.n - 1, k] = np.sqrt(0.5) / init_step_size**2.0
self._KKT_R[k - self.n - 1, k - self.n] = (
-self._KKT_R[k - self.n - 1, 0] - self._KKT_R[k - self.n - 1, k]
)
elif k > 2 * self.n:
shift = (k - self.n - 1) // self.n
i = k - (1 + shift) * self.n - 1
j = (i + shift) % self.n
self._KKT_R[k - self.n - 1, 0] = 1.0 / init_step_size**2.0
self._KKT_R[k - self.n - 1, k] = 1.0 / init_step_size**2.0
self._KKT_R[k - self.n - 1, i + 1] = -1.0 / init_step_size**2.0
self._KKT_R[k - self.n - 1, j + 1] = -1.0 / init_step_size**2.0
self._update_surrogate_coeff()
[docs]
def inv_kkt_matrix_dot(
self, x: np.ndarray, w_star: Optional[np.ndarray] = None
) -> np.ndarray:
"""
Compute inv(W) @ x, where W is the coefficient matrix of the KKT equation generated
by the derivative-free symmetric Broyden update [1]_, and return the vector formed
by the first ``npt`` and last ``n`` dimensions of the result.
In the multiplication, we can safely ignore the ``(npt + 1)``-th dimension of ``x``.
Therefore, the input vector ``x`` only needs to include the first ``npt`` and the last
``n`` elements, instead of the full ``(npt + 1 + n)``-dimensional vector.
Parameters
----------
x : ndarray, shape (npt + n,)
The vector to be dotted with the inverse of the KKT matrix.
w_star : ndarray, shape (npt + n,), optional
The auxiliary vector to assist in the calculation. It is formed by
``[0.5 * ((yi - center) @ (Y - center).T) ** 2, yi - center]``,
where ``Y`` is of shape ``(npt, n)`` which represents the interpolation points.
``yi`` can be any point in the interpolation set, and is ``x_anchor`` by default.
Returns
-------
ndarray, shape (npt + n,)
The vector formed by the first ``npt`` and last ``n`` dimensions of the result.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic
models that satisfy interpolation conditions. *Math. Program.* 100, 1
(May 2004), 183-215.
"""
# assert x.size == self.npt + self.n
w_star = self._get_w() if w_star is None else w_star
x = x.reshape((-1,))
Zx = np.zeros((self.npt + self.n,), dtype=np.float64)
tmp = x[: self.npt] @ self._KKT_R.T
tmp[: self._negative_s_idx] = -tmp[: self._negative_s_idx]
Zx[: self.npt] = tmp @ self._KKT_R
Zx[: self.npt] += x[self.npt :] @ (self._KKT_B[: self.npt]).T
Zx[: self.npt] -= w_star[self.npt :] @ (self._KKT_B[: self.npt]).T
tmp = w_star[: self.npt] @ self._KKT_R.T
tmp[: self._negative_s_idx] = -tmp[: self._negative_s_idx]
Zx[: self.npt] -= tmp @ self._KKT_R
Zx[self.npt :] = (x - w_star) @ self._KKT_B
Zx[self.cached_x_anchor_idx] += 1
return Zx
[docs]
def inv_kkt_matrix_partial_dot(self, x: np.ndarray) -> np.ndarray:
"""
Compute inv(W) @ x, where W is the coefficient matrix of the KKT equation
generated by the derivative-free symmetric Broyden update [1]_, and return the
vector formed by the first ``npt`` and last ``n`` dimensions of the result.
Unlike ``self.inv_kkt_matrix_dot(x)``, this method assumes that ``x`` is zero for
indices ``[npt:]``, allowing for a simplified calculation.
Parameters
----------
x : ndarray, shape (npt + n,)
The vector to be dotted with the inverse of the KKT matrix.
Returns
-------
ndarray, shape (npt + n,)
The vector formed by the first ``npt`` and last ``n`` dimensions of the result.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic
models that satisfy interpolation conditions. *Math. Program.* 100, 1
(May 2004), 183-215.
"""
x = x.reshape((-1,))
tmp = x[: self.npt] @ self._KKT_R.T
tmp[: self._negative_s_idx] = -tmp[: self._negative_s_idx]
return np.hstack((tmp @ self._KKT_R, x[: self.npt] @ self._KKT_B[: self.npt]))
def _get_alpha(self, k: Optional[int] = None) -> Union[np.ndarray, float]:
if k is None:
return (
LA.norm(self._KKT_R[self._negative_s_idx :], axis=0) ** 2
- LA.norm(self._KKT_R[: self._negative_s_idx], axis=0) ** 2
)
else:
tmp = np.copy(self._KKT_R[:, k])
tmp[: self._negative_s_idx] = -tmp[: self._negative_s_idx]
return tmp.dot(self._KKT_R[:, k])
def _get_beta(
self,
w: np.ndarray,
Zw: np.ndarray,
x_shift: np.ndarray,
w_star: Optional[np.ndarray] = None,
) -> float:
w_star = self._get_w() if w_star is None else w_star
return (
0.5 * (x_shift.dot(x_shift)) ** 2
- w[self.cached_x_anchor_idx]
- w.dot(Zw)
+ w_star.dot(Zw)
)
def _get_w(self, x_shift: Optional[np.ndarray] = None) -> np.ndarray:
x_shift = (
self.cached_x_anchor - self.model_center
if x_shift is None
else x_shift.copy()
)
w = np.zeros(self.n + self.npt, dtype=np.float64)
w[: self.npt] = 0.5 * ((x_shift @ self.cached_Y_shift.T) ** 2)
w[self.npt :] = x_shift
return w.copy()
[docs]
def get_determinant_ratio(
self, x_new: np.ndarray, idx: Optional[int] = None
) -> Tuple[
Union[np.ndarray, float],
float,
Union[np.ndarray, float],
Union[np.ndarray, float],
np.ndarray,
]:
"""
Calculate key intermediate quantities (``alpha``, ``beta``, ``tau``, ``sigma``) for
updating the inverse KKT matrix in the derivative-free symmetric Broyden
update [1]_. These quantities are defined in [2]_, where ``sigma`` (the denominator
in the update equation) represents the ratio of the determinant of the inverse
KKT matrix before and after update.
Parameters
----------
x_new : ndarray, shape (n,)
New point to be added to the interpolation set (not yet added).
idx : int, optional
Index of the interpolation point to be replaced by ``x_new``.
If provided, returns scalar values at this index; otherwise returns full arrays.
Returns
-------
alpha : ndarray or float
Quantity ``alpha``. Returns ``alpha[idx]`` if ``idx`` is provided.
beta : float
Quantity ``beta``.
tau : ndarray or float
Quantity ``tau``. Returns ``tau[idx]`` if ``idx`` is provided.
sigma : ndarray or float
Quantity ``sigma``. Returns ``sigma[idx]`` if ``idx`` is provided.
ndarray
Result of ``self.inv_kkt_matrix_dot(w)``, where:
``w = [0.5 * ((x_new - center) @ (Y - center).T) ** 2, x_new - center]``,
where ``Y`` is of shape ``(npt, n)`` which represents the interpolation points.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic
models that satisfy interpolation conditions. *Math. Program.* 100, 1
(May 2004), 183-215.
.. [2] Michael J. D. Powell. 2004. On updating the inverse of a KKT matrix.
*Numerical Linear Algebra and Optimization, ed. Ya-xiang Yuan, Science Press
(Beijing)* (2004): 56-78.
"""
x_new_shift = x_new - self.model_center
w = self._get_w(x_new_shift)
w_star = self._get_w()
Zw = self.inv_kkt_matrix_dot(w, w_star)
beta = self._get_beta(w, Zw, x_new_shift, w_star=w_star)
alpha = self._get_alpha(idx)
tau = Zw[idx] if idx is not None else Zw[: self.npt]
sigma = alpha * beta + tau**2
return alpha, beta, tau, sigma, Zw
[docs]
def update(
self,
cached_kkt_info: Optional[
Tuple[np.ndarray, float, np.ndarray, np.ndarray, np.ndarray]
] = None,
) -> None:
"""
Update the KKT and model coefficients using the method detailed in [1]_, [2]_ and [3]_.
Parameters
----------
cached_kkt_info : tuple, optional
If not None, it should be the cached output of ``self.get_determinant_ratio(x_new)``,
where ``x_new`` denotes the new point just added into the interpolation set.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic models
that satisfy interpolation conditions. *Math. Program.* 100, 1 (May 2004), 183-215.
.. [2] Michael J. D. Powell. 2004. On updating the inverse of a KKT matrix. *Numerical
Linear Algebra and Optimization, ed. Ya-xiang Yuan, Science Press (Beijing)*
(2004): 56-78.
.. [3] Michael J. D. Powell. 2006. *The NEWUOA Software for Unconstrained Optimization
without Derivatives*. Springer US, Boston, MA, 255-297.
"""
last_update_idx = self.interp_set.last_update_idx
x_new = self.interp_set.get_interp_set(last_update_idx)[0]
deleted_x = self.interp_set.just_deleted_point.copy()
self._update_KKT_coeff(x_new, last_update_idx, cached_kkt_info)
self._update_surrogate_coeff(deleted_x, last_update_idx)
self.cached_x_anchor = self.interp_set.get_anchor()[0].copy()
self.cached_x_anchor_idx = self.interp_set.x_anchor_idx
def _update_KKT_coeff(
self,
x_new: np.ndarray,
idx: int,
cached_kkt_info: Optional[
Tuple[np.ndarray, float, np.ndarray, np.ndarray, np.ndarray]
] = None,
) -> None:
"""
Update the KKT coefficients using the method detailed in [1]_, [2]_ and [3]_.
Parameters
----------
x_new : ndarray, shape (n,)
The new point which was just added into the interpolation set.
idx : int
The index in the interpolation set where ``x_new`` was added.
cached_kkt_info : tuple, optional
If not None, it should be the cached output of ``self.get_determinant_ratio(x_new)``,
where ``x_new`` denotes the new point just added into the interpolation set.
Notes
-----
The implementation here largely references that in COBYQA [4]_.
References
----------
.. [1] Michael J. D. Powell. 2004. Least Frobenius norm updating of quadratic
models that satisfy interpolation conditions. *Math. Program.* 100, 1
(May 2004), 183-215.
.. [2] Michael J. D. Powell. 2004. On updating the inverse of a KKT matrix.
*Numerical Linear Algebra and Optimization, ed. Ya-xiang Yuan, Science Press
(Beijing)* (2004): 56-78.
.. [3] Michael J. D. Powell. 2006. *The NEWUOA Software for Unconstrained Optimization
without Derivatives*. Springer US, Boston, MA, 255-297.
.. [4] Tom M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
and Software*. PhD thesis, Department of Applied Mathematics, The Hong
Kong Polytechnic University, Hong Kong, China, 2022. URL:
https://theses.lib.polyu.edu.hk/handle/200/12294.
"""
def householder_transform(mat: np.ndarray, idx: int) -> None:
"""
Perform an in-place Householder transformation on ``mat`` to make the ``idx``-th
column align with the first standard basis vector ``e_1``.
"""
vec = mat[:, idx].reshape((-1,)).copy()
vec_norm = LA.norm(vec)
if np.abs(vec_norm) > 0.0:
vec[0] += vec_norm if vec[0] >= 0 else -vec_norm
mat -= 2 * np.outer(vec, vec @ mat) / vec.dot(vec)
# Perform Householder transformation on _KKT_R
if self._negative_s_idx > 1:
householder_transform(self._KKT_R[: self._negative_s_idx], idx)
if self._negative_s_idx < self._KKT_R.shape[0] - 1:
householder_transform(self._KKT_R[self._negative_s_idx :], idx)
# Prepare alpha, beta, tau, sigma, and Zw
if cached_kkt_info is None:
alpha, beta, tau, sigma, Zw = self.get_determinant_ratio(x_new, idx)
else:
alpha, beta, tau, sigma, Zw = cached_kkt_info
alpha, tau, sigma = alpha[idx], tau[idx], sigma[idx]
b_max = np.max(np.abs(self._KKT_B), initial=1.0)
z_max = np.max(np.abs(self._KKT_R), initial=1.0)
if abs(sigma) < np.finfo(float).tiny * max(b_max, z_max):
# The denominator of the updating formula is too small to safely
# divide the coefficients of the KKT matrix of interpolation.
# Theoretically, the value of abs(sigma) is always positive, and
# becomes small only for ill-conditioned problems.
raise ZeroDivisionError("The denominator of the updating formula is zero")
# Update _KKT_B first
_KKT_B_on_idx = self._KKT_B[idx].copy()
Ze_ell = np.hstack((self._KKT_R[0] * self._KKT_R[0, idx], _KKT_B_on_idx))
Zw_minus_e_ell = Zw.copy()
Zw_minus_e_ell[idx] -= 1
temp_vec1 = (alpha * Zw[self.npt :] - tau * _KKT_B_on_idx) / sigma
temp_vec2 = (tau * Zw[self.npt :] + beta * _KKT_B_on_idx) / sigma
self._KKT_B[: self.npt] += np.outer(
Zw_minus_e_ell[: self.npt], temp_vec1
) - np.outer(Ze_ell[: self.npt], temp_vec2)
self._KKT_B[self.npt :] += np.outer(Zw_minus_e_ell[self.npt :], temp_vec1)
self._KKT_B[self.npt :] -= np.outer(Ze_ell[self.npt :], temp_vec2)
# Then update _KKT_R
jdz = (
self._negative_s_idx if self._negative_s_idx < self.npt - self.n - 1 else 0
)
if jdz == 0:
self._KKT_R[0] = (
tau * self._KKT_R[0] - self._KKT_R[0, idx] * Zw_minus_e_ell[: self.npt]
) / (np.abs(sigma) ** 0.5)
if sigma < 0.0:
self._negative_s_idx = 1
else:
scala = self._KKT_R[0, idx] if jdz == 0 else -self._KKT_R[0, idx]
scalb = 0.0 if jdz == 0 else self._KKT_R[jdz, idx]
kdz = jdz if beta >= 0.0 else 0
jdz -= kdz
tempb = self._KKT_R[jdz, idx] * tau / sigma
tmp1 = 1.0 / np.sqrt(abs(beta) * self._KKT_R[jdz, idx] ** 2.0 + tau**2.0)
self._KKT_R[kdz] = (
tau * self._KKT_R[kdz]
- self._KKT_R[jdz, idx] * Zw_minus_e_ell[: self.npt]
)
self._KKT_R[kdz] *= tmp1
self._KKT_R[jdz] -= (self._KKT_R[jdz, idx] * beta / sigma) * (
scala * self._KKT_R[0] + scalb * self._KKT_R[self._negative_s_idx]
) + tempb * Zw_minus_e_ell[: self.npt]
self._KKT_R[jdz] *= tmp1 * np.sqrt(abs(sigma))
if sigma <= 0.0:
if beta < 0.0:
# If sigma <= 0 and beta < 0, the positive-definite property is broken.
self._negative_s_idx += 1
else:
self._negative_s_idx -= 1
self._KKT_R[[0, self._negative_s_idx]] = self._KKT_R[
[self._negative_s_idx, 0]
]
[docs]
def shift_center_to(self, x: np.ndarray) -> None:
"""
Shift the model center to ``x``, then update the KKT and model coefficients.
Parameters
----------
x : ndarray, shape (n,)
The new model center.
Notes
-----
The computational cost is O(npt^3) with BLAS-3 operations.
"""
x = x.copy()
step = x - self.model_center
# Prepare intermediate variables
Phi_T = self.cached_Y_shift - 0.5 * step
Psi_T = (step @ Phi_T.T).reshape((-1, 1)) * Phi_T + 0.25 * step.dot(step) * step
R_T_Psi_T = self._KKT_R @ Psi_T
Xi_Psi_T = self._KKT_B[: self.npt].T @ Psi_T
S_R_T_Psi_T = np.copy(R_T_Psi_T)
S_R_T_Psi_T[: self._negative_s_idx] = -S_R_T_Psi_T[: self._negative_s_idx]
Omega_Psi_T = self._KKT_R.T @ S_R_T_Psi_T
Psi_Omega_Psi_T = R_T_Psi_T.T @ S_R_T_Psi_T
# Update _KKT_B
# Note: _KKT_R remains invariant when shifting the model center.
self._KKT_B[: self.npt] += Omega_Psi_T
self._KKT_B[self.npt :] += Xi_Psi_T + Xi_Psi_T.T + Psi_Omega_Psi_T
# Update surrogate coefficients
center_shift = x - self.model_center
Hess_center_shift = self.hess_operator(center_shift)
self.model_cons = (
self.model_cons
+ self.model_grad.dot(center_shift)
+ 0.5 * center_shift @ Hess_center_shift
)
self.model_grad = Hess_center_shift + self.model_grad
self.model_center = x
temp = (Phi_T.T * self.model_hess_implicit) @ np.tile(step, (self.npt, 1))
self.model_hess_explicit += temp + temp.T
self.cached_Y_shift = self.cached_Y_shift - step
def _update_surrogate_coeff(
self, deleted_x: Optional[np.ndarray] = None, idx: Optional[int] = None
) -> None:
"""
Assuming the KKT coefficients have been updated, this function updates the model
coefficients based on the updated KKT coefficients.
Parameters
----------
deleted_x : ndarray, shape (n,), optional
The old interpolation point that was replaced with a new point at the index
``idx``. Both ``deleted_x`` and ``idx`` should be None if the model is being
updated for the first time after being established.
idx : int, optional
The index where the old interpolation point ``deleted_x`` was replaced with a
new point. Both ``deleted_x`` and ``idx`` should be None if the model is being
updated for the first time after being established.
"""
Y_new, f_Y_new = self.interp_set.get_interp_set()
f_Y_shift = f_Y_new - self.fun_eval(Y_new)
model_lambda, model_g = self.solve_interpolation_KKT(f_Y_shift=f_Y_shift)
model_hess_explicit_p1 = self.model_hess_explicit.copy()
model_hess_implicit_p1 = self.model_hess_implicit + model_lambda
if idx is not None:
# assert deleted_x is not None
deleted_x = deleted_x - self.model_center
model_hess_explicit_p1 += self.model_hess_implicit[idx] * np.outer(
deleted_x, deleted_x
)
model_hess_implicit_p1[idx] = model_lambda[idx]
model_grad_p1 = self.grad_eval(self.model_center) + model_g
self.model_hess_explicit = model_hess_explicit_p1
self.model_hess_implicit = model_hess_implicit_p1
self.model_grad = model_grad_p1
self.cached_Y_shift = Y_new - self.model_center
self.model_cons += f_Y_new[self.interp_set.x_anchor_idx] - self.fun_eval(
self.interp_set.x_anchor
)
[docs]
def solve_interpolation_KKT(
self, f_Y_shift=np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
"""
Given the interpolation set, solves the first-order KKT optimality conditions
for the derivative-free symmetric Broyden update subproblem and returns the solution:
the Lagrange multipliers and the gradient update term.
Parameters
----------
f_Y_shift : ndarray, shape (npt,)
Represents ``fun(Y) - surrogate(Y)``, where ``Y`` denotes the interpolation points.
Returns
-------
lambda : ndarray, shape (npt,)
The Lagrange multipliers.
g : ndarray, shape (n,)
The gradient update term.
"""
coeff = self.inv_kkt_matrix_partial_dot(f_Y_shift)
return coeff[0 : self.npt], coeff[self.npt :]
[docs]
def reset_surrogate_coeff(self) -> None:
"""
Reset all surrogate model coefficients to zero values.
"""
self.model_hess_explicit = np.zeros((self.n, self.n), dtype=np.float64)
self.model_hess_implicit = np.zeros(self.interp_set.npt, dtype=np.float64)
self.model_grad = np.zeros(self.n, dtype=np.float64)
self.model_cons = 0.0
[docs]
def reinit(self) -> None:
"""
Reinitialize the surrogate model by first resetting and then updating the coefficients.
"""
self.reset_surrogate_coeff()
self._update_surrogate_coeff()
[docs]
def alt_grad_eval(self, x: np.ndarray) -> np.ndarray:
"""
Evaluate gradient of a newly-instantiated surrogate model at point ``x``.
Conceptually equivalent to:
1. Creating a new surrogate model with current interpolation set
2. Updating its coefficients from zero
3. Returning its gradient at ``x``
Avoids actual model reconstruction by leveraging KKT solution.
Parameters
----------
x : ndarray, shape (n,)
Evaluation point for gradient calculation.
Returns
-------
g : ndarray
Gradient vector of the conceptual surrogate model at ``x``.
"""
_, f_Y = self.interp_set.get_interp_set()
model_lambda, model_g = self.solve_interpolation_KKT(f_Y_shift=f_Y)
return (
((x - self.model_center) @ self.cached_Y_shift.T) * model_lambda
) @ self.cached_Y_shift + model_g
[docs]
def fun_eval(self, X: np.ndarray) -> Union[np.ndarray, float]:
"""
Evaluate the surrogate model at given point(s).
Parameters
----------
x : ndarray, shape (n,) or ndarray, shape (n_samples, n)
Evaluation point(s). Can be a single point (1D) or batch of points (2D).
Returns
-------
fval : float or ndarray, shape (n_samples,)
Model value(s). Scalar for single point input, array of shape (n_samples,)
for multiple points.
"""
X_shift = X - self.model_center
if X_shift.ndim == 1:
return (
X_shift.dot(0.5 * self.hess_operator(X_shift) + self.model_grad)
+ self.model_cons
)
else:
value = (
0.5 * np.diag(self.hess_operator(X_shift) @ X_shift.T)
+ X_shift @ self.model_grad
+ self.model_cons
)
return (
value.item()
if isinstance(value, np.ndarray) and value.size == 1
else value
)
[docs]
def grad_eval(self, X: np.ndarray) -> np.ndarray:
"""
Evaluate the gradient vector(s) of the surrogate model at given point(s).
Parameters
----------
x : ndarray, shape (n,) or ndarray, shape (n_samples, n)
Evaluation point(s). Can be a single point (1D) or batch of points (2D).
Returns
-------
g : ndarray, shape (n, ) or ndarray, shape (n_samples, n)
Gradient vector(s). Shape (n,) for single point, (n_samples, n) for
multiple points.
"""
return self.hess_operator(X - self.model_center) + self.model_grad
@property
def model_hess(self) -> np.ndarray:
"""Full Hessian matrix of the surrogate model"""
return (
self.cached_Y_shift.T * self.model_hess_implicit
) @ self.cached_Y_shift + self.model_hess_explicit
[docs]
def hess_eval(self, *arg, **kwargs) -> np.ndarray:
"""
Return the full Hessian matrix of the surrogate model.
Returns
-------
H : ndarray, shape (n, n)
Hessian matrix (same as ``model_hess`` property).
"""
return self.model_hess
[docs]
def hess_operator(self, V: np.ndarray) -> np.ndarray:
"""
Compute Hessian-vector product(s) for the surrogate model.
Parameters
----------
V : ndarray, shape (n, ) or ndarray, shape (n_samples, n)
Input vector(s). Single vector (1D) or batch of vectors (2D).
Returns
-------
HV : ndarray, shape (n, ) or ndarray, shape (n_samples, n)
Hessian-vector product(s). Shape (n,) for single input vector,
(n_samples, n) for multiple vectors.
"""
if V.ndim == 1:
V = V[np.newaxis, :]
result = (
(V @ self.cached_Y_shift.T) * self.model_hess_implicit
) @ self.cached_Y_shift + V @ self.model_hess_explicit
return result if V.shape[0] > 1 else result[0]
[docs]
class OverallSurrogate:
r"""
Overall surrogate model formed by assembling multiple elemental surrogate models.
Represent a full-system surrogate as a sum of component models, each defined on
a subspace of the full design space.
Attributes
----------
n : int
Problem dimension
interp_set : :class:`~upoqa.utils.interp_set.OverallInterpSet`
Interpolation set for the overall surrogate model
proj_onto_ele : callable
Projection function mapping full-space points to element subspaces:
.. code-block:: python
(full_point: ndarray, element_name: Any) -> element_point: ndarray
For example, ``proj_onto_ele([1.0, 2.0], "f_1")`` yields ``[1.0,]``, where
``"f_1"`` denotes a function that depends only on the first variable of ``x``.
coords : list of 1D arrays
Variable indices for each element's subspace (parallel to ``ele_names``).
ele_models : list
List of elemental surrogate models
xforms : list
Transformations :math:`h_1, \ldots, h_q` applied to element function outputs
weights : list
Weights :math:`w_1, \ldots, w_q` for each element in the model
extra_fun : list
The white-box component :math:`f_0` of the objective function, which is a list of
length 3 containing callables to evaluate the function value, gradient, and Hessian.
params : :class:`~upoqa.utils.params.UPOQAParameterList`
Parameter list used for the algorithm
ele_names : list
List containing the names of all elements in order
model_center : ndarray, shape (n,)
Center of the surrogate model
"""
def __init__(
self,
n: int,
interp_set: OverallInterpSet,
proj_onto_ele: Callable[[np.ndarray, Any], np.ndarray],
coords: List[Union[List, np.ndarray]],
ele_models: List[QuadSurrogate],
extra_fun: List[Callable[[np.ndarray], float]],
ele_names: List[Any],
params: UPOQAParameterList,
) -> None:
self.n = n
self._zero = np.zeros((n,))
self.interp_set = interp_set
self.ele_models = ele_models
self.ele_names = ele_names
self.ele_idxs = list(range(len(ele_names)))
assert callable(proj_onto_ele)
self.proj_onto_ele = proj_onto_ele
self.coords = coords
self.model_center = self.interp_set.x_opt
self.extra_fun = extra_fun
self.params = params
self.xforms = None
self.weights: Optional[np.ndarray] = None
def __getitem__(self, ele_idx: int) -> QuadSurrogate:
return self.ele_models[ele_idx]
[docs]
def set_weights(self, weights: np.ndarray) -> None:
"""
Set the weights for the elements.
Parameters
----------
- weights : ndarray
The weights to assign to each element.
"""
self.weights = deepcopy(weights)
[docs]
def update_weights(
self,
weights: Union[
Dict[Any, Union[int, float]], List[Union[int, float]], np.ndarray
],
) -> None:
"""
Update the weights for the elements, handling both list and dictionary inputs.
If ``weights`` is a dictionary, it maps element names to weight values.
Missing elements retain their current weights.
This method should only be called by
:meth:`~upoqa.utils.manager.UPOQAManager.update_weights()`.
Parameters
----------
weights : list or dict or ndarray
New weights for model elements. Can be a 1D array, or a list of weights
for all elements or a dictionary mapping element names to weights.
"""
if isinstance(weights, dict):
weights_list = []
for ele_idx in self.ele_idxs:
ele_name = self.ele_names[ele_idx]
if ele_name in weights:
weights_list.append(float(weights[ele_name]))
else:
weights_list.append(float(self.weights[ele_idx]))
self.set_weights(weights_list)
else:
self.set_weights(weights)
[docs]
def shift_center_to(self, x: np.ndarray) -> None:
"""
Shift the model center to a new point ``x``.
Note that this does not change the centers of the element models.
Parameters
----------
x : ndarray
The new center point for the overall surrogate model.
"""
self.model_center = x
[docs]
def update(
self,
want_update: List[bool],
cached_KKT_info_eles: List[
Optional[Tuple[np.ndarray, float, np.ndarray, np.ndarray, np.ndarray]]
] = None,
) -> None:
"""
Update the element models based on the provided update flags and cached KKT
information.
Parameters
----------
want_update : list of bool
A list of flags indicating whether each element model should be updated.
cached_KKT_info_eles : list of tuples, optional
List of cached outputs of ``ele_surrogate.get_determinant_ratio(x_ele_new)``
for each element model ``ele_surrogate``, where ``x_ele_new`` denotes the new
point just added into the corresponding elemental interpolation set.
"""
for ele_idx in self.ele_idxs:
ele_surrogate = self.ele_models[ele_idx]
if want_update[ele_idx]:
try:
ele_surrogate.update(
cached_kkt_info=(
cached_KKT_info_eles[ele_idx]
if cached_KKT_info_eles
else None
)
)
except (LA.LinAlgError, ZeroDivisionError) as e:
raise SurrogateLinAlgError(
str(e), ele_name=self.ele_names[ele_idx], ele_idx=ele_idx
)
[docs]
def fun_eval(self, x: np.ndarray) -> float:
"""
Evaluate the overall surrogate model at given point.
Parameters
----------
x : ndarray, shape (n,)
Evaluation point.
Returns
-------
fval : float
The model value of the overall surrogate model at the point ``x``.
"""
value = float(self.extra_fun[0](x))
for ele_idx in self.ele_idxs:
ele_surrogate = self.ele_models[ele_idx]
x_ele = self.proj_onto_ele(x, ele_idx)
if self.xforms[ele_idx] is not None:
value_ele = self.weights[ele_idx] * self.xforms[ele_idx][0](
ele_surrogate.fun_eval(x_ele)
)
else:
value_ele = self.weights[ele_idx] * ele_surrogate.fun_eval(x_ele)
if self.params("debug.check_nan_fval") and np.any(np.isnan(value_ele)):
raise ValueError(
f"(element {self.ele_names[ele_idx]}) NaN encountered in the return value"
" of the surrogate function. "
"This may be caused by some numerical error or matrix singularity. "
)
value += value_ele
return value
[docs]
def grad_eval(self, x: np.ndarray) -> np.ndarray:
"""
Evaluate the gradient vector of the overall surrogate model at given point.
Parameters
----------
x : ndarray, shape (n,)
Evaluation point.
Returns
-------
g : ndarray, shape (n, )
The gradient vector of the overall surrogate model at the point ``x``.
"""
grad = np.array(self.extra_fun[1](x), dtype=np.float64)
for ele_idx in self.ele_idxs:
ele_surrogate = self.ele_models[ele_idx]
x_ele = self.proj_onto_ele(x, ele_idx)
if self.xforms[ele_idx] is not None:
grad_ele = (
self.weights[ele_idx]
* self.xforms[ele_idx][1](ele_surrogate.fun_eval(x_ele))
* ele_surrogate.grad_eval(x_ele)
)
else:
grad_ele = self.weights[ele_idx] * ele_surrogate.grad_eval(x_ele)
if self.params("debug.check_nan_fval") and np.any(np.isnan(grad_ele)):
raise ValueError(
f"(element {self.ele_names[ele_idx]}) NaN encountered in the return value"
" of the surrogate gradient. "
"This may be caused by some numerical error or matrix singularity. "
)
grad[self.coords[ele_idx]] += grad_ele
return grad
[docs]
def hess_eval(self, x: np.ndarray) -> np.ndarray:
"""
Evaluate the full Hessian matrix of the overall surrogate model at given point.
Parameters
----------
x : ndarray, shape (n,)
Evaluation point.
Returns
-------
H : ndarray, shape (n, n)
The Hessian matrix of the overall surrogate model at the point ``x``.
"""
hess = np.array(self.extra_fun[2](x), dtype=np.float64)
for ele_idx in self.ele_idxs:
ele_surrogate = self.ele_models[ele_idx]
model_dim_idx = np.asarray(self.coords[ele_idx]).reshape((1, -1))
x_ele = self.proj_onto_ele(x, ele_idx)
if self.xforms[ele_idx] is not None:
ele_model_fval = ele_surrogate.fun_eval(x_ele)
ele_model_grad = ele_surrogate.grad_eval(x_ele)
xform_grad = self.xforms[ele_idx][1](ele_model_fval)
xform_hess = self.xforms[ele_idx][2](ele_model_fval)
hess_ele = self.weights[ele_idx] * (
xform_hess * np.outer(ele_model_grad, ele_model_grad)
+ xform_grad * ele_surrogate.hess_eval(x_ele)
)
else:
hess_ele = self.weights[ele_idx] * ele_surrogate.hess_eval(x_ele)
if self.params("debug.check_nan_fval") and np.any(np.isnan(hess_ele)):
raise ValueError(
f"(element {self.ele_names[ele_idx]}) NaN encountered in the return value"
" of the surrogate hessian. "
"This may be caused by some numerical error or matrix singularity. "
)
hess[model_dim_idx.T, model_dim_idx] += hess_ele
return hess
[docs]
def hess_operator(self, x: np.ndarray, v: np.ndarray) -> np.ndarray:
"""
Compute Hessian-vector product for the overall surrogate model at given point.
Parameters
----------
x : ndarray, shape (n,)
The point at which to evaluate the Hessian-vector product.
v : ndarray, shape (n, )
The vector to multiply with the Hessian matrix.
Returns
-------
Hv : ndarray, shape (n, )
The result of the Hessian-vector product.
"""
result = v @ np.array(self.extra_fun[2](x), dtype=np.float64)
for ele_idx in self.ele_idxs:
ele_surrogate = self.ele_models[ele_idx]
v_ele = self.proj_onto_ele(v, ele_idx)
if self.xforms[ele_idx] is not None:
x_ele = self.proj_onto_ele(x, ele_idx)
ele_model_fval = ele_surrogate.fun_eval(x_ele)
ele_model_grad = ele_surrogate.grad_eval(x_ele)
xform_grad = self.xforms[ele_idx][1](ele_model_fval)
xform_hess = self.xforms[ele_idx][2](ele_model_fval)
hess_operator_ele = self.weights[ele_idx] * (
xform_hess
* np.outer(ele_model_grad, v_ele.dot(ele_model_grad)).squeeze()
+ xform_grad * ele_surrogate.hess_operator(v_ele)
)
else:
hess_operator_ele = self.weights[ele_idx] * ele_surrogate.hess_operator(
v_ele
)
if self.params("debug.check_nan_fval") and np.any(
np.isnan(hess_operator_ele)
):
raise ValueError(
f"(element {self.ele_names[ele_idx]}) NaN encountered in the return "
"value of the surrogate hessian operator. "
"This may be caused by some numerical error or matrix singularity. "
)
result[self.coords[ele_idx]] += hess_operator_ele
return result