CHANGELOG.md: 1 change (1 addition, 0 deletions)

@@ -20,6 +20,7 @@ or if multiple things are being optimized, `x_scale` can be a list of dict, one
- Adds new option `x_scale='ess'` to use exponential spectral scaling (Jang 2025), which has been shown to improve performance and robustness as an alternative to Fourier continuation methods.
- Adds ``"scipy-l-bfgs-b"`` optimizer option as a wrapper to scipy's ``"l-bfgs-b"`` method.
- Adds ``prox_inv_method`` option for use in equilibrium-constrained optimization problems. ``prox_inv_method`` can be ``qr``, ``svd``, or ``svd-reg``. ``svd-reg`` is the legacy behaviour, which included an unnecessary regularization term. The new default is ``qr``, which is faster than ``svd`` without significant loss of accuracy. Note that this may change the results of optimization scripts. In a number of example cases, the new default has been observed to make the optimization succeed without continuation.
> **Member:** these example cases are super low res, so I wouldn't take that as representative of the true behavior.

Bug Fixes

desc/optimize/_constraint_wrappers.py: 52 changes (37 additions, 15 deletions)

@@ -4,7 +4,7 @@

import numpy as np

from desc.backend import jit, jnp, put
from desc.backend import jit, jnp, put, qr
from desc.batching import batched_vectorize
from desc.objectives import (
BoundaryRSelfConsistency,
@@ -21,7 +21,7 @@
)
from desc.utils import Timer, errorif, get_instance, setdefault

from .utils import f_where_x
from .utils import f_where_x, solve_triangular_regularized


class LinearConstraintProjection(ObjectiveFunction):
@@ -572,6 +572,9 @@
perturb_options, solve_options : dict
dictionary of arguments passed to Equilibrium.perturb and Equilibrium.solve
during the projection step.
inv_method : str
Method to use for computing the pseudo-inverse of the constraint Jacobian.
Options are 'qr' (default), 'svd', and 'svd-reg' (legacy behaviour).
name : str
Name of the objective function.
"""
@@ -583,14 +583,20 @@
eq,
perturb_options=None,
solve_options=None,
inv_method="qr",
name="ProximalProjection",
):
assert isinstance(objective, ObjectiveFunction), (
"objective should be instance of ObjectiveFunction." ""
)
assert isinstance(constraint, ObjectiveFunction), (
"constraint should be instance of ObjectiveFunction." ""
)
assert isinstance(
objective, ObjectiveFunction
), "objective should be instance of ObjectiveFunction."
assert isinstance(
constraint, ObjectiveFunction
), "constraint should be instance of ObjectiveFunction."
assert inv_method in [
"svd",
"qr",
"svd-reg",
], f"inv_method should be either 'svd', 'svd-reg' or 'qr', got {inv_method}."
for con in constraint.objectives:
errorif(
not con._equilibrium,
@@ -618,6 +627,7 @@
solve_options.setdefault("verbose", 0)
self._perturb_options = perturb_options
self._solve_options = solve_options
self._inv_method = inv_method
self._built = False
# don't want to compile this, just use the compiled objective and constraint
self._use_jit = False
@@ -1262,6 +1272,7 @@
self._eq_solve_objective._feasible_tangents,
self._dxdc,
op,
inv_method=self._inv_method,
)
# broadcasting against multiple things
dfdcs = [jnp.zeros(dim) for dim in self._dimc_per_thing]
@@ -1310,8 +1321,10 @@
# define these helper functions that are stateless so we can safely jit them


@functools.partial(jit, static_argnames=["op"])
def _proximal_jvp_f_pure(constraint, xf, constants, dc, eq_feasible_tangents, dxdc, op):
@functools.partial(jit, static_argnames=["op", "inv_method"])
def _proximal_jvp_f_pure(
constraint, xf, constants, dc, eq_feasible_tangents, dxdc, op, inv_method="svd"
):
# Note: This function is called by _get_tangent which is vectorized over v
# (v is called dc in this function). So, dc is expected to be 1D array
# of same size as full equilibrium state vector. This function returns a 1D array.
@@ -1326,11 +1339,20 @@
# wrt all R_lmn coefficients that contribute to Rb_023. See BoundaryRSelfConsistency
# for the relation between Rb_lmn and R_lmn.
Fc = getattr(constraint, "jvp_" + op)(dxdc @ dc, xf, constants)
cutoff = jnp.finfo(Fxh.dtype).eps * max(Fxh.shape)
uf, sf, vtf = jnp.linalg.svd(Fxh, full_matrices=False)
sf += sf[-1] # add a tiny bit of regularization
sfi = jnp.where(sf < cutoff * sf[0], 0, 1 / sf)
return vtf.T @ (sfi * (uf.T @ Fc))
if inv_method == "svd":
cutoff = jnp.finfo(Fxh.dtype).eps * max(Fxh.shape)
uf, sf, vtf = jnp.linalg.svd(Fxh, full_matrices=False)
sfi = jnp.where(sf < cutoff * sf[0], 0, 1 / sf)
return vtf.T @ (sfi * (uf.T @ Fc))
elif inv_method == "svd-reg": # this option will be deleted
cutoff = jnp.finfo(Fxh.dtype).eps * max(Fxh.shape)
uf, sf, vtf = jnp.linalg.svd(Fxh, full_matrices=False)
sf += sf[-1]
sfi = jnp.where(sf < cutoff * sf[0], 0, 1 / sf)
return vtf.T @ (sfi * (uf.T @ Fc))
elif inv_method == "qr":
Q, R = qr(Fxh, mode="economic")
return solve_triangular_regularized(R, Q.T @ Fc)

> **Collaborator:** why the comment on deleting the option?
>
> **Collaborator (author):** I was planning to remove it, but I will keep it at least for the foreseeable future.

Codecov / codecov/patch warning on desc/optimize/_constraint_wrappers.py#L1348-L1352: the added ``svd-reg`` lines were not covered by tests.
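For intuition, here is a standalone sketch (plain NumPy/SciPy, independent of DESC; in particular it uses an ordinary triangular solve rather than DESC's ``solve_triangular_regularized``) showing that for a full-rank, well-conditioned Jacobian the economic QR solve agrees with the truncated-SVD pseudo-inverse while being cheaper to compute:

```python
# Toy comparison of the 'svd' and 'qr' inversion paths on a random
# least-squares problem. Names mirror the DESC code, but the data is synthetic.
import numpy as np
from scipy.linalg import qr, solve_triangular

rng = np.random.default_rng(0)
Fxh = rng.standard_normal((50, 20))  # stand-in for the constraint Jacobian
Fc = rng.standard_normal(50)  # stand-in for the constraint JVP

# 'svd' branch: truncated pseudo-inverse with a relative singular-value cutoff
uf, sf, vtf = np.linalg.svd(Fxh, full_matrices=False)
cutoff = np.finfo(Fxh.dtype).eps * max(Fxh.shape)
sfi = np.where(sf < cutoff * sf[0], 0, 1 / sf)
y_svd = vtf.T @ (sfi * (uf.T @ Fc))

# 'qr' branch: economic QR factorization plus an upper-triangular solve
Q, R = qr(Fxh, mode="economic")
y_qr = solve_triangular(R, Q.T @ Fc)

np.testing.assert_allclose(y_svd, y_qr, rtol=1e-8, atol=1e-10)
```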


@functools.partial(jit, static_argnames=["op"])
desc/optimize/optimizer.py: 2 changes (2 additions, 0 deletions)

@@ -601,12 +601,14 @@ def _maybe_wrap_nonlinear_constraints(
if wrapper is not None and wrapper.lower() in ["prox", "proximal"]:
perturb_options = options.pop("perturb_options", {})
solve_options = options.pop("solve_options", {})
inv_method = options.pop("prox_inv_method", "qr")
> **Member:** I'd like to see more testing on a wide class of problems before we change the default.
>
> **Collaborator:** agreed, I will try it on some trickier free boundary problems I have.
objective = ProximalProjection(
objective,
constraint=_combine_constraints(nonlinear_constraints),
perturb_options=perturb_options,
solve_options=solve_options,
eq=eq,
inv_method=inv_method,
)
nonlinear_constraints = ()
return objective, nonlinear_constraints
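For end users, the new setting travels through the optimizer ``options`` dict, from which it is popped above. A usage sketch follows; the objective/constraint setup and the ``Equilibrium.optimize`` call are assumptions based on the standard DESC interface and are not part of this diff:

```python
# Illustrative only: request the SVD pseudo-inverse instead of the new 'qr'
# default when running a proximal-wrapped optimization on an existing `eq`.
from desc.objectives import AspectRatio, ForceBalance, ObjectiveFunction

objective = ObjectiveFunction(AspectRatio(eq=eq, target=8))
constraints = (ForceBalance(eq=eq),)
eq.optimize(
    objective=objective,
    constraints=constraints,
    optimizer="proximal-lsq-exact",
    options={"prox_inv_method": "svd"},
)
```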
docs/notebooks/tutorials/advanced_optimization.ipynb: 518 changes (154 additions, 364 deletions)

Large diffs are not rendered by default.

tests/test_examples.py: 21 changes (2 additions, 19 deletions)

@@ -265,30 +265,13 @@ def test_qh_optimization():
eq = Equilibrium(M=5, N=5, Psi=0.04, surface=surf)
eq.solve(verbose=3)

eq1 = run_qh_step(0, eq)
eq1 = run_qh_step(2, eq)
> **Collaborator (author):** I should check the notebooks to see if anything is changed with this.
>
> **Collaborator (author):** @dpanici also reported this kind of effect in #2000, but I think here doing continuation with the current settings overcommits the lower-order modes to a local minimum and causes issues in later steps. Since it converges immediately without continuation, I don't think this is a problem.
>
> **Member:** If this test doesn't work with the new option, that suggests there may be something wrong. We should figure out what the actual issue is rather than changing the test. (Note that this is such low resolution that it would already converge without the Fourier continuation, but we want to test that part as well.)

obj = QuasisymmetryBoozer(helicity=(1, eq1.NFP), eq=eq1)
obj.build()
B_asym = obj.compute(*obj.xs(eq1))

np.testing.assert_array_less(np.abs(B_asym).max(), 1e-1)
np.testing.assert_array_less(eq1.compute("a_major/a_minor")["a_major/a_minor"], 5)

eq2 = run_qh_step(1, eq1)

obj = QuasisymmetryBoozer(helicity=(1, eq2.NFP), eq=eq2)
obj.build()
B_asym = obj.compute(*obj.xs(eq2))
np.testing.assert_array_less(np.abs(B_asym).max(), 1e-2)
np.testing.assert_array_less(eq2.compute("a_major/a_minor")["a_major/a_minor"], 5)

eq3 = run_qh_step(2, eq2)

obj = QuasisymmetryBoozer(helicity=(1, eq3.NFP), eq=eq3)
obj.build()
B_asym = obj.compute(*obj.xs(eq3))
np.testing.assert_array_less(np.abs(B_asym).max(), 2e-3)
np.testing.assert_array_less(eq3.compute("a_major/a_minor")["a_major/a_minor"], 5)
np.testing.assert_array_less(eq1.compute("a_major/a_minor")["a_major/a_minor"], 5)


@pytest.mark.regression
tests/test_optimizer.py: 23 changes (21 additions, 2 deletions)

@@ -1242,7 +1242,6 @@ def test_proximal_jacobian():
Gc = Gx @ prox1._dxdc
cutoff = np.finfo(Fxh.dtype).eps * np.max(Fxh.shape)
uf, sf, vtf = jnp.linalg.svd(Fxh, full_matrices=False)
sf += sf[-1] # add a tiny bit of regularization
sfi = np.where(sf < cutoff * sf[0], 0, 1 / sf)
Fxh_inv = vtf.T @ (sfi[..., np.newaxis] * uf.T)
jac_scaled = -Gxh @ (Fxh_inv @ Fc) + Gc
@@ -1255,7 +1254,6 @@
Gc = Gx @ prox1._dxdc
cutoff = np.finfo(Fxh.dtype).eps * np.max(Fxh.shape)
uf, sf, vtf = jnp.linalg.svd(Fxh, full_matrices=False)
sf += sf[-1] # add a tiny bit of regularization
sfi = np.where(sf < cutoff * sf[0], 0, 1 / sf)
Fxh_inv = vtf.T @ (sfi[..., np.newaxis] * uf.T)
jac_unscaled = -Gxh @ (Fxh_inv @ Fc) + Gc
@@ -1294,6 +1292,27 @@
np.testing.assert_allclose(jac_unscaled, jac2, rtol=1e-12, atol=1e-12)
np.testing.assert_allclose(jac_unscaled, jac3, rtol=1e-12, atol=1e-12)

# Test that different inversion methods give the same result.
# Note: svd-reg is expected to differ due to its regularization term.
# jac1 above was computed with the default 'qr' method, so we only need to
# test 'svd' here.
eq_svd = eq.copy()
con_svd = ObjectiveFunction(ForceBalance(eq_svd), use_jit=False)
obj_svd = ObjectiveFunction(
(
QuasisymmetryTripleProduct(eq_svd, deriv_mode="fwd"),
AspectRatio(eq_svd, deriv_mode="fwd"),
Volume(eq_svd, deriv_mode="fwd"),
),
deriv_mode="batched",
use_jit=True,
)
prox_svd = ProximalProjection(
obj_svd, con_svd, eq_svd, perturb_options, solve_options, inv_method="svd"
)
prox_svd.build()
jac_svd = prox_svd.jac_unscaled(x)
np.testing.assert_allclose(jac1, jac_svd, rtol=1e-12, atol=1e-12)


@pytest.mark.slow
@pytest.mark.regression