Skip to content

Commit 1564813

Browse files
Merge branch 'JuliaSmoothOptimizers:master' into LMTR-JSO
2 parents 6d413f1 + b9797b2 commit 1564813

File tree

5 files changed

+54
-16
lines changed

5 files changed

+54
-16
lines changed

src/R2N.jl

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mutable struct R2NSolver{
1212
xk::V
1313
∇fk::V
1414
∇fk⁻::V
15+
y::V
1516
mν∇fk::V
1617
ψ::G
1718
xkn::V
@@ -40,6 +41,7 @@ function R2NSolver(
4041
xk = similar(x0)
4142
∇fk = similar(x0)
4243
∇fk⁻ = similar(x0)
44+
y = similar(x0)
4345
mν∇fk = similar(x0)
4446
xkn = similar(x0)
4547
s = similar(x0)
@@ -70,6 +72,7 @@ function R2NSolver(
7072
xk,
7173
∇fk,
7274
∇fk⁻,
75+
y,
7376
mν∇fk,
7477
ψ,
7578
xkn,
@@ -154,6 +157,12 @@ Notably, you can access, and modify, the following:
154157
- `stats.solver_specific[:nonsmooth_obj]`: current value of the nonsmooth part of the objective function;
155158
- `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything other than `:unknown` will stop the algorithm, but you should use `:user` to properly indicate the intention;
156159
- `stats.elapsed_time`: elapsed time in seconds.
160+
Similarly to the callback, when using a quasi-Newton approximation, two functions, `qn_update_y!(nlp, solver, stats)` and `qn_copy!(nlp, solver, stats)` are called at each update of the approximation.
161+
Namely, the former computes the `y` vector for which the pair `(s, y)` is pushed into the approximation.
162+
By default, `y := ∇fk⁻ - ∇fk`.
163+
The latter allows the user to tell which values should be copied for the next iteration.
164+
By default, only the gradient is copied: `∇fk⁻ .= ∇fk`.
165+
This might be useful when using R2N in a constrained optimization context, when the gradient of the Lagrangian function is pushed at each iteration rather than the gradient of the objective function.
157166
"""
158167
function R2N(
159168
nlp::AbstractNLPModel{T, V},
@@ -200,6 +209,8 @@ function SolverCore.solve!(
200209
reg_nlp::AbstractRegularizedNLPModel{T, V},
201210
stats::GenericExecutionStats{T, V};
202211
callback = (args...) -> nothing,
212+
qn_update_y!::Function = _qn_grad_update_y!,
213+
qn_copy!::Function = _qn_grad_copy!,
203214
x::V = reg_nlp.model.meta.x0,
204215
atol::T = eps(T),
205216
rtol::T = eps(T),
@@ -283,7 +294,7 @@ function SolverCore.solve!(
283294

284295
fk = obj(nlp, xk)
285296
grad!(nlp, xk, ∇fk)
286-
∇fk⁻ .= ∇fk
297+
qn_copy!(nlp, solver, stats)
287298

288299
quasiNewtTest = isa(nlp, QuasiNewtonModel)
289300
λmax::T = T(1)
@@ -416,15 +427,14 @@ function SolverCore.solve!(
416427
grad!(nlp, xk, ∇fk)
417428

418429
if quasiNewtTest
419-
@. ∇fk⁻ = ∇fk - ∇fk⁻
420-
push!(nlp, s, ∇fk⁻)
430+
qn_update_y!(nlp, solver, stats)
431+
push!(nlp, s, solver.y)
432+
qn_copy!(nlp, solver, stats)
421433
end
422434
solver.subpb.model.B = hess_op(nlp, xk)
423435

424436
λmax, found_λ = opnorm(solver.subpb.model.B)
425437
found_λ || error("operator norm computation failed")
426-
427-
∇fk⁻ .= ∇fk
428438
end
429439

430440
if η2 ≤ ρk < Inf
@@ -500,3 +510,19 @@ function SolverCore.solve!(
500510
set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv)
501511
return stats
502512
end
513+
514+
function _qn_grad_update_y!(
515+
nlp::AbstractNLPModel{T, V},
516+
solver::R2NSolver{T, G, V},
517+
stats::GenericExecutionStats,
518+
) where {T, V, G}
519+
@. solver.y = solver.∇fk - solver.∇fk⁻
520+
end
521+
522+
function _qn_grad_copy!(
523+
nlp::AbstractNLPModel{T, V},
524+
solver::R2NSolver{T, G, V},
525+
stats::GenericExecutionStats,
526+
) where {T, V, G}
527+
solver.∇fk⁻ .= solver.∇fk
528+
end

src/RegularizedOptimization.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@ using Arpack, ProximalOperators
88

99
# dependencies from us
1010
using LinearOperators,
11-
ManualNLPModels, NLPModels, NLPModelsModifiers, RegularizedProblems, ShiftedProximalOperators, SolverCore
11+
ManualNLPModels,
12+
NLPModels,
13+
NLPModelsModifiers,
14+
RegularizedProblems,
15+
ShiftedProximalOperators,
16+
SolverCore
1217
using Percival: AugLagModel, update_y!, update_μ!
1318

1419
const callback_docstring = "

src/TRDH_alg.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ function TRDH(
190190
selected::AbstractVector{<:Integer} = 1:length(x0),
191191
kwargs...,
192192
) where {R <: Real, F, G, H, DQN <: AbstractDiagonalQuasiNewtonOperator, X}
193-
nlp = NLPModel(x0, f, grad=∇f!)
193+
nlp = NLPModel(x0, f, grad = ∇f!)
194194
reg_nlp = RegularizedNLPModel(nlp, h, selected)
195195
stats = TRDH(
196196
reg_nlp;

src/TR_alg.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ function TRSolver(
5959
shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) :
6060
shifted(reg_nlp.h, xk, T(1), χ)
6161

62-
Bk = isa(reg_nlp.model, QuasiNewtonModel) ? hess_op(reg_nlp.model, xk) : hess_op!(reg_nlp.model, xk, similar(xk))
62+
Bk =
63+
isa(reg_nlp.model, QuasiNewtonModel) ? hess_op(reg_nlp.model, xk) :
64+
hess_op!(reg_nlp.model, xk, similar(xk))
6365
sub_nlp = R2NModel(Bk, ∇fk, zero(T), x0) #FIXME
6466
subpb = RegularizedNLPModel(sub_nlp, ψ)
6567
substats = RegularizedExecutionStats(subpb)
@@ -341,7 +343,7 @@ function SolverCore.solve!(
341343
set_radius!(solver.subsolver.ψ, ∆_effective)
342344
set_radius!(ψ, ∆_effective)
343345
end
344-
with_logger(subsolver_logger) do
346+
with_logger(subsolver_logger) do
345347
if isa(solver.subsolver, TRDHSolver) #FIXME
346348
solver.subsolver.D.d[1] = 1/ν₁
347349
solve!(

test/test_allocs.jl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,26 @@ end
4242
# Test non allocating solve!
4343
@testset "NLP allocs" begin
4444
for (h, h_name) ∈ ((NormL0(λ), "l0"),)
45-
for (solver, solver_name) ∈ ((:R2Solver, "R2"), (:R2DHSolver, "R2DH"), (:R2NSolver, "R2N"), (:TRDHSolver, "TRDH"), (:TRSolver, "TR"))
45+
for (solver, solver_name) ∈ (
46+
(:R2Solver, "R2"),
47+
(:R2DHSolver, "R2DH"),
48+
(:R2NSolver, "R2N"),
49+
(:TRDHSolver, "TRDH"),
50+
(:TRSolver, "TR"),
51+
)
4652
@testset "$(solver_name)" begin
4753
(solver_name == "R2N" || solver_name == "TR") && continue #FIXME
4854
reg_nlp = RegularizedNLPModel(LBFGSModel(bpdn), h)
4955
solver = eval(solver)(reg_nlp)
5056
stats = RegularizedExecutionStats(reg_nlp)
51-
solver_name == "R2" && @test @wrappedallocs(
52-
solve!(solver, reg_nlp, stats, ν = 1.0, atol = 1e-6, rtol = 1e-6)
53-
) == 0
57+
solver_name == "R2" &&
58+
@test @wrappedallocs(solve!(solver, reg_nlp, stats, ν = 1.0, atol = 1e-6, rtol = 1e-6)) ==
59+
0
5460
solver_name == "R2DH" && @test @wrappedallocs(
5561
solve!(solver, reg_nlp, stats, σk = 1.0, atol = 1e-6, rtol = 1e-6)
5662
) == 0
57-
solver_name == "TRDH" && @test @wrappedallocs(
58-
solve!(solver, reg_nlp, stats, atol = 1e-6, rtol = 1e-6)
59-
) == 0
63+
solver_name == "TRDH" &&
64+
@test @wrappedallocs(solve!(solver, reg_nlp, stats, atol = 1e-6, rtol = 1e-6)) == 0
6065
@test stats.status == :first_order
6166
end
6267
end

0 commit comments

Comments
 (0)