JuliaSmoothOptimizers · arnavk23 · Oct 3, 2025 · Oct 9, 2025 · Oct 9, 2025 · Oct 10, 2025
diff --git a/src/LMModel.jl b/src/LMModel.jl
diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
@@ -67,7 +67,18 @@ function LMTRSolver(
     ) : shifted(reg_nls.h, xk, one(T), χ)
 
   Jk = jac_op_residual(reg_nls.model, xk)
-  sub_nlp = LMModel(Jk, Fk, T(0), xk)
+  # Compute initial residual
+  residual!(reg_nls.model, xk, Fk)
+  # Create LM-TR subproblem: min 1/2 ||J(x)s + F(x)||^2 (no regularization σ=0)
+  # = min 1/2 (J*s + F)^T(J*s + F)
+  # = min s^T J^T F + 1/2 s^T J^T J s + const
+  # So c = J^T F, H = J^T J
+  JtF = similar(xk)
+  mul!(JtF, Jk', Fk)
+  JtJ = Jk' * Jk
+  # Don't include the constant term to avoid numerical overflow in obj evaluation
+  # The constant doesn't affect the optimization anyway
+  sub_nlp = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
   subpb = RegularizedNLPModel(sub_nlp, ψ)
   substats = RegularizedExecutionStats(subpb)
   subsolver = subsolver(subpb)
@@ -270,8 +281,15 @@ function SolverCore.solve!(
   jtprod_residual!(nls, xk, Fk, ∇fk)
   fk = dot(Fk, Fk) / 2
 
-  σmax, found_σ = opnorm(solver.subpb.model.J)
+  # Get Jacobian operator from original NLS model since QuadraticModel doesn't store it
+  Jk = jac_op_residual(nls, xk)
+  σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
+
+  # Update the QuadraticModel with current Jacobian and gradient
+  JtF = Jk' * Fk
+  JtJ = Jk' * Jk  
+  solver.subpb.model = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
   ν = α * Δk / (1 + σmax^2 * (α * Δk + 1))
   @. mν∇fk = -∇fk * ν
   sqrt_ξ1_νInv = one(T)
@@ -409,8 +427,15 @@ function SolverCore.solve!(
       shift!(ψ, xk)
       jtprod_residual!(nls, xk, Fk, ∇fk)
 
-      σmax, found_σ = opnorm(solver.subpb.model.J)
+      # Get Jacobian operator from original NLS model
+      Jk_update = jac_op_residual(nls, xk)
+      σmax, found_σ = opnorm(Jk_update)
       found_σ || error("operator norm computation failed")
+
+      # Update the QuadraticModel with new Jacobian and gradient
+      JtF = Jk_update' * Fk
+      JtJ = Jk_update' * Jk_update  
+      solver.subpb.model = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
     end
 
     if η2 ≤ ρk < Inf
@@ -424,7 +449,7 @@ function SolverCore.solve!(
     if ρk < η1 || ρk == Inf
       Δk = Δk / 2
       if has_bnds
-        update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, ∆k)
+        update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk)
         set_bounds!(ψ, l_bound_m_x, u_bound_m_x)
         set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x)
       else

diff --git a/src/LM_alg.jl b/src/LM_alg.jl
@@ -16,6 +16,7 @@ mutable struct LMSolver{
   Fkn::V
   Jv::V
   Jtv::V
+  JtF::V  # Add JtF to store the gradient J'F
   ψ::G
   xkn::V
   s::V
@@ -66,7 +67,16 @@ function LMSolver(
     shifted(reg_nls.h, xk)
 
   Jk = jac_op_residual(reg_nls.model, xk)
-  sub_nlp = LMModel(Jk, Fk, T(1), xk)
+  # Compute initial residual
+  residual!(reg_nls.model, xk, Fk)
+  # Create LM subproblem using QuadraticModel: min s^T J^T F + 1/2 s^T (J^T J + σI) s
+  JtF = similar(xk)
+  mul!(JtF, Jk', Fk)
+  n = length(xk)
+  Id = opEye(T, n)  # Identity operator
+  JtJ_plus_sigma = Jk' * Jk + T(1) * Id
+  c0_val = dot(Fk, Fk) / 2  # Initial constant term = 1/2||F||²
+  sub_nlp = QuadraticModel(JtF, JtJ_plus_sigma, c0 = c0_val, x0 = zeros(T, n), name = "LM-subproblem")
   subpb = RegularizedNLPModel(sub_nlp, ψ)
   substats = RegularizedExecutionStats(subpb)
   subsolver = subsolver(subpb)
@@ -79,6 +89,7 @@ function LMSolver(
     Fkn,
     Jv,
     Jtv,
+    JtF,  # Add JtF to constructor
     ψ,
     xkn,
     s,
@@ -275,7 +286,9 @@ function SolverCore.solve!(
   jtprod_residual!(nls, xk, Fk, ∇fk)
   fk = dot(Fk, Fk) / 2
 
-  σmax, found_σ = opnorm(solver.subpb.model.J)
+  # Compute Jacobian norm for normalization
+  Jk = jac_op_residual(nls, xk)
+  σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
   ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
   sqrt_ξ1_νInv = one(T)
@@ -300,7 +313,7 @@ function SolverCore.solve!(
   end
 
   mk = let ψ = ψ, solver = solver
-    d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)
+    d -> obj(solver.subpb.model, d) + ψ(d)
   end
 
   prox!(s, ψ, mν∇fk, ν)
@@ -331,7 +344,18 @@ function SolverCore.solve!(
 
   while !done
     sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
-    solver.subpb.model.σ = σk
+
+    # Update the QuadraticModel with new σ value by recreating the Hessian
+    Jk = jac_op_residual(nls, xk)
+    mul!(solver.JtF, Jk', Fk)  # Update gradient J'F using solver field
+    Id = opEye(T, length(xk))  # Identity operator
+    JtJ_plus_sigma = Jk' * Jk + σk * Id  # Updated Hessian with new σ
+    # Create new model with updated σ
+    # LM model: mk(s) = 1/2||F||² + (J'F)'s + 1/2 s'(J'J + σI)s
+    c0_val = dot(Fk, Fk) / 2  # Constant term = 1/2||F||²
+    new_model = QuadraticModel(solver.JtF, JtJ_plus_sigma, c0 = c0_val, x0 = zeros(T, length(xk)), name = "LM-subproblem")
+    solver.subpb = RegularizedNLPModel(new_model, solver.ψ)
+
     isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν)
     if isa(solver.subsolver, R2Solver) #FIXME
       solve!(solver.subsolver, solver.subpb, solver.substats, x = s, atol = sub_atol, ν = ν)
@@ -402,7 +426,8 @@ function SolverCore.solve!(
 
       # update opnorm if not linear least squares
       if nonlinear == true
-        σmax, found_σ = opnorm(solver.subpb.model.J)
+        Jk = jac_op_residual(nls, xk)
+        σmax, found_σ = opnorm(Jk)
         found_σ || error("operator norm computation failed")
       end
     end

diff --git a/src/R2N.jl b/src/R2N.jl
@@ -1,6 +1,42 @@
 export R2N, R2NSolver, solve!
 
 import SolverCore.solve!
+using LinearAlgebra
+using LinearOperators
+
+# A small mutable wrapper that represents B + sigma*I without allocating a new
+# LinearOperator every time sigma or B changes. It provides mul! methods so it
+# can be used where a LinearOperator is expected.
+mutable struct ShiftedHessian{T}
+  B::Any
+  sigma::T
+end
+
+Base.size(op::ShiftedHessian) = size(op.B)
+Base.eltype(op::ShiftedHessian) = eltype(op.B)
+
+import LinearAlgebra: adjoint
+function adjoint(op::ShiftedHessian{T}) where T
+  return LinearAlgebra.Adjoint(op)
+end
+
+function LinearAlgebra.mul!(y::AbstractVector{T}, op::ShiftedHessian{T}, x::AbstractVector{T}) where T
+  mul!(y, op.B, x)
+  @inbounds for i in eachindex(y)
+    y[i] += op.sigma * x[i]
+  end
+  return y
+end
+
+function LinearAlgebra.mul!(y::AbstractVector{T}, opAd::Adjoint{<:Any,ShiftedHessian{T}}, x::AbstractVector{T}) where T
+  # Use the adjoint of the underlying operator and add sigma*x
+  mul!(y, adjoint(opAd.parent.B), x)
+  @inbounds for i in eachindex(y)
+    y[i] += opAd.parent.sigma * x[i]
+  end
+  return y
+end
+
 
 mutable struct R2NSolver{
   T <: Real,
@@ -28,6 +64,12 @@ mutable struct R2NSolver{
   subsolver::ST
   subpb::PB
   substats::GenericExecutionStats{T, V, V, T}
+  # Pre-allocated components for QuadraticModel recreation
+  Id::LinearOperator  # Identity operator
+  x0_quad::V         # Zero vector for QuadraticModel x0
+  reg_hess::LinearOperator  # regularized Hessian operator
+  reg_hess_wrapper::ShiftedHessian{T}  # mutable wrapper (B, sigma)
+  reg_hess_op::LinearOperator  # LinearOperator that captures the wrapper
 end
 
 function R2NSolver(
@@ -68,7 +110,25 @@ function R2NSolver(
     shifted(reg_nlp.h, xk)
 
   Bk = hess_op(reg_nlp.model, x0)
-  sub_nlp = R2NModel(Bk, ∇fk, T(1), x0)
+  # Create quadratic model: min ∇f^T s + 1/2 s^T B s + σ/2 ||s||^2
+  # QuadraticModel represents: min c^T x + 1/2 x^T H x + c0
+  # So we need c = ∇fk, H = Bk + σI, c0 = 0
+  σ = T(1)
+  n = length(∇fk)
+    Id = opEye(T, n)  # Identity operator
+    x0_quad = zeros(T, n)  # Pre-allocate x0 for QuadraticModel
+    # Create a mutable wrapper around the Hessian so we can update sigma/B without
+    # allocating a new operator every iteration.
+    reg_hess_wrapper = ShiftedHessian{T}(Bk, T(1))
+    # Create a LinearOperator that calls mul! on the wrapper. This operator is
+    # allocated once and keeps a reference to the mutable wrapper, so future
+    # updates can mutate the wrapper without reallocating the operator.
+  reg_hess_op = LinearOperator{T}(n, n, false, false,
+    (y, x) -> mul!(y, reg_hess_wrapper, x),
+    (y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
+    (y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
+  )
+    sub_nlp = QuadraticModel(∇fk, reg_hess_op, c0 = zero(T), x0 = x0_quad, name = "R2N-subproblem")
   subpb = RegularizedNLPModel(sub_nlp, ψ)
   substats = RegularizedExecutionStats(subpb)
   subsolver = subsolver(subpb)
@@ -93,6 +153,11 @@ function R2NSolver(
     subsolver,
     subpb,
     substats,
+    Id,
+    x0_quad,
+    reg_hess_op,
+    reg_hess_wrapper,
+    reg_hess_op,
   )
 end
 
@@ -195,6 +260,14 @@ function R2N(reg_nlp::AbstractRegularizedNLPModel; kwargs...)
   return stats
 end
 
+# Helper function to update QuadraticModel in-place to avoid allocations
+function update_quadratic_model!(qm::QuadraticModel, c::AbstractVector, H=nothing)
+  # Update gradient only; Hessian wrapper should be mutated by the caller
+  copyto!(qm.data.c, c)
+  qm.counters.neval_hess = 0
+  return qm
+end
+
 function SolverCore.solve!(
   solver::R2NSolver{T, G, V},
   reg_nlp::AbstractRegularizedNLPModel{T, V},
@@ -292,12 +365,18 @@ function SolverCore.solve!(
   quasiNewtTest = isa(nlp, QuasiNewtonModel)
   λmax::T = T(1)
   found_λ = true
-  solver.subpb.model.B = hess_op(nlp, xk)
+  # Update the Hessian and update the QuadraticModel
+  Bk_new = hess_op(nlp, xk)
+  σ = T(1)
+  # Update the existing ShiftedHessian wrapper in-place to avoid allocations
+  solver.reg_hess_wrapper.B = Bk_new
+  solver.reg_hess_wrapper.sigma = σ
+  update_quadratic_model!(solver.subpb.model, solver.∇fk)
 
   if opnorm_maxiter ≤ 0
-    λmax, found_λ = opnorm(solver.subpb.model.B)
+    λmax, found_λ = opnorm(Bk_new)
   else
-    λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+    λmax = power_method!(Bk_new, solver.v0, solver.subpb.model.data.v, opnorm_maxiter)
   end
   found_λ || error("operator norm computation failed")
 
@@ -327,7 +406,7 @@ function SolverCore.solve!(
   end
 
   mk = let ψ = ψ, solver = solver
-    d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)::T
+    d -> obj(solver.subpb.model, d) + ψ(d)::T
   end
 
   prox!(s1, ψ, mν∇fk, ν₁)
@@ -361,7 +440,12 @@ function SolverCore.solve!(
   while !done
     sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
 
-    solver.subpb.model.σ = σk
+    # Update QuadraticModel with updated regularization parameter
+  Bk_current = hess_op(nlp, xk)
+  # mutate wrapper in-place
+  solver.reg_hess_wrapper.B = Bk_current
+  solver.reg_hess_wrapper.sigma = σk
+  update_quadratic_model!(solver.subpb.model, solver.∇fk)
     isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν₁)
     if isa(solver.subsolver, R2Solver) #FIXME
       solve!(
@@ -445,12 +529,17 @@ function SolverCore.solve!(
         push!(nlp, s, solver.y)
         qn_copy!(nlp, solver, stats)
       end
-      solver.subpb.model.B = hess_op(nlp, xk)
+      # Update the Hessian and update the QuadraticModel
+    Bk_new = hess_op(nlp, xk)
+    σ = T(1)
+    solver.reg_hess_wrapper.B = Bk_new
+    solver.reg_hess_wrapper.sigma = σ
+    update_quadratic_model!(solver.subpb.model, solver.∇fk)
 
       if opnorm_maxiter ≤ 0
-        λmax, found_λ = opnorm(solver.subpb.model.B)
+        λmax, found_λ = opnorm(Bk_new)
       else
-        λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+        λmax = power_method!(Bk_new, solver.v0, solver.subpb.model.data.v, opnorm_maxiter)
       end
       found_λ || error("operator norm computation failed")
     end