Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
dbbce49
LMModel → LLSModels: Successfully replaced custom LMModel with standa…
arnavk23 Oct 3, 2025
3774c3d
R2N
arnavk23 Oct 9, 2025
685dabb
allocations_56240
arnavk23 Oct 9, 2025
d984a62
Update src/R2N.jl
arnavk23 Oct 10, 2025
8473946
Update src/R2N.jl
arnavk23 Oct 10, 2025
9f9322f
Update src/LM_alg.jl
arnavk23 Oct 10, 2025
4a0d87c
Update src/LMTR_alg.jl
arnavk23 Oct 10, 2025
8ed914e
Update src/LMTR_alg.jl
arnavk23 Oct 10, 2025
b7d2979
Update src/R2N.jl
arnavk23 Oct 10, 2025
b06e017
Update src/R2N.jl
arnavk23 Oct 10, 2025
dd6fc3b
Update src/LM_alg.jl
arnavk23 Oct 10, 2025
37e9beb
changes
arnavk23 Oct 11, 2025
8357983
Update src/utils.jl
arnavk23 Oct 11, 2025
589f75b
Update src/R2N.jl
arnavk23 Oct 11, 2025
0f9f29e
Update src/R2N.jl
arnavk23 Oct 11, 2025
13ce83c
Update src/R2N.jl
arnavk23 Oct 11, 2025
3cef1e2
Update src/R2N.jl
arnavk23 Oct 11, 2025
4e1e1f9
Update src/LM_alg.jl
arnavk23 Oct 11, 2025
7c7aeaf
Update src/LMTR_alg.jl
arnavk23 Oct 11, 2025
f2f89ed
changeset 1a2b3c4d5e6f7g8h9i0jklmnopqrstuv
arnavk23 Oct 11, 2025
a434d5a
More Changes
arnavk23 Oct 12, 2025
2ea55a4
g @warn "LM: subsolver iter count high" iter=solver.substats.iter f…
arnavk23 Oct 12, 2025
e3bd03b
LM_alg
arnavk23 Oct 15, 2025
48811bd
Changes proposed
arnavk23 Oct 17, 2025
92c7efb
Merge branch 'master' into master
arnavk23 Oct 17, 2025
109c004
Update the in-loop status update to recompute the stationarity measur…
arnavk23 Oct 17, 2025
b5e1c58
Update src/R2N.jl
arnavk23 Oct 17, 2025
deef93e
Update src/LMTR_alg.jl
arnavk23 Oct 17, 2025
fb0fbb3
Update src/R2N.jl
arnavk23 Oct 17, 2025
2d08604
Update src/LM_alg.jl
arnavk23 Oct 17, 2025
8dd1b2a
Copilot remarks
arnavk23 Oct 17, 2025
f5d105e
solver error
arnavk23 Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 0 additions & 60 deletions src/LMModel.jl

This file was deleted.

33 changes: 29 additions & 4 deletions src/LMTR_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,18 @@ function LMTRSolver(
) : shifted(reg_nls.h, xk, one(T), χ)

Jk = jac_op_residual(reg_nls.model, xk)
sub_nlp = LMModel(Jk, Fk, T(0), xk)
# Compute initial residual
residual!(reg_nls.model, xk, Fk)
# Create LM-TR subproblem: min 1/2 ||J(x)s + F(x)||^2 (no regularization σ=0)
# = min 1/2 (J*s + F)^T(J*s + F)
# = min s^T J^T F + 1/2 s^T J^T J s + const
# So c = J^T F, H = J^T J
JtF = similar(xk)
mul!(JtF, Jk', Fk)
JtJ = Jk' * Jk
# Don't include the constant term to avoid numerical overflow in obj evaluation
# The constant doesn't affect the optimization anyway
sub_nlp = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
subpb = RegularizedNLPModel(sub_nlp, ψ)
substats = RegularizedExecutionStats(subpb)
subsolver = subsolver(subpb)
Expand Down Expand Up @@ -270,8 +281,15 @@ function SolverCore.solve!(
jtprod_residual!(nls, xk, Fk, ∇fk)
fk = dot(Fk, Fk) / 2

σmax, found_σ = opnorm(solver.subpb.model.J)
# Get Jacobian operator from original NLS model since QuadraticModel doesn't store it
Jk = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk)
found_σ || error("operator norm computation failed")

# Update the QuadraticModel with current Jacobian and gradient
JtF = Jk' * Fk
JtJ = Jk' * Jk
solver.subpb.model = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
ν = α * Δk / (1 + σmax^2 * (α * Δk + 1))
@. mν∇fk = -∇fk * ν
sqrt_ξ1_νInv = one(T)
Expand Down Expand Up @@ -409,8 +427,15 @@ function SolverCore.solve!(
shift!(ψ, xk)
jtprod_residual!(nls, xk, Fk, ∇fk)

σmax, found_σ = opnorm(solver.subpb.model.J)
# Get Jacobian operator from original NLS model
Jk_update = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk_update)
found_σ || error("operator norm computation failed")

# Update the QuadraticModel with new Jacobian and gradient
JtF = Jk_update' * Fk
JtJ = Jk_update' * Jk_update
solver.subpb.model = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
end

if η2 ≤ ρk < Inf
Expand All @@ -424,7 +449,7 @@ function SolverCore.solve!(
if ρk < η1 || ρk == Inf
Δk = Δk / 2
if has_bnds
update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, ∆k)
update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk)
set_bounds!(ψ, l_bound_m_x, u_bound_m_x)
set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x)
else
Expand Down
35 changes: 30 additions & 5 deletions src/LM_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mutable struct LMSolver{
Fkn::V
Jv::V
Jtv::V
JtF::V # Add JtF to store the gradient J'F
ψ::G
xkn::V
s::V
Expand Down Expand Up @@ -66,7 +67,16 @@ function LMSolver(
shifted(reg_nls.h, xk)

Jk = jac_op_residual(reg_nls.model, xk)
sub_nlp = LMModel(Jk, Fk, T(1), xk)
# Compute initial residual
residual!(reg_nls.model, xk, Fk)
# Create LM subproblem using QuadraticModel: min s^T J^T F + 1/2 s^T (J^T J + σI) s
JtF = similar(xk)
mul!(JtF, Jk', Fk)
n = length(xk)
Id = opEye(T, n) # Identity operator
JtJ_plus_sigma = Jk' * Jk + T(1) * Id
c0_val = dot(Fk, Fk) / 2 # Initial constant term = 1/2||F||²
sub_nlp = QuadraticModel(JtF, JtJ_plus_sigma, c0 = c0_val, x0 = zeros(T, n), name = "LM-subproblem")
subpb = RegularizedNLPModel(sub_nlp, ψ)
substats = RegularizedExecutionStats(subpb)
subsolver = subsolver(subpb)
Expand All @@ -79,6 +89,7 @@ function LMSolver(
Fkn,
Jv,
Jtv,
JtF, # Add JtF to constructor
ψ,
xkn,
s,
Expand Down Expand Up @@ -275,7 +286,9 @@ function SolverCore.solve!(
jtprod_residual!(nls, xk, Fk, ∇fk)
fk = dot(Fk, Fk) / 2

σmax, found_σ = opnorm(solver.subpb.model.J)
# Compute Jacobian norm for normalization
Jk = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk)
found_σ || error("operator norm computation failed")
ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
sqrt_ξ1_νInv = one(T)
Expand All @@ -300,7 +313,7 @@ function SolverCore.solve!(
end

mk = let ψ = ψ, solver = solver
d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)
d -> obj(solver.subpb.model, d) + ψ(d)
end

prox!(s, ψ, mν∇fk, ν)
Expand Down Expand Up @@ -331,7 +344,18 @@ function SolverCore.solve!(

while !done
sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
solver.subpb.model.σ = σk

# Update the QuadraticModel with new σ value by recreating the Hessian
Jk = jac_op_residual(nls, xk)
mul!(solver.JtF, Jk', Fk) # Update gradient J'F using solver field
Id = opEye(T, length(xk)) # Identity operator
JtJ_plus_sigma = Jk' * Jk + σk * Id # Updated Hessian with new σ
# Create new model with updated σ
# LM model: mk(s) = 1/2||F||² + (J'F)'s + 1/2 s'(J'J + σI)s
c0_val = dot(Fk, Fk) / 2 # Constant term = 1/2||F||²
new_model = QuadraticModel(solver.JtF, JtJ_plus_sigma, c0 = c0_val, x0 = zeros(T, length(xk)), name = "LM-subproblem")
solver.subpb = RegularizedNLPModel(new_model, solver.ψ)

isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν)
if isa(solver.subsolver, R2Solver) #FIXME
solve!(solver.subsolver, solver.subpb, solver.substats, x = s, atol = sub_atol, ν = ν)
Expand Down Expand Up @@ -402,7 +426,8 @@ function SolverCore.solve!(

# update opnorm if not linear least squares
if nonlinear == true
σmax, found_σ = opnorm(solver.subpb.model.J)
Jk = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk)
found_σ || error("operator norm computation failed")
end
end
Expand Down
107 changes: 98 additions & 9 deletions src/R2N.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,42 @@
export R2N, R2NSolver, solve!

import SolverCore.solve!
using LinearAlgebra
using LinearOperators

# A small mutable wrapper that represents B + sigma*I without allocating a new
# LinearOperator every time sigma or B changes. It provides mul! methods so it
# can be used where a LinearOperator is expected.
mutable struct ShiftedHessian{T}
B::Any
sigma::T
end

Base.size(op::ShiftedHessian) = size(op.B)
Base.eltype(op::ShiftedHessian) = eltype(op.B)

import LinearAlgebra: adjoint
function adjoint(op::ShiftedHessian{T}) where T
return LinearAlgebra.Adjoint(op)
end

function LinearAlgebra.mul!(y::AbstractVector{T}, op::ShiftedHessian{T}, x::AbstractVector{T}) where T
mul!(y, op.B, x)
@inbounds for i in eachindex(y)
y[i] += op.sigma * x[i]
end
return y
end

function LinearAlgebra.mul!(y::AbstractVector{T}, opAd::Adjoint{<:Any,ShiftedHessian{T}}, x::AbstractVector{T}) where T
# Use the adjoint of the underlying operator and add sigma*x
mul!(y, adjoint(opAd.parent.B), x)
@inbounds for i in eachindex(y)
y[i] += opAd.parent.sigma * x[i]
end
return y
end


mutable struct R2NSolver{
T <: Real,
Expand Down Expand Up @@ -28,6 +64,12 @@ mutable struct R2NSolver{
subsolver::ST
subpb::PB
substats::GenericExecutionStats{T, V, V, T}
# Pre-allocated components for QuadraticModel recreation
Id::LinearOperator # Identity operator
x0_quad::V # Zero vector for QuadraticModel x0
reg_hess::LinearOperator # regularized Hessian operator
reg_hess_wrapper::ShiftedHessian{T} # mutable wrapper (B, sigma)
reg_hess_op::LinearOperator # LinearOperator that captures the wrapper
end

function R2NSolver(
Expand Down Expand Up @@ -68,7 +110,25 @@ function R2NSolver(
shifted(reg_nlp.h, xk)

Bk = hess_op(reg_nlp.model, x0)
sub_nlp = R2NModel(Bk, ∇fk, T(1), x0)
# Create quadratic model: min ∇f^T s + 1/2 s^T B s + σ/2 ||s||^2
# QuadraticModel represents: min c^T x + 1/2 x^T H x + c0
# So we need c = ∇fk, H = Bk + σI, c0 = 0
σ = T(1)
n = length(∇fk)
Id = opEye(T, n) # Identity operator
x0_quad = zeros(T, n) # Pre-allocate x0 for QuadraticModel
# Create a mutable wrapper around the Hessian so we can update sigma/B without
# allocating a new operator every iteration.
reg_hess_wrapper = ShiftedHessian{T}(Bk, T(1))
# Create a LinearOperator that calls mul! on the wrapper. This operator is
# allocated once and keeps a reference to the mutable wrapper, so future
# updates can mutate the wrapper without reallocating the operator.
reg_hess_op = LinearOperator{T}(n, n, false, false,
(y, x) -> mul!(y, reg_hess_wrapper, x),
(y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
(y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
)
sub_nlp = QuadraticModel(∇fk, reg_hess_op, c0 = zero(T), x0 = x0_quad, name = "R2N-subproblem")
subpb = RegularizedNLPModel(sub_nlp, ψ)
substats = RegularizedExecutionStats(subpb)
subsolver = subsolver(subpb)
Expand All @@ -93,6 +153,11 @@ function R2NSolver(
subsolver,
subpb,
substats,
Id,
x0_quad,
reg_hess_op,
reg_hess_wrapper,
reg_hess_op,
)
end

Expand Down Expand Up @@ -195,6 +260,14 @@ function R2N(reg_nlp::AbstractRegularizedNLPModel; kwargs...)
return stats
end

# Helper function to update QuadraticModel in-place to avoid allocations
function update_quadratic_model!(qm::QuadraticModel, c::AbstractVector, H=nothing)
# Update gradient only; Hessian wrapper should be mutated by the caller
copyto!(qm.data.c, c)
qm.counters.neval_hess = 0
return qm
end
Comment on lines +254 to +255
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function accepts H but never applies it. Either remove the unused parameter or support in-place Hessian updates:
if H !== nothing; qm.data.H = H; end.

Copilot uses AI. Check for mistakes.

function SolverCore.solve!(
solver::R2NSolver{T, G, V},
reg_nlp::AbstractRegularizedNLPModel{T, V},
Expand Down Expand Up @@ -292,12 +365,18 @@ function SolverCore.solve!(
quasiNewtTest = isa(nlp, QuasiNewtonModel)
λmax::T = T(1)
found_λ = true
solver.subpb.model.B = hess_op(nlp, xk)
# Update the Hessian and update the QuadraticModel
Bk_new = hess_op(nlp, xk)
σ = T(1)
# Update the existing ShiftedHessian wrapper in-place to avoid allocations
solver.reg_hess_wrapper.B = Bk_new
solver.reg_hess_wrapper.sigma = σ
update_quadratic_model!(solver.subpb.model, solver.∇fk)

if opnorm_maxiter ≤ 0
λmax, found_λ = opnorm(solver.subpb.model.B)
λmax, found_λ = opnorm(Bk_new)
else
λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
λmax = power_method!(Bk_new, solver.v0, solver.subpb.model.data.v, opnorm_maxiter)
end
found_λ || error("operator norm computation failed")

Expand Down Expand Up @@ -327,7 +406,7 @@ function SolverCore.solve!(
end

mk = let ψ = ψ, solver = solver
d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)::T
d -> obj(solver.subpb.model, d) + ψ(d)::T
end

prox!(s1, ψ, mν∇fk, ν₁)
Expand Down Expand Up @@ -361,7 +440,12 @@ function SolverCore.solve!(
while !done
sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)

solver.subpb.model.σ = σk
# Update QuadraticModel with updated regularization parameter
Bk_current = hess_op(nlp, xk)
# mutate wrapper in-place
solver.reg_hess_wrapper.B = Bk_current
solver.reg_hess_wrapper.sigma = σk
update_quadratic_model!(solver.subpb.model, solver.∇fk)
isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν₁)
if isa(solver.subsolver, R2Solver) #FIXME
solve!(
Expand Down Expand Up @@ -445,12 +529,17 @@ function SolverCore.solve!(
push!(nlp, s, solver.y)
qn_copy!(nlp, solver, stats)
end
solver.subpb.model.B = hess_op(nlp, xk)
# Update the Hessian and update the QuadraticModel
Bk_new = hess_op(nlp, xk)
σ = T(1)
solver.reg_hess_wrapper.B = Bk_new
solver.reg_hess_wrapper.sigma = σ
update_quadratic_model!(solver.subpb.model, solver.∇fk)

if opnorm_maxiter ≤ 0
λmax, found_λ = opnorm(solver.subpb.model.B)
λmax, found_λ = opnorm(Bk_new)
else
λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
λmax = power_method!(Bk_new, solver.v0, solver.subpb.model.data.v, opnorm_maxiter)
end
found_λ || error("operator norm computation failed")
end
Expand Down
Loading