Skip to content
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
dbbce49
LMModel → LLSModels: Successfully replaced custom LMModel with standa…
arnavk23 Oct 3, 2025
3774c3d
R2N
arnavk23 Oct 9, 2025
685dabb
allocations_56240
arnavk23 Oct 9, 2025
d984a62
Update src/R2N.jl
arnavk23 Oct 10, 2025
8473946
Update src/R2N.jl
arnavk23 Oct 10, 2025
9f9322f
Update src/LM_alg.jl
arnavk23 Oct 10, 2025
4a0d87c
Update src/LMTR_alg.jl
arnavk23 Oct 10, 2025
8ed914e
Update src/LMTR_alg.jl
arnavk23 Oct 10, 2025
b7d2979
Update src/R2N.jl
arnavk23 Oct 10, 2025
b06e017
Update src/R2N.jl
arnavk23 Oct 10, 2025
dd6fc3b
Update src/LM_alg.jl
arnavk23 Oct 10, 2025
37e9beb
changes
arnavk23 Oct 11, 2025
8357983
Update src/utils.jl
arnavk23 Oct 11, 2025
589f75b
Update src/R2N.jl
arnavk23 Oct 11, 2025
0f9f29e
Update src/R2N.jl
arnavk23 Oct 11, 2025
13ce83c
Update src/R2N.jl
arnavk23 Oct 11, 2025
3cef1e2
Update src/R2N.jl
arnavk23 Oct 11, 2025
4e1e1f9
Update src/LM_alg.jl
arnavk23 Oct 11, 2025
7c7aeaf
Update src/LMTR_alg.jl
arnavk23 Oct 11, 2025
f2f89ed
changeset 1a2b3c4d5e6f7g8h9i0jklmnopqrstuv
arnavk23 Oct 11, 2025
a434d5a
More Changes
arnavk23 Oct 12, 2025
2ea55a4
g @warn "LM: subsolver iter count high" iter=solver.substats.iter f…
arnavk23 Oct 12, 2025
e3bd03b
LM_alg
arnavk23 Oct 15, 2025
48811bd
Changes proposed
arnavk23 Oct 17, 2025
92c7efb
Merge branch 'master' into master
arnavk23 Oct 17, 2025
109c004
Update the in-loop status update to recompute the stationarity measur…
arnavk23 Oct 17, 2025
b5e1c58
Update src/R2N.jl
arnavk23 Oct 17, 2025
deef93e
Update src/LMTR_alg.jl
arnavk23 Oct 17, 2025
fb0fbb3
Update src/R2N.jl
arnavk23 Oct 17, 2025
2d08604
Update src/LM_alg.jl
arnavk23 Oct 17, 2025
8dd1b2a
Copilot remarks
arnavk23 Oct 17, 2025
f5d105e
solver error
arnavk23 Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 0 additions & 60 deletions src/LMModel.jl

This file was deleted.

43 changes: 37 additions & 6 deletions src/LMTR_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ function LMTRSolver(
xk = similar(x0)
∇fk = similar(x0)
mν∇fk = similar(x0)
Fk = similar(x0, reg_nls.model.nls_meta.nequ)
Fkn = similar(Fk)
# residuals will be allocated after creating Jacobian operator
Fk = similar(x0, 0)
Fkn = similar(x0, 0)
xkn = similar(x0)
s = similar(x0)
has_bnds = any(l_bound .!= T(-Inf)) || any(u_bound .!= T(Inf)) || subsolver == TRDHSolver
Expand All @@ -67,7 +68,18 @@ function LMTRSolver(
) : shifted(reg_nls.h, xk, one(T), χ)

Jk = jac_op_residual(reg_nls.model, xk)
sub_nlp = LMModel(Jk, Fk, T(0), xk)
# Compute initial residual
residual!(reg_nls.model, xk, Fk)
# Create LM-TR subproblem: min 1/2 ||J(x)s + F(x)||^2 (no regularization σ=0)
# = min 1/2 (J*s + F)^T(J*s + F)
# = min s^T J^T F + 1/2 s^T J^T J s + const
# So c = J^T F, H = J^T J
JtF = similar(xk)
mul!(JtF, Jk', Fk)
JtJ = Jk' * Jk
# Don't include the constant term to avoid numerical overflow in obj evaluation
# The constant doesn't affect the optimization anyway
sub_nlp = QuadraticModel(JtF, JtJ, c0 = zero(T), x0 = zeros(T, length(xk)), name = "LMTR-subproblem")
subpb = RegularizedNLPModel(sub_nlp, ψ)
substats = RegularizedExecutionStats(subpb)
subsolver = subsolver(subpb)
Expand Down Expand Up @@ -270,8 +282,20 @@ function SolverCore.solve!(
jtprod_residual!(nls, xk, Fk, ∇fk)
fk = dot(Fk, Fk) / 2

σmax, found_σ = opnorm(solver.subpb.model.J)
# Get Jacobian operator from original NLS model since QuadraticModel doesn't store it
Jk = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk)
found_σ || error("operator norm computation failed")

# Update the QuadraticModel with current Jacobian and gradient
JtF = Jk' * Fk
JtJ = Jk' * Jk
# In-place update of QuadraticModel fields to avoid allocation
solver.subpb.model.g = JtF
solver.subpb.model.H = JtJ
solver.subpb.model.c0 = zero(T)
solver.subpb.model.x0 .= 0
solver.subpb.model.name = "LMTR-subproblem"
ν = α * Δk / (1 + σmax^2 * (α * Δk + 1))
@. mν∇fk = -∇fk * ν
sqrt_ξ1_νInv = one(T)
Expand Down Expand Up @@ -409,8 +433,15 @@ function SolverCore.solve!(
shift!(ψ, xk)
jtprod_residual!(nls, xk, Fk, ∇fk)

σmax, found_σ = opnorm(solver.subpb.model.J)
# Get Jacobian operator from original NLS model
Jk_update = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk_update)
found_σ || error("operator norm computation failed")

# Update the QuadraticModel with new Jacobian and gradient
JtF = Jk_update' * Fk
JtJ = Jk_update' * Jk_update
update_model!(solver.subpb.model, JtF, JtJ)
end

if η2 ≤ ρk < Inf
Expand All @@ -424,7 +455,7 @@ function SolverCore.solve!(
if ρk < η1 || ρk == Inf
Δk = Δk / 2
if has_bnds
update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, ∆k)
update_bounds!(l_bound_m_x, u_bound_m_x, false, l_bound, u_bound, xk, Δk)
set_bounds!(ψ, l_bound_m_x, u_bound_m_x)
set_bounds!(solver.subsolver.ψ, l_bound_m_x, u_bound_m_x)
else
Expand Down
95 changes: 85 additions & 10 deletions src/LM_alg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mutable struct LMSolver{
Fkn::V
Jv::V
Jtv::V
JtF::V # Add JtF to store the gradient J'F
ψ::G
xkn::V
s::V
Expand All @@ -28,6 +29,12 @@ mutable struct LMSolver{
subsolver::ST
subpb::PB
substats::GenericExecutionStats{T, V, V, T}
# Preallocated QuadraticModel components to avoid reallocations
x0_quad::V
reg_hess_wrapper::ShiftedHessian{T}
reg_hess_op::LinearOperator
v0::V # workspace for power method
tmp_res::V # temporary residual storage for power method
end

function LMSolver(
Expand All @@ -42,9 +49,10 @@ function LMSolver(
xk = similar(x0)
∇fk = similar(x0)
mν∇fk = similar(x0)
Fk = similar(x0, reg_nls.model.nls_meta.nequ)
Fkn = similar(Fk)
Jv = similar(Fk)
# residuals will be allocated after creating Jacobian operator (size may vary)
Fk = similar(x0, 0)
Fkn = similar(x0, 0)
Jv = similar(x0, 0)
Jtv = similar(x0)
xkn = similar(x0)
s = similar(x0)
Expand All @@ -65,11 +73,40 @@ function LMSolver(
has_bnds ? shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected) :
shifted(reg_nls.h, xk)

Jk = jac_op_residual(reg_nls.model, xk)
sub_nlp = LMModel(Jk, Fk, T(1), xk)
# Defer Jacobian/residual construction to solve!; preallocate JtF (length n)
JtF = similar(xk)
# residuals will be allocated in solve! when the Jacobian operator is available
Fk = similar(x0, 0)
Fkn = similar(x0, 0)
Jv = similar(x0, 0)
n = length(xk)
c0_val = dot(Fk, Fk) / 2 # Initial constant term = 1/2||F||²
x0_quad = zeros(T, n) # Pre-allocated x0 for QuadraticModel
# Create mutable wrapper around Hessian; use JacobianGram to avoid forming J'J
gram = JacobianGram{T}(nothing, zeros(T, 0))
# Try to initialize JacobianGram.tmp with the row size of the Jacobian at x0
try
J_init = jac_op_residual(reg_nls.model, x0)
gram.J = J_init
gram.tmp = zeros(T, size(J_init, 1))
catch
# If jac_op_residual is not available for this model type, leave tmp empty
gram.J = nothing
gram.tmp = zeros(T, 0)
end
reg_hess_wrapper = ShiftedHessian{T}(gram, T(1))
reg_hess_op = LinearOperator{T}(n, n, false, false,
(y, x) -> mul!(y, reg_hess_wrapper, x),
(y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
(y, x) -> mul!(y, adjoint(reg_hess_wrapper), x),
)
sub_nlp = QuadraticModel(JtF, reg_hess_op, c0 = c0_val, x0 = x0_quad, name = "LM-subproblem")
subpb = RegularizedNLPModel(sub_nlp, ψ)
substats = RegularizedExecutionStats(subpb)
subsolver = subsolver(subpb)
v0 = [(-1.0)^i for i = 0:(n - 1)]
v0 ./= sqrt(n)
tmp_res = copy(gram.tmp)

return LMSolver{T, typeof(ψ), V, typeof(subsolver), typeof(subpb)}(
xk,
Expand All @@ -79,6 +116,7 @@ function LMSolver(
Fkn,
Jv,
Jtv,
JtF, # Add JtF to constructor
ψ,
xkn,
s,
Expand All @@ -91,6 +129,11 @@ function LMSolver(
subsolver,
subpb,
substats,
x0_quad,
reg_hess_wrapper,
reg_hess_op,
v0,
tmp_res,
)
end

Expand Down Expand Up @@ -271,12 +314,28 @@ function SolverCore.solve!(
local ρk::T = zero(T)
local prox_evals::Int = 0

# Ensure residual vectors are sized to match the Jacobian rows
Jk = jac_op_residual(nls, xk)
m = size(Jk, 1)
if length(Fk) != m
solver.Fk = zeros(T, m)
solver.Fkn = similar(solver.Fk)
solver.Jv = similar(solver.Fk)
Fk = solver.Fk
Fkn = solver.Fkn
Jv = solver.Jv
end
residual!(nls, xk, Fk)
jtprod_residual!(nls, xk, Fk, ∇fk)
fk = dot(Fk, Fk) / 2

σmax, found_σ = opnorm(solver.subpb.model.J)
found_σ || error("operator norm computation failed")
# Compute Jacobian norm for normalization
Jk = jac_op_residual(nls, xk)
# Estimate Jacobian operator norm (largest singular value) via power method to avoid heavy allocations
if length(solver.tmp_res) != size(Jk, 1)
solver.tmp_res = zeros(T, size(Jk, 1))
end
σmax = power_method_singular!(Jk, solver.v0, solver.subpb.model.data.v, solver.tmp_res, 5)
ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
sqrt_ξ1_νInv = one(T)

Expand All @@ -300,7 +359,7 @@ function SolverCore.solve!(
end

mk = let ψ = ψ, solver = solver
d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)
d -> obj(solver.subpb.model, d) + ψ(d)
end

prox!(s, ψ, mν∇fk, ν)
Expand Down Expand Up @@ -331,7 +390,22 @@ function SolverCore.solve!(

while !done
sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
solver.subpb.model.σ = σk

# Update the QuadraticModel in-place: update J'F and mutate the Hessian wrapper
Jk = jac_op_residual(nls, xk)
mul!(solver.JtF, Jk', Fk) # Update gradient J'F in-place
# Update underlying JacobianGram wrapper to reference the new J and resize tmp
solver.reg_hess_wrapper.B.J = Jk
if length(solver.tmp_res) != size(Jk, 1)
solver.reg_hess_wrapper.B.tmp = zeros(T, size(Jk, 1))
else
solver.reg_hess_wrapper.B.tmp .= 0
end
solver.reg_hess_wrapper.sigma = σk
# Update QuadraticModel's gradient/counters in-place
c0_val = dot(Fk, Fk) / 2 # Constant term = 1/2||F||²
update_quadratic_model!(solver.subpb.model, solver.JtF)

isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν)
if isa(solver.subsolver, R2Solver) #FIXME
solve!(solver.subsolver, solver.subpb, solver.substats, x = s, atol = sub_atol, ν = ν)
Expand Down Expand Up @@ -402,7 +476,8 @@ function SolverCore.solve!(

# update opnorm if not linear least squares
if nonlinear == true
σmax, found_σ = opnorm(solver.subpb.model.J)
Jk = jac_op_residual(nls, xk)
σmax, found_σ = opnorm(Jk)
found_σ || error("operator norm computation failed")
end
end
Expand Down
Loading