From b50c82abd77cea51c2d77f113862a1986236f705 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Sun, 23 Feb 2025 18:50:46 -0500 Subject: [PATCH 1/5] update theta default value --- src/LMTR_alg.jl | 4 ++-- src/LM_alg.jl | 4 ++-- src/input_struct.jl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index 6eff53c1..339f899b 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -132,7 +132,7 @@ function LMTR( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * σmax^2 # ‖J'J‖ = ‖J‖² + νInv = σmax^2/θ # ‖J'J‖ = ‖J‖² mν∇fk = -∇fk / νInv @@ -255,7 +255,7 @@ function LMTR( jtprod_residual!(nls, xk, Fk, ∇fk) σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * σmax^2 # ‖J'J‖ = ‖J‖² + νInv = σmax^2/θ # ‖J'J‖ = ‖J‖² @. mν∇fk = -∇fk / νInv end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 163cc6fe..bda831b8 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -127,7 +127,7 @@ function LM( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ s = zero(xk) @@ -260,7 +260,7 @@ function LM( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ tired = k ≥ maxIter || elapsed_time > maxTime end diff --git a/src/input_struct.jl b/src/input_struct.jl index 7242b90a..1b8b1581 100644 --- a/src/input_struct.jl +++ b/src/input_struct.jl @@ -34,7 +34,7 @@ mutable struct ROSolverOptions{R} α::R = 1 / eps(R), ν::R = eps(R)^(1 / 5), γ::R = R(3), - θ::R = eps(R)^(1 / 5), + θ::R = 1/(1+eps(R)^(1 / 5)), β::R = 1 / eps(R), reduce_TR::Bool = true, ) where {R <: Real} From d34e765af51049e73cb04bb38d599410816c8af3 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Mon, 3 Mar 2025 13:21:23 -0500 Subject: [PATCH 2/5] reformat code in LM & LMTR --- src/LMTR_alg.jl | 4 ++-- src/LM_alg.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index 339f899b..4c0e3ee7 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -132,7 +132,7 @@ function LMTR( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = σmax^2/θ # ‖J'J‖ = ‖J‖² + νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖² mν∇fk = -∇fk / νInv @@ -255,7 +255,7 @@ function LMTR( jtprod_residual!(nls, xk, Fk, ∇fk) σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = σmax^2/θ # ‖J'J‖ = ‖J‖² + νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖² @. mν∇fk = -∇fk / νInv end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index bda831b8..35c95586 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -127,7 +127,7 @@ function LM( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ s = zero(xk) @@ -260,7 +260,7 @@ function LM( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ tired = k ≥ maxIter || elapsed_time > maxTime end From 8e80540220862297beb6c0b505104697ba3cec88 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 26 Mar 2025 11:07:17 -0400 Subject: [PATCH 3/5] remove nu_Inv in LM & LMTR --- src/LMTR_alg.jl | 20 ++++++++++---------- src/LM_alg.jl | 13 ++++++------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index 4c0e3ee7..e7df359c 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -132,9 +132,9 @@ function LMTR( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖² + ν = θ / σmax^2 # ‖J'J‖ = ‖J‖² - mν∇fk = -∇fk / νInv + mν∇fk = -∇fk * ν optimal = false tired = k ≥ maxIter || elapsed_time > maxTime @@ -174,8 +174,8 @@ function LMTR( # Take first proximal gradient step s1 and see if current xk is nearly stationary. # s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)) - ν = 1 / (νInv + 1 / (Δk * α)) - prox!(s, ψ, mν∇fk, ν) + ν1 = 1 / (1/ν + 1 / (Δk * α)) + prox!(s, ψ, mν∇fk, ν1) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") @@ -197,8 +197,8 @@ function LMTR( set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : set_radius!(ψ, ∆_effective) subsolver_options.Δk = ∆_effective / 10 - subsolver_options.ν = ν - subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () + subsolver_options.ν = ν1 + subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν1, nls.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end @@ -232,7 +232,7 @@ function LMTR( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm νInv TR_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm 1/ν TR_stat #! format: on end @@ -255,8 +255,8 @@ function LMTR( jtprod_residual!(nls, xk, Fk, ∇fk) σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖² - @. mν∇fk = -∇fk / νInv + ν = θ / σmax^2 # ‖J'J‖ = ‖J‖² + @. mν∇fk = -∇fk * ν end if ρk < η1 || ρk == Inf @@ -273,7 +273,7 @@ function LMTR( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) 1/ν #! format: on @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))" end diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 35c95586..45242b0d 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -127,7 +127,7 @@ function LM( σmax, found_σ = opnorm(Jk) found_σ || error("operator norm computation failed") - νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ s = zero(xk) @@ -174,12 +174,11 @@ function LM( # take first proximal gradient step s1 and see if current xk is nearly stationary # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - ν = 1 / νInv ∇fk .*= -ν # reuse gradient storage prox!(s, ψ, ∇fk, ν) ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps() # TODO: isn't mk(s) returned by subsolver? ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + sqrt_ξ1_νInv = sqrt(ξ1 / ν) if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -196,7 +195,7 @@ function LM( # subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default subsolver_options.ν = ν - subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : () @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) @@ -226,7 +225,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) 1/ν σ_stat #! format: off end @@ -260,7 +259,7 @@ function LM( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ tired = k ≥ maxIter || elapsed_time > maxTime end @@ -269,7 +268,7 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) 1/ν #! format: on @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)" end From 43b8f6a452834189012b8175e2cd49cce6df5625 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 26 Mar 2025 11:18:02 -0400 Subject: [PATCH 4/5] remove nuInv in R2DH & R2N & TRDH --- src/R2DH.jl | 4 ++-- src/R2N.jl | 10 +++++----- src/TRDH_alg.jl | 6 ++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/R2DH.jl b/src/R2DH.jl index 7f01912d..99eddba2 100644 --- a/src/R2DH.jl +++ b/src/R2DH.jl @@ -191,7 +191,7 @@ function R2DH( Dkσk = D.d .+ σk DNorm = norm(D.d, Inf) - ν = 1 / ((DNorm + σk) * (1 + θ)) + ν = θ / (DNorm + σk) mν∇fk = -ν * ∇fk sqrt_ξ_νInv = one(R) @@ -274,7 +274,7 @@ function R2DH( end Dkσk .= D.d .+ σk - ν = 1 / ((DNorm + σk) * (1 + θ)) + ν = θ / (DNorm + σk) tired = maxIter > 0 && k ≥ maxIter if !tired diff --git a/src/R2N.jl b/src/R2N.jl index aba356a1..8a2d6c17 100644 --- a/src/R2N.jl +++ b/src/R2N.jl @@ -134,7 +134,7 @@ function R2N( λmax, found_λ = opnorm(Bk) found_λ || error("operator norm computation failed") - νInv = (1 + θ) * (σk + λmax) + ν = θ / (σk + λmax) sqrt_ξ1_νInv = one(R) optimal = false @@ -166,11 +166,11 @@ function R2N( # take first proximal gradient step s1 and see if current xk is nearly stationary # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)). - subsolver_options.ν = 1 / νInv + subsolver_options.ν = ν prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν) ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") - sqrt_ξ1_νInv = sqrt(ξ1 * νInv) + sqrt_ξ1_νInv = sqrt(ξ1 / ν) if ξ1 ≥ 0 && k == 1 ϵ_increment = ϵr * sqrt_ξ1_νInv @@ -188,7 +188,7 @@ function R2N( subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3) verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol subsolver_options.σk = σk - subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : () + subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, f.meta.nvar),) : () s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s) end @@ -256,7 +256,7 @@ function R2N( if ρk < η1 || ρk == Inf σk = σk * γ end - νInv = (1 + θ) * (σk + λmax) + ν = θ / (σk + λmax) tired = k ≥ maxIter || elapsed_time > maxTime end diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index 57e83155..2eaac3b0 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -200,8 +200,7 @@ function TRDH( ∇f!(∇fk, xk) ∇fk⁻ = copy(∇fk) DNorm = norm(D.d, Inf) - νInv = DNorm + one(R) / (α * Δk) - ν = one(R) / νInv + ν = (α * Δk)/(DNorm + one(R)) mν∇fk = -ν .* ∇fk sqrt_ξ_νInv = one(R) @@ -318,8 +317,7 @@ function TRDH( has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk) end - νInv = reduce_TR ? (DNorm + one(R) / (α * Δk)) : (DNorm + one(R) / α) - ν = one(R) / νInv + ν = reduce_TR ? (α * Δk)/(DNorm + one(R)) : α / (DNorm + one(R)) mν∇fk .= -ν .* ∇fk tired = k ≥ maxIter || elapsed_time > maxTime From 91a915c19115e9a39de079bc8f40911d9f56e630 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Wed, 26 Mar 2025 15:33:40 -0400 Subject: [PATCH 5/5] print nu instead in LMTR --- src/LMTR_alg.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index e7df359c..6e778d90 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -115,7 +115,7 @@ function LMTR( if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "ν" "TR" #! format: on end @@ -232,7 +232,7 @@ function LMTR( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm 1/ν TR_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm ν TR_stat #! format: on end @@ -273,7 +273,7 @@ function LMTR( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) 1/ν + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) ν #! format: on @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))" end