Update the default value of theta in ROSolverOptions and update LM & LMTR accordingly (#180)

MaxenceGollier · web-flow · commit 83bc164cf632 · 2025-04-09T08:08:53.000-04:00
diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
@@ -115,7 +115,7 @@ function LMTR(
 
   if verbose > 0
     #! format: off
-    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR"
+    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "ν" "TR"
     #! format: on
   end
 
@@ -132,9 +132,9 @@ function LMTR(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (1 + θ) * σmax^2  # ‖J'J‖ = ‖J‖²
+  ν = θ / σmax^2 # ‖J'J‖ = ‖J‖²
 
-  mν∇fk = -∇fk / νInv
+  mν∇fk = -∇fk * ν
 
   optimal = false
   tired = k ≥ maxIter || elapsed_time > maxTime
@@ -174,8 +174,8 @@ function LMTR(
 
     # Take first proximal gradient step s1 and see if current xk is nearly stationary.
     # s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0))
-    ν = 1 / (νInv + 1 / (Δk * α))
-    prox!(s, ψ, mν∇fk, ν)
+    ν1 = 1 / (1/ν + 1 / (Δk * α))
+    prox!(s, ψ, mν∇fk, ν1)
     ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
     ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
 
@@ -197,8 +197,8 @@ function LMTR(
     set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) :
     set_radius!(ψ, ∆_effective)
     subsolver_options.Δk = ∆_effective / 10
-    subsolver_options.ν = ν
-    subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
+    subsolver_options.ν = ν1
+    subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν1, nls.meta.nvar),) : ()
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
     end
@@ -232,7 +232,7 @@ function LMTR(
 
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm νInv TR_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm ν TR_stat
       #! format: on
     end
 
@@ -255,8 +255,8 @@ function LMTR(
       jtprod_residual!(nls, xk, Fk, ∇fk)
       σmax, found_σ = opnorm(Jk)
       found_σ || error("operator norm computation failed")
-      νInv = (1 + θ) * σmax^2  # ‖J'J‖ = ‖J‖²
-      @. mν∇fk = -∇fk / νInv
+      ν = θ / σmax^2  # ‖J'J‖ = ‖J‖²
+      @. mν∇fk = -∇fk * ν
     end
 
     if ρk < η1 || ρk == Inf
@@ -273,7 +273,7 @@ function LMTR(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) ν
       #! format: on
       @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))"
     end
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
@@ -127,7 +127,7 @@ function LM(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (1 + θ) * (σmax^2 + σk)  # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+  ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
 
   s = zero(xk)
 
@@ -174,12 +174,11 @@ function LM(
 
     # take first proximal gradient step s1 and see if current xk is nearly stationary
     # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)).
-    ν = 1 / νInv
     ∇fk .*= -ν  # reuse gradient storage
     prox!(s, ψ, ∇fk, ν)
     ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()  # TODO: isn't mk(s) returned by subsolver?
     ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
-    sqrt_ξ1_νInv = sqrt(ξ1 * νInv)
+    sqrt_ξ1_νInv = sqrt(ξ1 / ν)
 
     if ξ1 ≥ 0 && k == 1
       ϵ_increment = ϵr * sqrt_ξ1_νInv
@@ -196,7 +195,7 @@ function LM(
     #  subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10))
     subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default
     subsolver_options.ν = ν
-    subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : ()
+    subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
     @debug "setting inner stopping tolerance to" subsolver_options.optTol
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
@@ -226,7 +225,7 @@ function LM(
 
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) 1/ν σ_stat
       #! format: off
     end
 
@@ -260,7 +259,7 @@ function LM(
     if ρk < η1 || ρk == Inf
       σk = σk * γ
     end
-    νInv = (1 + θ) * (σmax^2 + σk)  # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+    ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 
@@ -269,7 +268,7 @@ function LM(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) 1/ν
       #! format: on
       @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
     end
diff --git a/src/R2DH.jl b/src/R2DH.jl
@@ -191,7 +191,7 @@ function R2DH(
   Dkσk = D.d .+ σk
   DNorm = norm(D.d, Inf)
 
-  ν = 1 / ((DNorm + σk) * (1 + θ))
+  ν = θ / (DNorm + σk)
   mν∇fk = -ν * ∇fk
   sqrt_ξ_νInv = one(R)  
 
@@ -274,7 +274,7 @@ function R2DH(
     end
 
     Dkσk .= D.d .+ σk
-    ν = 1 / ((DNorm + σk) * (1 + θ))
+    ν = θ / (DNorm + σk)
 
     tired = maxIter > 0 && k ≥ maxIter
     if !tired
diff --git a/src/R2N.jl b/src/R2N.jl
@@ -134,7 +134,7 @@ function R2N(
 
   λmax, found_λ = opnorm(Bk)
   found_λ || error("operator norm computation failed")
-  νInv = (1 + θ) * (σk + λmax)
+  ν =  θ / (σk + λmax)
   sqrt_ξ1_νInv = one(R)
 
   optimal = false
@@ -166,11 +166,11 @@ function R2N(
     # take first proximal gradient step s1 and see if current xk is nearly stationary
     # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)).
 
-    subsolver_options.ν = 1 / νInv
+    subsolver_options.ν = ν
     prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν)
     ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps()
     ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
-    sqrt_ξ1_νInv = sqrt(ξ1 * νInv)
+    sqrt_ξ1_νInv = sqrt(ξ1 / ν)
 
     if ξ1 ≥ 0 && k == 1
       ϵ_increment = ϵr * sqrt_ξ1_νInv
@@ -188,7 +188,7 @@ function R2N(
     subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3)
     verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol
     subsolver_options.σk = σk
-    subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : ()
+    subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, f.meta.nvar),) : ()
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
     end
@@ -256,7 +256,7 @@ function R2N(
     if ρk < η1 || ρk == Inf
         σk = σk * γ
     end
-    νInv = (1 + θ) * (σk + λmax)
+    ν = θ / (σk + λmax)
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 
diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl
@@ -200,8 +200,7 @@ function TRDH(
   ∇f!(∇fk, xk)
   ∇fk⁻ = copy(∇fk)
   DNorm = norm(D.d, Inf)
-  νInv = DNorm + one(R) / (α * Δk)
-  ν = one(R) / νInv
+  ν = (α * Δk)/(DNorm + one(R))
   mν∇fk = -ν .* ∇fk
   sqrt_ξ_νInv = one(R)
 
@@ -318,8 +317,7 @@ function TRDH(
       has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk)
     end
 
-    νInv = reduce_TR ? (DNorm + one(R) / (α * Δk)) : (DNorm + one(R) / α)
-    ν = one(R) / νInv
+    ν = reduce_TR ? (α * Δk)/(DNorm + one(R)) : α / (DNorm + one(R))
     mν∇fk .= -ν .* ∇fk
 
     tired = k ≥ maxIter || elapsed_time > maxTime
diff --git a/src/input_struct.jl b/src/input_struct.jl
@@ -34,7 +34,7 @@ mutable struct ROSolverOptions{R}
     α::R = 1 / eps(R),
     ν::R = eps(R)^(1 / 5),
     γ::R = R(3),
-    θ::R = eps(R)^(1 / 5),
+    θ::R = 1/(1+eps(R)^(1 / 5)),
     β::R = 1 / eps(R),
     reduce_TR::Bool = true,
   ) where {R <: Real}