From b50c82abd77cea51c2d77f113862a1986236f705 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <mgollier45@gmail.com>
Date: Sun, 23 Feb 2025 18:50:46 -0500
Subject: [PATCH 1/5] update theta default value

---
 src/LMTR_alg.jl     | 4 ++--
 src/LM_alg.jl       | 4 ++--
 src/input_struct.jl | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
index 6eff53c1..339f899b 100644
--- a/src/LMTR_alg.jl
+++ b/src/LMTR_alg.jl
@@ -132,7 +132,7 @@ function LMTR(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (1 + θ) * σmax^2  # ‖J'J‖ = ‖J‖²
+  νInv = σmax^2/θ # ‖J'J‖ = ‖J‖²
 
   mν∇fk = -∇fk / νInv
 
@@ -255,7 +255,7 @@ function LMTR(
       jtprod_residual!(nls, xk, Fk, ∇fk)
       σmax, found_σ = opnorm(Jk)
       found_σ || error("operator norm computation failed")
-      νInv = (1 + θ) * σmax^2  # ‖J'J‖ = ‖J‖²
+      νInv = σmax^2/θ # ‖J'J‖ = ‖J‖²
       @. mν∇fk = -∇fk / νInv
     end
 
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
index 163cc6fe..bda831b8 100644
--- a/src/LM_alg.jl
+++ b/src/LM_alg.jl
@@ -127,7 +127,7 @@ function LM(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (1 + θ) * (σmax^2 + σk)  # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+  νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
 
   s = zero(xk)
 
@@ -260,7 +260,7 @@ function LM(
     if ρk < η1 || ρk == Inf
       σk = σk * γ
     end
-    νInv = (1 + θ) * (σmax^2 + σk)  # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+    νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 
diff --git a/src/input_struct.jl b/src/input_struct.jl
index 7242b90a..1b8b1581 100644
--- a/src/input_struct.jl
+++ b/src/input_struct.jl
@@ -34,7 +34,7 @@ mutable struct ROSolverOptions{R}
     α::R = 1 / eps(R),
     ν::R = eps(R)^(1 / 5),
     γ::R = R(3),
-    θ::R = eps(R)^(1 / 5),
+    θ::R = 1/(1+eps(R)^(1 / 5)),
     β::R = 1 / eps(R),
     reduce_TR::Bool = true,
   ) where {R <: Real}

From d34e765af51049e73cb04bb38d599410816c8af3 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <mgollier45@gmail.com>
Date: Mon, 3 Mar 2025 13:21:23 -0500
Subject: [PATCH 2/5] reformat code in LM & LMTR

---
 src/LMTR_alg.jl | 4 ++--
 src/LM_alg.jl   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
index 339f899b..4c0e3ee7 100644
--- a/src/LMTR_alg.jl
+++ b/src/LMTR_alg.jl
@@ -132,7 +132,7 @@ function LMTR(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = σmax^2/θ # ‖J'J‖ = ‖J‖²
+  νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖²
 
   mν∇fk = -∇fk / νInv
 
@@ -255,7 +255,7 @@ function LMTR(
       jtprod_residual!(nls, xk, Fk, ∇fk)
       σmax, found_σ = opnorm(Jk)
       found_σ || error("operator norm computation failed")
-      νInv = σmax^2/θ # ‖J'J‖ = ‖J‖²
+      νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖²
       @. mν∇fk = -∇fk / νInv
     end
 
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
index bda831b8..35c95586 100644
--- a/src/LM_alg.jl
+++ b/src/LM_alg.jl
@@ -127,7 +127,7 @@ function LM(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+  νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
 
   s = zero(xk)
 
@@ -260,7 +260,7 @@ function LM(
     if ρk < η1 || ρk == Inf
       σk = σk * γ
     end
-    νInv = (σmax^2 + σk)/θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+    νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 

From 8e80540220862297beb6c0b505104697ba3cec88 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <mgollier45@gmail.com>
Date: Wed, 26 Mar 2025 11:07:17 -0400
Subject: [PATCH 3/5] remove nu_Inv in LM & LMTR

---
 src/LMTR_alg.jl | 20 ++++++++++----------
 src/LM_alg.jl   | 13 ++++++-------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
index 4c0e3ee7..e7df359c 100644
--- a/src/LMTR_alg.jl
+++ b/src/LMTR_alg.jl
@@ -132,9 +132,9 @@ function LMTR(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖²
+  ν = θ / σmax^2 # ‖J'J‖ = ‖J‖²
 
-  mν∇fk = -∇fk / νInv
+  mν∇fk = -∇fk * ν
 
   optimal = false
   tired = k ≥ maxIter || elapsed_time > maxTime
@@ -174,8 +174,8 @@ function LMTR(
 
     # Take first proximal gradient step s1 and see if current xk is nearly stationary.
     # s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0))
-    ν = 1 / (νInv + 1 / (Δk * α))
-    prox!(s, ψ, mν∇fk, ν)
+    ν1 = 1 / (1/ν + 1 / (Δk * α))
+    prox!(s, ψ, mν∇fk, ν1)
     ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
     ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
 
@@ -197,8 +197,8 @@ function LMTR(
     set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) :
     set_radius!(ψ, ∆_effective)
     subsolver_options.Δk = ∆_effective / 10
-    subsolver_options.ν = ν
-    subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
+    subsolver_options.ν = ν1
+    subsolver_args = subsolver == TRDH ? (SpectralGradient(1 / ν1, nls.meta.nvar),) : ()
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
     end
@@ -232,7 +232,7 @@ function LMTR(
 
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm νInv TR_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm 1/ν TR_stat
       #! format: on
     end
 
@@ -255,8 +255,8 @@ function LMTR(
       jtprod_residual!(nls, xk, Fk, ∇fk)
       σmax, found_σ = opnorm(Jk)
       found_σ || error("operator norm computation failed")
-      νInv = σmax^2 / θ # ‖J'J‖ = ‖J‖²
-      @. mν∇fk = -∇fk / νInv
+      ν = θ / σmax^2  # ‖J'J‖ = ‖J‖²
+      @. mν∇fk = -∇fk * ν
     end
 
     if ρk < η1 || ρk == Inf
@@ -273,7 +273,7 @@ function LMTR(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) 1/ν
       #! format: on
       @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))"
     end
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
index 35c95586..45242b0d 100644
--- a/src/LM_alg.jl
+++ b/src/LM_alg.jl
@@ -127,7 +127,7 @@ function LM(
 
   σmax, found_σ = opnorm(Jk)
   found_σ || error("operator norm computation failed")
-  νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+  ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
 
   s = zero(xk)
 
@@ -174,12 +174,11 @@ function LM(
 
     # take first proximal gradient step s1 and see if current xk is nearly stationary
     # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)).
-    ν = 1 / νInv
     ∇fk .*= -ν  # reuse gradient storage
     prox!(s, ψ, ∇fk, ν)
     ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()  # TODO: isn't mk(s) returned by subsolver?
     ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
-    sqrt_ξ1_νInv = sqrt(ξ1 * νInv)
+    sqrt_ξ1_νInv = sqrt(ξ1 / ν)
 
     if ξ1 ≥ 0 && k == 1
       ϵ_increment = ϵr * sqrt_ξ1_νInv
@@ -196,7 +195,7 @@ function LM(
   #  subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10))
     subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5) , sqrt_ξ1_νInv * 1e-3) # 1.0e-5 default
     subsolver_options.ν = ν
-    subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, nls.meta.nvar),) : ()
+    subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, nls.meta.nvar),) : ()
     @debug "setting inner stopping tolerance to" subsolver_options.optTol
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
@@ -226,7 +225,7 @@ function LM(
 
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt_ξ1_νInv sqrt(ξ) ρk σk norm(xk) norm(s) 1/ν σ_stat
       #! format: off
     end
 
@@ -260,7 +259,7 @@ function LM(
     if ρk < η1 || ρk == Inf
       σk = σk * γ
     end
-    νInv = (σmax^2 + σk) / θ # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+    ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 
@@ -269,7 +268,7 @@ function LM(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) νInv
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt_ξ1_νInv sqrt(ξ1) "" σk norm(xk) norm(s) 1/ν
       #! format: on
       @info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
     end

From 43b8f6a452834189012b8175e2cd49cce6df5625 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <mgollier45@gmail.com>
Date: Wed, 26 Mar 2025 11:18:02 -0400
Subject: [PATCH 4/5] remove nuInv in R2DH & R2N & TRDH

---
 src/R2DH.jl     |  4 ++--
 src/R2N.jl      | 10 +++++-----
 src/TRDH_alg.jl |  6 ++----
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/R2DH.jl b/src/R2DH.jl
index 7f01912d..99eddba2 100644
--- a/src/R2DH.jl
+++ b/src/R2DH.jl
@@ -191,7 +191,7 @@ function R2DH(
   Dkσk = D.d .+ σk
   DNorm = norm(D.d, Inf)
 
-  ν = 1 / ((DNorm + σk) * (1 + θ))
+  ν = θ / (DNorm + σk)
   mν∇fk = -ν * ∇fk
   sqrt_ξ_νInv = one(R)  
 
@@ -274,7 +274,7 @@ function R2DH(
     end
 
     Dkσk .= D.d .+ σk
-    ν = 1 / ((DNorm + σk) * (1 + θ))
+    ν = θ / (DNorm + σk)
 
     tired = maxIter > 0 && k ≥ maxIter
     if !tired
diff --git a/src/R2N.jl b/src/R2N.jl
index aba356a1..8a2d6c17 100644
--- a/src/R2N.jl
+++ b/src/R2N.jl
@@ -134,7 +134,7 @@ function R2N(
 
   λmax, found_λ = opnorm(Bk)
   found_λ || error("operator norm computation failed")
-  νInv = (1 + θ) * (σk + λmax)
+  ν =  θ / (σk + λmax)
   sqrt_ξ1_νInv = one(R)
 
   optimal = false
@@ -166,11 +166,11 @@ function R2N(
     # take first proximal gradient step s1 and see if current xk is nearly stationary
     # s1 minimizes φ1(s) + ‖s‖² / 2 / ν + ψ(s) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0)).
 
-    subsolver_options.ν = 1 / νInv
+    subsolver_options.ν = ν
     prox!(s, ψ, -subsolver_options.ν * ∇fk, subsolver_options.ν)
     ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps()
     ξ1 > 0 || error("R2N: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
-    sqrt_ξ1_νInv = sqrt(ξ1 * νInv)
+    sqrt_ξ1_νInv = sqrt(ξ1 / ν)
 
     if ξ1 ≥ 0 && k == 1
       ϵ_increment = ϵr * sqrt_ξ1_νInv
@@ -188,7 +188,7 @@ function R2N(
     subsolver_options.ϵa = k == 1 ? 1.0e-3 : min(sqrt_ξ1_νInv^(1.5), sqrt_ξ1_νInv * 1e-3)
     verbose > 0 && @debug "setting inner stopping tolerance to" subsolver_options.optTol
     subsolver_options.σk = σk
-    subsolver_args = subsolver == R2DH ? (SpectralGradient(νInv, f.meta.nvar),) : ()
+    subsolver_args = subsolver == R2DH ? (SpectralGradient(1 / ν, f.meta.nvar),) : ()
     s, iter, _ = with_logger(subsolver_logger) do
       subsolver(φ, ∇φ!, ψ, subsolver_args..., subsolver_options, s)
     end
@@ -256,7 +256,7 @@ function R2N(
     if ρk < η1 || ρk == Inf
         σk = σk * γ
     end
-    νInv = (1 + θ) * (σk + λmax)
+    ν = θ / (σk + λmax)
     tired = k ≥ maxIter || elapsed_time > maxTime
   end
 
diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl
index 57e83155..2eaac3b0 100644
--- a/src/TRDH_alg.jl
+++ b/src/TRDH_alg.jl
@@ -200,8 +200,7 @@ function TRDH(
   ∇f!(∇fk, xk)
   ∇fk⁻ = copy(∇fk)
   DNorm = norm(D.d, Inf)
-  νInv = DNorm + one(R) / (α * Δk)
-  ν = one(R) / νInv
+  ν = (α * Δk)/(DNorm + one(R))
   mν∇fk = -ν .* ∇fk
   sqrt_ξ_νInv = one(R)
 
@@ -318,8 +317,7 @@ function TRDH(
       has_bnds ? set_bounds!(ψ, l_bound_k, u_bound_k) : set_radius!(ψ, Δk)
     end
 
-    νInv = reduce_TR ? (DNorm + one(R) / (α * Δk)) : (DNorm + one(R) / α)
-    ν = one(R) / νInv
+    ν = reduce_TR ? (α * Δk)/(DNorm + one(R)) : α / (DNorm + one(R))
     mν∇fk .= -ν .* ∇fk
 
     tired = k ≥ maxIter || elapsed_time > maxTime

From 91a915c19115e9a39de079bc8f40911d9f56e630 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <mgollier45@gmail.com>
Date: Wed, 26 Mar 2025 15:33:40 -0400
Subject: [PATCH 5/5] print nu instead in LMTR

---
 src/LMTR_alg.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl
index e7df359c..6e778d90 100644
--- a/src/LMTR_alg.jl
+++ b/src/LMTR_alg.jl
@@ -115,7 +115,7 @@ function LMTR(
 
   if verbose > 0
     #! format: off
-    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR"
+    @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "ν" "TR"
     #! format: on
   end
 
@@ -232,7 +232,7 @@ function LMTR(
 
     if (verbose > 0) && (k % ptf == 0)
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm 1/ν TR_stat
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm ν TR_stat
       #! format: on
     end
 
@@ -273,7 +273,7 @@ function LMTR(
       @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk
     elseif optimal
       #! format: off
-      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) 1/ν
+      @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) ν
       #! format: on
       @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))"
     end