Add LMTR solver implementation and update benchmarks in Benchmark.jl and Benchmark.tex

MohamedLaghdafHABIBOULLAH · MaxenceGollier · commit 843899fafdf7 · 2025-10-03T12:37:21.000-04:00
diff --git a/paper/examples/Benchmark.jl b/paper/examples/Benchmark.jl
@@ -131,6 +131,30 @@ function run_LM_svm!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose
     )
 end
 
+function run_LMTR_svm!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, sub_kwargs = (;))  # Solve the RootNormLhalf(λ)-regularized problem with LMTRSolver twice and return a NamedTuple of statistics gathered after the second (timed) solve
+    reg_nls  = RegularizedNLSModel(nls_model, RootNormLhalf(λ))  # regularized problem built around nls_model
+    solver   = LMTRSolver(reg_nls)
+    stats    = RegularizedExecutionStats(reg_nls)  # the same stats object is passed to both solves below
+    RegularizedOptimization.solve!(solver, reg_nls, stats;  # first, untimed solve — presumably a warm-up so compilation is not timed (confirm)
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
+    reset!(nls_model)  # Reset counters before timing, so the evaluation counts read below follow the timed solve only
+    reg_nls  = RegularizedNLSModel(nls_model, RootNormLhalf(λ))  # rebuild the model and solver for the timed solve
+    solver   = LMTRSolver(reg_nls)
+    t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats;  # t is the elapsed time of this second solve, in seconds
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
+    return (
+        name      = "LMTR (SVM)",
+        status    = string(stats.status),
+        time      = t,
+        iters     = get(stats.solver_specific, :outer_iter, missing),  # `missing` if the key is absent
+        fevals    = neval_residual(nls_model),
+        gevals    = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model),  # sum of the jtprod and jprod residual counters
+        proxcalls = get(stats.solver_specific, :prox_evals, missing),  # `missing` if the key is absent
+        solution  = stats.solution,
+        final_obj = obj(nls_model, stats.solution)  # evaluated on nls_model, not reg_nls — presumably excludes the regularizer term (confirm)
+    )
+end
+
 function bench_svm!(cfg = CFG)
     Random.seed!(cfg.SEED)
     model, nls_train, _ = RegularizedProblems.svm_train_model()
@@ -140,6 +164,7 @@ function bench_svm!(cfg = CFG)
     (:TR    in cfg.RUN_SOLVERS) && push!(results, run_tr_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
     (:R2N   in cfg.RUN_SOLVERS) && push!(results, run_r2n_svm!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
     (:LM    in cfg.RUN_SOLVERS) && push!(results, run_LM_svm!(nls_train, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
+    (:LMTR  in cfg.RUN_SOLVERS) && push!(results, run_LMTR_svm!(nls_train, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N))
 
     # Print quick summary
     println("\n=== SVM: solver comparison ===")
@@ -230,17 +255,17 @@ function run_r2n_nnmf!(model, x0; λ = 1.0, qn = :LBFGS, atol = 1e-3, rtol = 1e-
     )
 end
 
-function run_LM_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing)
+function run_LM_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing, sub_kwargs = (;))
     reg_nls  = RegularizedNLSModel(nls_model, NormL0(λ), selected)
     solver   = LMSolver(reg_nls)
     stats    = RegularizedExecutionStats(reg_nls)
     RegularizedOptimization.solve!(solver, reg_nls, stats;
-        x = x0, atol = atol, rtol = rtol, verbose = verbose)
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
     reset!(nls_model)  # Reset counters before timing
     reg_nls  = RegularizedNLSModel(nls_model, NormL0(λ), selected)
     solver   = LMSolver(reg_nls)
     t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats;
-        x = x0, atol = atol, rtol = rtol, verbose = verbose)
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
     return (
         name      = "LM (NNMF)",
         status    = string(stats.status),
@@ -254,6 +279,30 @@ function run_LM_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose
     )
 end
 
+function run_LMTR_nnmf!(nls_model, x0; λ = 1.0, atol = 1e-3, rtol = 1e-3, verbose = 0, selected = nothing, sub_kwargs = (;))  # Solve the NormL0(λ)-regularized problem with LMTRSolver twice and return a NamedTuple of statistics gathered after the second (timed) solve
+    reg_nls  = RegularizedNLSModel(nls_model, NormL0(λ), selected)  # `selected` is forwarded unchanged as the third constructor argument
+    solver   = LMTRSolver(reg_nls)
+    stats    = RegularizedExecutionStats(reg_nls)  # the same stats object is passed to both solves below
+    RegularizedOptimization.solve!(solver, reg_nls, stats;  # first, untimed solve — presumably a warm-up so compilation is not timed (confirm)
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
+    reset!(nls_model)  # Reset counters before timing, so the evaluation counts read below follow the timed solve only
+    reg_nls  = RegularizedNLSModel(nls_model, NormL0(λ), selected)  # rebuild the model and solver for the timed solve
+    solver   = LMTRSolver(reg_nls)
+    t = @elapsed RegularizedOptimization.solve!(solver, reg_nls, stats;  # t is the elapsed time of this second solve, in seconds
+        x = x0, atol = atol, rtol = rtol, verbose = verbose, sub_kwargs = sub_kwargs)
+    return (
+        name      = "LMTR (NNMF)",
+        status    = string(stats.status),
+        time      = t,
+        iters     = get(stats.solver_specific, :outer_iter, missing),  # `missing` if the key is absent
+        fevals    = neval_residual(nls_model),
+        gevals    = neval_jtprod_residual(nls_model) + neval_jprod_residual(nls_model),  # sum of the jtprod and jprod residual counters
+        proxcalls = get(stats.solver_specific, :prox_evals, missing),  # `missing` if the key is absent
+        solution  = stats.solution,
+        final_obj = obj(nls_model, stats.solution)  # evaluated on nls_model, not reg_nls — presumably excludes the regularizer term (confirm)
+    )
+end
+
 function bench_nnmf!(cfg = CFG2; m = 100, n = 50, k = 5)
     Random.seed!(cfg.SEED)
 
@@ -268,7 +317,8 @@ function bench_nnmf!(cfg = CFG2; m = 100, n = 50, k = 5)
     results = NamedTuple[]
     (:TR  in cfg.RUN_SOLVERS) && push!(results, run_tr_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_TR, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected))
     (:R2N in cfg.RUN_SOLVERS) && push!(results, run_r2n_nnmf!(model, x0; λ = cfg.LAMBDA_L0, qn = cfg.QN_FOR_R2N, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, sub_kwargs = cfg.SUB_KWARGS_R2N, selected = selected))
-    (:LM  in cfg.RUN_SOLVERS) && push!(results, run_LM_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected))
+    (:LM  in cfg.RUN_SOLVERS) && push!(results, run_LM_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected, sub_kwargs = cfg.SUB_KWARGS_R2N))
+    (:LMTR in cfg.RUN_SOLVERS) && push!(results, run_LMTR_nnmf!(nls_model, x0; λ = cfg.LAMBDA_L0, atol = cfg.TOL, rtol = cfg.RTOL, verbose = cfg.VERBOSE_RO, selected = selected, sub_kwargs = cfg.SUB_KWARGS_R2N))
 
     println("\n=== NNMF: solver comparison ===")
     for m in results
diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex
@@ -1,11 +1,13 @@
 \begin{tabular}{lcrrrrr}
   \hline
   \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline
-  TR (LSR1, SVM) & first\_order & 3.4768 & 347 & 291 & 4037 & 179.837 \\
-  R2N (LSR1, SVM) & first\_order & 1.728 & 185 & 101 & 27932 & 192.493 \\
-  LM (SVM) & first\_order & 18.0861 & 6 & 2876 & 1001 & 201.186 \\
+  TR (LSR1, SVM) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
+  R2N (LSR1, SVM) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
+  LM (SVM) & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\
+  LMTR (SVM) & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\
   \hline
-  TR (LBFGS, NNMF) & first\_order & 0.1013 & 42 & 40 & 3160 & 976.06 \\
-  R2N (LBFGS, NNMF) & first\_order & 0.4974 & 169 & 107 & 17789 & 411.727 \\
-  LM (NNMF) & first\_order & 0.4654 & 15 & 27703 & 12320 & 131.183 \\\hline
+  TR (LBFGS, NNMF) & first\_order & 0.1014 & 42 & 40 & 3160 & 976.06 \\
+  R2N (LBFGS, NNMF) & first\_order & 0.4913 & 169 & 107 & 17789 & 411.727 \\
+  LM (NNMF) & first\_order & 0.1157 & 14 & 7042 & 2601 & 131.184 \\
+  LMTR (NNMF) & first\_order & 0.0697 & 9 & 4066 & 1435 & 131.186 \\\hline
 \end{tabular}
diff --git a/paper/examples/comparison-config.jl b/paper/examples/comparison-config.jl
@@ -8,7 +8,7 @@ Base.@kwdef mutable struct Config
     MAXIT_PANOC::Int          = 10000
     VERBOSE_PANOC::Bool       = false
     VERBOSE_RO::Int           = 0
-    RUN_SOLVERS::Vector{Symbol} = [:LM, :TR, :R2N]   # mutable
+    RUN_SOLVERS::Vector{Symbol} = [:LMTR, :LM, :TR, :R2N]   # mutable
     QN_FOR_TR::Symbol         = :LSR1
     QN_FOR_R2N::Symbol        = :LBFGS
     SUB_KWARGS_R2N::NamedTuple = (; max_iter = 200)
diff --git a/paper/paper.md b/paper/paper.md
@@ -167,7 +167,7 @@ solver = LMSolver(reg_nls)                                   # Choose solver
 
 ## Numerical results
 
-We compare **TR**, **R2N**, and **LM** from our library.
+We compare **TR**, **R2N**, **LM** and **LMTR** from our library.
 
 We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value.
 On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively.
@@ -180,10 +180,12 @@ The subproblem solver is **R2**.
 All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point.
 Note that, the final objective values differ due to the nonconvexity of the problems.
 
-- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest function and gradient evaluations compared to **TR**.
+- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations compared to all the other solvers.
 However, it requires more proximal evaluations, but these are inexpensive.
-**LM** requires the fewest function evaluations, but many gradient evaluations, and is the slowest.
-- **NNMF with constrained $\ell_0$ penalty:** **TR** is the fastest, and requires a fewer number of function and gradient evaluations than **R2N**. **LM** is competitive in terms of function calls but incurs many Jacobian–vector products; it nevertheless achieves the lowest objective value.
+**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest.
+Note that here, **TR** achieves the lowest objective value, while **LMTR** achieves a lower objective value than both **R2N** and **LM**.
+- **NNMF with constrained $\ell_0$ penalty:** **LMTR** is the fastest and requires fewer function evaluations than all the other solvers. It is followed by **TR**, which is the second fastest and requires the fewest gradient evaluations; however, **TR** achieves the highest objective value.
+Note that both **LMTR** and **LM** achieve the lowest objective value.
 
 Additional tests (e.g., other regularizers, constraint types, and scaling dimensions) have also been conducted, and a full benchmarking campaign is currently underway.