add powernorm iteration

MaxenceGollier · MaxenceGollier · commit 70d7fb07079e · 2025-09-18T17:00:32.000-04:00
diff --git a/src/R2N.jl b/src/R2N.jl
@@ -139,7 +139,7 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory
 - `η2::T = T(0.9)`: very successful iteration threshold;
 - `γ::T = T(3)`: regularization parameter multiplier, σ := σ/γ when the iteration is very successful and σ := σγ when the iteration is unsuccessful;
 - `θ::T = 1/(1 + eps(T)^(1 / 5))`: is the model decrease fraction with respect to the decrease of the Cauchy model;
-- `compute_opnorm::Bool = false`: whether the operator norm of Bₖ should be computed at each iteration. If false, a Rayleigh quotient is computed instead. The first option causes the solver to converge in fewer iterations but the computational cost per iteration is larger;
+- `opnorm_maxiter::Int = 1`: how many iterations of the power method to use to estimate the operator norm of Bₖ. If a non-positive number is provided (`opnorm_maxiter ≤ 0`), then Arpack is used instead;
 - `m_monotone::Int = 1`: monotonicity parameter. By default, R2N is monotone but the non-monotone variant will be used if `m_monotone > 1`;
 - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
 
@@ -232,7 +232,7 @@ function SolverCore.solve!(
   γ::T = T(3),
   β::T = 1 / eps(T),
   θ::T = 1/(1 + eps(T)^(1 / 5)),
-  compute_opnorm::Bool = false,
+  opnorm_maxiter::Int = 1,
   sub_kwargs::NamedTuple = NamedTuple(),
 ) where {T, V, G}
   reset!(stats)
@@ -308,12 +308,12 @@ function SolverCore.solve!(
   found_λ = true
   solver.subpb.model.B = hess_op(nlp, xk)
 
-  if !compute_opnorm
-    mul!(solver.subpb.model.v, solver.subpb.model.B, solver.v0)
-    λmax = dot(solver.v0, solver.subpb.model.v)
-  else
+  if opnorm_maxiter ≤ 0
     λmax, found_λ = opnorm(solver.subpb.model.B)
+  else
+    λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
   end
+  
   found_λ || error("operator norm computation failed")
 
   ν₁ = θ / (λmax + σk)
@@ -446,11 +446,10 @@ function SolverCore.solve!(
       end
       solver.subpb.model.B = hess_op(nlp, xk)
 
-      if !compute_opnorm
-        mul!(solver.subpb.model.v, solver.subpb.model.B, solver.v0)
-        λmax = dot(solver.v0, solver.subpb.model.v)
-      else
+      if opnorm_maxiter ≤ 0
         λmax, found_λ = opnorm(solver.subpb.model.B)
+      else
+        λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
       end
       
       found_λ || error("operator norm computation failed")
diff --git a/src/TR_alg.jl b/src/TR_alg.jl
@@ -18,6 +18,7 @@ mutable struct TRSolver{
   χ::N
   xkn::V
   s::V
+  v0::V
   has_bnds::Bool
   l_bound::V
   u_bound::V
@@ -54,6 +55,9 @@ function TRSolver(
     u_bound_m_x = similar(xk, 0)
   end
 
+  v0 = [(-1.0)^i for i in 0:(reg_nlp.model.meta.nvar-1)]
+  v0 ./= sqrt(reg_nlp.model.meta.nvar)
+
   ψ =
     has_bnds || subsolver == TRDHSolver ?
     shifted(reg_nlp.h, xk, l_bound_m_x, u_bound_m_x, reg_nlp.selected) :
@@ -76,6 +80,7 @@ function TRSolver(
     χ,
     xkn,
     s,
+    v0,
     has_bnds,
     l_bound,
     u_bound,
@@ -129,6 +134,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u
 - `η1::T = √√eps(T)`: successful iteration threshold;
 - `η2::T = T(0.9)`: very successful iteration threshold;
 - `γ::T = T(3)`: trust-region radius parameter multiplier. Must satisfy `γ > 1`. The trust-region radius is updated as Δ := Δ*γ when the iteration is very successful and Δ := Δ/γ when the iteration is unsuccessful;
+- `opnorm_maxiter::Int = 1`: how many iterations of the power method to use to estimate the operator norm of Bₖ. If a non-positive number is provided (`opnorm_maxiter ≤ 0`), then Arpack is used instead;
 - `χ::F =  NormLinf(1)`: norm used to define the trust-region;`
 - `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration.
 - `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
@@ -201,6 +207,7 @@ function SolverCore.solve!(
   η2::T = T(0.9),
   γ::T = T(3),
   sub_kwargs::NamedTuple = NamedTuple(),
+  opnorm_maxiter::Int = 1,
 ) where {T, G, V}
   reset!(stats)
 
@@ -275,9 +282,14 @@ function SolverCore.solve!(
   ∇fk⁻ .= ∇fk
 
   quasiNewtTest = isa(nlp, QuasiNewtonModel)
-  λmax = T(1)
+  λmax::T = T(1)
+  found_λ = true
 
-  λmax, found_λ = opnorm(solver.subpb.model.B)
+  if opnorm_maxiter ≤ 0
+    λmax, found_λ = opnorm(solver.subpb.model.B)
+  else
+    λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+  end
   found_λ || error("operator norm computation failed")
 
   ν₁ = α * Δk / (1 + λmax * (α * Δk + 1))
@@ -332,7 +344,6 @@ function SolverCore.solve!(
   callback(nlp, solver, stats)
 
   done = stats.status != :unknown
-
   while !done
     sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv))
     ∆_effective = min(β * χ(s), Δk)
@@ -431,7 +442,11 @@ function SolverCore.solve!(
         push!(nlp, s, ∇fk⁻) # update QN operator
       end
 
-      λmax, found_λ = opnorm(solver.subpb.model.B)
+      if opnorm_maxiter ≤ 0
+        λmax, found_λ = opnorm(solver.subpb.model.B)
+      else
+        λmax = power_method!(solver.subpb.model.B, solver.v0, solver.subpb.model.v, opnorm_maxiter)
+      end
       found_λ || error("operator norm computation failed")
 
       ∇fk⁻ .= ∇fk
diff --git a/src/utils.jl b/src/utils.jl
@@ -2,6 +2,19 @@ export RegularizedExecutionStats
 
 import SolverCore.GenericExecutionStats
 
+# Estimate ‖B‖ as |v₀ᵀBv₀| after `max_iter - 1` power-method updates of v₀; v₀ is expected to have unit norm (it is not normalized on entry). Overwrites v₀ and v₁ (on return v₁ = B*v₀).
+function power_method!(B::M, v₀::S, v₁::S, max_iter::Int = 1) where {M, S}
+  max_iter >= 1 || throw(ArgumentError("max_iter must be at least 1, got $max_iter")) # explicit check: @assert may be compiled out
+  for _ = 2:max_iter
+    mul!(v₁, B, v₀)
+    nrm = norm(v₁)
+    iszero(nrm) && break # B*v₀ = 0: normalizing would fill v₀ with NaNs; the estimate below is then 0
+    v₀ .= v₁ ./ nrm # v₀ ← B*v₀ / ‖B*v₀‖, fused broadcast so nothing is allocated
+  end
+  mul!(v₁, B, v₀)
+  return abs(dot(v₀, v₁)) # a norm is nonnegative, but the Rayleigh quotient of an indefinite B can be negative
+end
+
 # use Arpack to obtain largest eigenvalue in magnitude with a minimum of robustness
 function LinearAlgebra.opnorm(B; kwargs...)
   m, n = size(B)
diff --git a/test/test_allocs.jl b/test/test_allocs.jl
@@ -59,7 +59,7 @@ end
         (solver_name == "R2DH" || solver_name == "R2N") && @test @wrappedallocs(
           solve!(solver, reg_nlp, stats, σk = 1.0, atol = 1e-6, rtol = 1e-6)
         ) == 0
-        solver_name == "TRDH" &&
+        (solver_name == "TRDH") &&
           @test @wrappedallocs(solve!(solver, reg_nlp, stats, atol = 1e-6, rtol = 1e-6)) == 0
         @test stats.status == :first_order
       end