refactor: use RobustNonMonotoneLineSearch from LineSearch.jl

avik-pal · avik-pal · commit 1e9898712758 · 2024-09-24T20:06:36.000-04:00
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
@@ -31,7 +31,7 @@ using LinearAlgebra: LinearAlgebra, ColumnNorm, Diagonal, I, LowerTriangular, Sy
                      UpperTriangular, axpy!, cond, diag, diagind, dot, issuccess, istril,
                      istriu, lu, mul!, norm, pinv, tril!, triu!
 using LineSearch: LineSearch, AbstractLineSearchAlgorithm, AbstractLineSearchCache,
-                  NoLineSearch
+                  NoLineSearch, RobustNonMonotoneLineSearch
 using LineSearches: LineSearches
 using LinearSolve: LinearSolve, LUFactorization, QRFactorization, ComposePreconditioner,
                    InvPreconditioner, needs_concrete_A, AbstractFactorization,
@@ -105,54 +105,54 @@ include("algorithms/extension_algs.jl")
 include("utils.jl")
 include("default.jl")
 
-# @setup_workload begin
-#     nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1),
-#         (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1]))
-#     probs_nls = NonlinearProblem[]
-#     for (fn, u0) in nlfuncs
-#         push!(probs_nls, NonlinearProblem(fn, u0, 2.0))
-#     end
-
-#     nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(),
-#         PseudoTransient(), Broyden(), Klement(), DFSane(), nothing)
-
-#     probs_nlls = NonlinearLeastSquaresProblem[]
-#     nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]),
-#         (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]),
-#         (
-#             NonlinearFunction{true}(
-#                 (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)),
-#             [0.1, 0.0]),
-#         (
-#             NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p),
-#                 resid_prototype = zeros(4)),
-#             [0.1, 0.1]))
-#     for (fn, u0) in nlfuncs
-#         push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0))
-#     end
-
-#     nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(),
-#         LevenbergMarquardt(; linsolve = LUFactorization()),
-#         GaussNewton(; linsolve = LUFactorization()),
-#         TrustRegion(; linsolve = LUFactorization()), nothing)
-
-#     @compile_workload begin
-#         @sync begin
-#             for T in (Float32, Float64), (fn, u0) in nlfuncs
-#                 Threads.@spawn NonlinearProblem(fn, T.(u0), T(2))
-#             end
-#             for (fn, u0) in nlfuncs
-#                 Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0)
-#             end
-#             for prob in probs_nls, alg in nls_algs
-#                 Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false)
-#             end
-#             for prob in probs_nlls, alg in nlls_algs
-#                 Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false)
-#             end
-#         end
-#     end
-# end
+@setup_workload begin
+    nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1),
+        (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1]))
+    probs_nls = NonlinearProblem[]
+    for (fn, u0) in nlfuncs
+        push!(probs_nls, NonlinearProblem(fn, u0, 2.0))
+    end
+
+    nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(),
+        PseudoTransient(), Broyden(), Klement(), DFSane(), nothing)
+
+    probs_nlls = NonlinearLeastSquaresProblem[]
+    nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]),
+        (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]),
+        (
+            NonlinearFunction{true}(
+                (du, u, p) -> du[1] = u[1] * u[1] - p, resid_prototype = zeros(1)),
+            [0.1, 0.0]),
+        (
+            NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p),
+                resid_prototype = zeros(4)),
+            [0.1, 0.1]))
+    for (fn, u0) in nlfuncs
+        push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0))
+    end
+
+    nlls_algs = (LevenbergMarquardt(), GaussNewton(), TrustRegion(),
+        LevenbergMarquardt(; linsolve = LUFactorization()),
+        GaussNewton(; linsolve = LUFactorization()),
+        TrustRegion(; linsolve = LUFactorization()), nothing)
+
+    @compile_workload begin
+        @sync begin
+            for T in (Float32, Float64), (fn, u0) in nlfuncs
+                Threads.@spawn NonlinearProblem(fn, T.(u0), T(2))
+            end
+            for (fn, u0) in nlfuncs
+                Threads.@spawn NonlinearLeastSquaresProblem(fn, u0, 2.0)
+            end
+            for prob in probs_nls, alg in nls_algs
+                Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false)
+            end
+            for prob in probs_nlls, alg in nlls_algs
+                Threads.@spawn solve(prob, alg; abstol = 1e-2, verbose = false)
+            end
+        end
+    end
+end
 
 # Core Algorithms
 export NewtonRaphson, PseudoTransient, Klement, Broyden, LimitedMemoryBroyden, DFSane
@@ -172,8 +172,9 @@ export NewtonDescent, SteepestDescent, Dogleg, DampedNewtonDescent, GeodesicAcce
 
 # Globalization
 ## Line Search Algorithms
-export LineSearchesJL, NoLineSearch, RobustNonMonotoneLineSearch, LiFukushimaLineSearch
-export Static, HagerZhang, MoreThuente, StrongWolfe, BackTracking
+export LineSearchesJL, LiFukushimaLineSearch # FIXME: deprecated; use LineSearch.jl directly
+export Static, HagerZhang, MoreThuente, StrongWolfe, BackTracking  # FIXME: deprecated
+export NoLineSearch, RobustNonMonotoneLineSearch
 ## Trust Region Algorithms
 export RadiusUpdateSchemes
 
diff --git a/src/algorithms/dfsane.jl b/src/algorithms/dfsane.jl
@@ -19,9 +19,8 @@ For other keyword arguments, see [`RobustNonMonotoneLineSearch`](@ref).
 function DFSane(; σ_min = 1 // 10^10, σ_max = 1e10, σ_1 = 1, M::Int = 10, γ = 1 // 10^4,
         τ_min = 1 // 10, τ_max = 1 // 2, n_exp::Int = 2, max_inner_iterations::Int = 100,
         η_strategy::ETA = (fn_1, n, x_n, f_n) -> fn_1 / n^2) where {ETA}
-    # linesearch = RobustNonMonotoneLineSearch(;
-    #     gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max,
-    #     n_exp, η_strategy, maxiters = max_inner_iterations)
-    linesearch = NoLineSearch()
+    linesearch = RobustNonMonotoneLineSearch(;
+        gamma = γ, sigma_1 = σ_1, M, tau_min = τ_min, tau_max = τ_max,
+        n_exp, η_strategy, maxiters = max_inner_iterations)
     return GeneralizedDFSane{:DFSane}(linesearch, σ_min, σ_max, nothing)
 end
diff --git a/src/core/spectral_methods.jl b/src/core/spectral_methods.jl
@@ -118,7 +118,7 @@ end
 function SciMLBase.__init(prob::AbstractNonlinearProblem, alg::GeneralizedDFSane, args...;
         stats = empty_nlstats(), alias_u0 = false, maxiters = 1000,
         abstol = nothing, reltol = nothing, termination_condition = nothing,
-        internalnorm::F = DEFAULT_NORM, maxtime = nothing, kwargs...) where {F}
+        maxtime = nothing, kwargs...)
     timer = get_timer_output()
     @static_timeit timer "cache construction" begin
         u = __maybe_unaliased(prob.u0, alias_u0)
@@ -129,8 +129,7 @@ function SciMLBase.__init(prob::AbstractNonlinearProblem, alg::GeneralizedDFSane
         fu = evaluate_f(prob, u)
         @bb fu_cache = copy(fu)
 
-        linesearch_cache = __internal_init(prob, alg.linesearch, prob.f, fu, u, prob.p;
-            stats, maxiters, internalnorm, kwargs...)
+        linesearch_cache = init(prob, alg.linesearch, fu, u; stats, kwargs...)
 
         abstol, reltol, tc_cache = init_termination_cache(
             prob, abstol, reltol, fu, u_cache, termination_condition)
@@ -166,7 +165,9 @@ function __step!(cache::GeneralizedDFSaneCache{iip};
     end
 
     @static_timeit cache.timer "linesearch" begin
-        linesearch_failed, α = __internal_solve!(cache.linesearch_cache, cache.u, cache.du)
+        linesearch_sol = solve!(cache.linesearch_cache, cache.u, cache.du)
+        linesearch_failed = !SciMLBase.successful_retcode(linesearch_sol.retcode)
+        α = linesearch_sol.step_size
     end
 
     if linesearch_failed
diff --git a/src/globalization/line_search.jl b/src/globalization/line_search.jl
@@ -24,124 +24,6 @@ end
 Base.@deprecate LiFukushimaLineSearch(; nan_max_iter::Int = 5, kwargs...) LineSearch.LiFukushimaLineSearch(;
     nan_maxiters = nan_max_iter, kwargs...)
 
-# """
-#     RobustNonMonotoneLineSearch(; gamma = 1 // 10000, sigma_0 = 1, M::Int = 10,
-#         tau_min = 1 // 10, tau_max = 1 // 2, n_exp::Int = 2, maxiters::Int = 100,
-#         η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2)
-
-# Robust NonMonotone Line Search is a derivative free line search method from DF Sane
-# [la2006spectral](@cite).
-
-# ### Keyword Arguments
-
-#   - `M`: The monotonicity of the algorithm is determined by a this positive integer.
-#     A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
-#     of the function `f`. However, higher values allow for more flexibility in this reduction.
-#     Despite this, the algorithm still ensures global convergence through the use of a
-#     non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
-#     condition. Values in the range of 5 to 20 are usually sufficient, but some cases may
-#     call for a higher value of `M`. The default setting is 10.
-#   - `gamma`: a parameter that influences if a proposed step will be accepted. Higher value
-#     of `gamma` will make the algorithm more restrictive in accepting steps. Defaults to
-#     `1e-4`.
-#   - `tau_min`: if a step is rejected the new step size will get multiplied by factor, and
-#     this parameter is the minimum value of that factor. Defaults to `0.1`.
-#   - `tau_max`: if a step is rejected the new step size will get multiplied by factor, and
-#     this parameter is the maximum value of that factor. Defaults to `0.5`.
-#   - `n_exp`: the exponent of the loss, i.e. ``f_n=||F(x_n)||^{n\\_exp}``. The paper uses
-#     `n_exp ∈ {1, 2}`. Defaults to `2`.
-#   - `η_strategy`:  function to determine the parameter `η`, which enables growth
-#     of ``||f_n||^2``. Called as `η = η_strategy(fn_1, n, x_n, f_n)` with `fn_1` initialized
-#     as ``fn_1=||f(x_1)||^{n\\_exp}``, `n` is the iteration number, `x_n` is the current
-#     `x`-value and `f_n` the current residual. Should satisfy ``η > 0`` and ``∑ₖ ηₖ < ∞``.
-#     Defaults to ``fn_1 / n^2``.
-#   - `maxiters`: the maximum number of iterations allowed for the inner loop of the
-#     algorithm. Defaults to `100`.
-# """
-# @kwdef @concrete struct RobustNonMonotoneLineSearch <:
-#                         AbstractNonlinearSolveLineSearchAlgorithm
-#     gamma = 1 // 10000
-#     sigma_1 = 1
-#     M::Int = 10
-#     tau_min = 1 // 10
-#     tau_max = 1 // 2
-#     n_exp::Int = 2
-#     maxiters::Int = 100
-#     η_strategy = (fn₁, n, uₙ, fₙ) -> fn₁ / n^2
-# end
-
-# @concrete mutable struct RobustNonMonotoneLineSearchCache <:
-#                          AbstractNonlinearSolveLineSearchCache
-#     f
-#     p
-#     ϕ
-#     u_cache
-#     fu_cache
-#     internalnorm
-#     maxiters::Int
-#     history
-#     γ
-#     σ₁
-#     M::Int
-#     τ_min
-#     τ_max
-#     nsteps::Int
-#     η_strategy
-#     n_exp::Int
-#     stats::NLStats
-# end
-
-# function __internal_init(
-#         prob::AbstractNonlinearProblem, alg::RobustNonMonotoneLineSearch, f::F, fu, u,
-#         p, args...; stats, internalnorm::IN = DEFAULT_NORM, kwargs...) where {F, IN}
-#     @bb u_cache = similar(u)
-#     @bb fu_cache = similar(fu)
-#     T = promote_type(eltype(fu), eltype(u))
-
-#     ϕ = @closure (f, p, u, du, α, u_cache, fu_cache) -> begin
-#         @bb @. u_cache = u + α * du
-#         fu_cache = evaluate_f!!(f, fu_cache, u_cache, p)
-#         stats.nf += 1
-#         return internalnorm(fu_cache)^alg.n_exp
-#     end
-
-#     fn₁ = internalnorm(fu)^alg.n_exp
-#     η_strategy = @closure (n, xₙ, fₙ) -> alg.η_strategy(fn₁, n, xₙ, fₙ)
-
-#     return RobustNonMonotoneLineSearchCache(
-#         f, p, ϕ, u_cache, fu_cache, internalnorm, alg.maxiters,
-#         fill(fn₁, alg.M), T(alg.gamma), T(alg.sigma_1), alg.M,
-#         T(alg.tau_min), T(alg.tau_max), 0, η_strategy, alg.n_exp, stats)
-# end
-
-# function __internal_solve!(cache::RobustNonMonotoneLineSearchCache, u, du; kwargs...)
-#     T = promote_type(eltype(u), eltype(du))
-#     ϕ = @closure α -> cache.ϕ(cache.f, cache.p, u, du, α, cache.u_cache, cache.fu_cache)
-#     f_norm_old = ϕ(eltype(u)(0))
-#     α₊, α₋ = T(cache.σ₁), T(cache.σ₁)
-#     η = cache.η_strategy(cache.nsteps, u, f_norm_old)
-#     f_bar = maximum(cache.history)
-
-#     for k in 1:(cache.maxiters)
-#         f_norm = ϕ(α₊)
-#         f_norm ≤ f_bar + η - cache.γ * α₊ * f_norm_old && return (false, α₊)
-
-#         α₊ *= clamp(α₊ * f_norm_old / (f_norm + (T(2) * α₊ - T(1)) * f_norm_old),
-#             cache.τ_min, cache.τ_max)
-
-#         f_norm = ϕ(-α₋)
-#         f_norm ≤ f_bar + η - cache.γ * α₋ * f_norm_old && return (false, -α₋)
-
-#         α₋ *= clamp(α₋ * f_norm_old / (f_norm + (T(2) * α₋ - T(1)) * f_norm_old),
-#             cache.τ_min, cache.τ_max)
-#     end
-
-#     return true, T(cache.σ₁)
-# end
-
-# function callback_into_cache!(topcache, cache::RobustNonMonotoneLineSearchCache, args...)
-#     fu = get_fu(topcache)
-#     cache.history[mod1(cache.nsteps, cache.M)] = cache.internalnorm(fu)^cache.n_exp
-#     cache.nsteps += 1
-#     return
-# end
+function callback_into_cache!(topcache, cache::AbstractLineSearchCache, args...)
+    LineSearch.callback_into_cache!(cache, get_fu(topcache))
+end