Broyden with LineSearch

avik-pal · avik-pal · commit c6992a58674c · 2023-10-20T18:31:04.000-04:00
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
@@ -26,6 +26,8 @@ import UnPack: @unpack
 const AbstractSparseADType = Union{ADTypes.AbstractSparseFiniteDifferences,
     ADTypes.AbstractSparseForwardMode, ADTypes.AbstractSparseReverseMode}
 
+abstract type AbstractNonlinearSolveLineSearchAlgorithm end
+
 abstract type AbstractNonlinearSolveAlgorithm <: AbstractNonlinearAlgorithm end
 abstract type AbstractNewtonAlgorithm{CJ, AD} <: AbstractNonlinearSolveAlgorithm end
 
@@ -105,6 +107,6 @@ export NewtonRaphson, TrustRegion, LevenbergMarquardt, DFSane, GaussNewton, Pseu
 export LeastSquaresOptimJL, FastLevenbergMarquardtJL
 export RobustMultiNewton, FastShortcutNonlinearPolyalg
 
-export LineSearch
+export LineSearch, LiFukushimaLineSearch
 
 end # module
diff --git a/src/broyden.jl b/src/broyden.jl
@@ -1,19 +1,28 @@
 # Sadly `Broyden` is taken up by SimpleNonlinearSolve.jl
 """
-    GeneralBroyden(max_resets)
-    GeneralBroyden(; max_resets = 3)
+    GeneralBroyden(max_resets, linesearch)
+    GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
 
-An implementation of `Broyden` with support for caching!
+An implementation of `Broyden` with resetting and line search.
 
 ## Arguments
 
   - `max_resets`: the maximum number of resets to perform. Defaults to `3`.
+  - `linesearch`: the line search algorithm to use. Defaults to [`LineSearch()`](@ref),
+    which means that no line search is performed. Algorithms from `LineSearches.jl` can be
+    used here directly, and they will be converted to the correct `LineSearch`. It is
+    recommended to use [`LiFukushimaLineSearch`](@ref) -- a derivative-free line search
+    specifically designed for Broyden's method.
 """
-struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
+@concrete struct GeneralBroyden <: AbstractNewtonAlgorithm{false, Nothing}
     max_resets::Int
+    linesearch
 end
 
-GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
+function GeneralBroyden(; max_resets = 3, linesearch = LineSearch())
+    linesearch = linesearch isa LineSearch ? linesearch : LineSearch(; method = linesearch)
+    return GeneralBroyden(max_resets, linesearch)
+end
 
 @concrete mutable struct GeneralBroydenCache{iip} <: AbstractNonlinearSolveCache{iip}
     f
@@ -29,13 +38,14 @@ GeneralBroyden(; max_resets = 3) = GeneralBroyden(max_resets)
     J⁻¹df
     force_stop::Bool
     resets::Int
-    max_rests::Int
+    max_resets::Int
     maxiters::Int
     internalnorm
     retcode::ReturnCode.T
     abstol
     prob
     stats::NLStats
+    lscache
 end
 
 get_fu(cache::GeneralBroydenCache) = cache.fu
@@ -46,19 +56,20 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu = evaluate_f(prob, u)
-    J⁻¹ = convert(parameterless_type(_mutable(u)),
-        Matrix{eltype(u)}(I, length(fu), length(u)))
-    return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, similar(fu),
-        similar(fu), p, J⁻¹, similar(fu'), _mutable_zero(u), false, 0, alg.max_resets,
-        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0))
+    J⁻¹ = __init_identity_jacobian(u, fu)
+    return GeneralBroydenCache{iip}(f, alg, u, _mutable_zero(u), fu, zero(fu),
+        zero(fu), p, J⁻¹, zero(fu'), _mutable_zero(u), false, 0, alg.max_resets,
+        maxiters, internalnorm, ReturnCode.Default, abstol, prob, NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)))
 end
 
 function perform_step!(cache::GeneralBroydenCache{true})
     @unpack f, p, du, fu, fu2, dfu, u, J⁻¹, J⁻¹df, J⁻¹₂ = cache
     T = eltype(u)
 
     mul!(du, J⁻¹, -fu)
-    u .+= du
+    α = perform_linesearch!(cache.lscache, u, du)
+    axpy!(α, du, u)
     f(fu2, u, p)
 
     cache.internalnorm(fu2) < cache.abstol && (cache.force_stop = true)
@@ -68,7 +79,7 @@ function perform_step!(cache::GeneralBroydenCache{true})
 
     # Update the inverse jacobian
     dfu .= fu2 .- fu
-    if cache.resets < cache.max_rests &&
+    if cache.resets < cache.max_resets &&
        (all(x -> abs(x) ≤ 1e-12, du) || all(x -> abs(x) ≤ 1e-12, dfu))
         fill!(J⁻¹, 0)
         J⁻¹[diagind(J⁻¹)] .= T(1)
@@ -83,3 +94,57 @@ function perform_step!(cache::GeneralBroydenCache{true})
 
     return nothing
 end
+
+function perform_step!(cache::GeneralBroydenCache{false})
+    @unpack f, p = cache
+    T = eltype(cache.u)
+
+    cache.du = cache.J⁻¹ * -cache.fu  # search direction from the current inverse Jacobian
+    α = perform_linesearch!(cache.lscache, cache.u, cache.du)
+    cache.u = cache.u .+ α * cache.du
+    cache.fu2 = f(cache.u, p)
+
+    cache.internalnorm(cache.fu2) < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+
+    cache.force_stop && return nothing
+
+    # Update the inverse jacobian
+    cache.dfu = cache.fu2 .- cache.fu
+    if cache.resets < cache.max_resets &&
+       (all(x -> abs(x) ≤ 1e-12, cache.du) || all(x -> abs(x) ≤ 1e-12, cache.dfu))
+        # Rebuild the identity through the same helper `__init` uses: unlike
+        # `similar` + `fill!`, it also handles scalar problems, where `cache.J⁻¹`
+        # is a plain number and `similar` has no method.
+        cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu)
+        cache.resets += 1
+    else
+        cache.J⁻¹df = cache.J⁻¹ * cache.dfu
+        cache.J⁻¹₂ = cache.du' * cache.J⁻¹
+        cache.du = (cache.du .- cache.J⁻¹df) ./ (dot(cache.du, cache.J⁻¹df) .+ T(1e-5))
+        cache.J⁻¹ = cache.J⁻¹ .+ cache.du * cache.J⁻¹₂
+    end
+    cache.fu = cache.fu2
+
+    return nothing
+end
+
+function SciMLBase.reinit!(cache::GeneralBroydenCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu = cache.f(cache.u, p)
+    end
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = cache.stats.nsteps = 1
+    cache.resets = 0  # the previous solve's resets must not eat into this one's budget
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/src/klement.jl b/src/klement.jl
@@ -0,0 +1 @@
+
diff --git a/src/linesearch.jl b/src/linesearch.jl
@@ -26,7 +26,15 @@ function LineSearch(; method = Static(), autodiff = AutoFiniteDiff(), alpha = tr
     return LineSearch(method, autodiff, alpha)
 end
 
-@concrete mutable struct LineSearchCache
+@inline function init_linesearch_cache(ls::LineSearch, args...)
+    return init_linesearch_cache(ls.method, ls, args...)
+end
+
+# LineSearches.jl algorithms share no supertype, so this untyped fallback handles them
+init_linesearch_cache(_, ls, f, u, p, fu, iip) = LineSearchesJLCache(ls, f, u, p, fu, iip)
+
+# Wrapper over LineSearches.jl algorithms
+@concrete mutable struct LineSearchesJLCache
     f
     ϕ
     dϕ
@@ -35,11 +43,11 @@ end
     ls
 end
 
-function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
+function LineSearchesJLCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
     eval_f(u, du, α) = eval_f(u - α * du)
     eval_f(u) = f(u, p)
 
-    ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
+    ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
         convert(typeof(u), ls.α), ls)
 
     g(u, fu) = last(value_derivative(Base.Fix2(f, p), u)) * fu
@@ -73,11 +81,11 @@ function LineSearchCache(ls::LineSearch, f, u::Number, p, _, ::Val{false})
         return ϕdϕ_internal
     end
 
-    return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
+    return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
 end
 
-function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
-    fu = iip ? fu1 : nothing
+function LineSearchesJLCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip}
+    fu = iip ? deepcopy(fu1) : nothing
     u_ = _mutable_zero(u)
 
     function eval_f(u, du, α)
@@ -86,7 +94,7 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
     end
     eval_f(u) = evaluate_f(f, u, p, IIP; fu)
 
-    ls.method isa Static && return LineSearchCache(eval_f, nothing, nothing, nothing,
+    ls.method isa Static && return LineSearchesJLCache(eval_f, nothing, nothing, nothing,
         convert(eltype(u), ls.α), ls)
 
     g₀ = _mutable_zero(u)
@@ -138,10 +146,10 @@ function LineSearchCache(ls::LineSearch, f, u, p, fu1, IIP::Val{iip}) where {iip
         return ϕdϕ_internal
     end
 
-    return LineSearchCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
+    return LineSearchesJLCache(eval_f, ϕ, dϕ, ϕdϕ, convert(eltype(u), ls.α), ls)
 end
 
-function perform_linesearch!(cache::LineSearchCache, u, du)
+function perform_linesearch!(cache::LineSearchesJLCache, u, du)
     cache.ls.method isa Static && return cache.α
 
     ϕ = cache.ϕ(u, du)
@@ -155,3 +163,120 @@ function perform_linesearch!(cache::LineSearchCache, u, du)
 
     return first(cache.ls.method(ϕ, cache.dϕ(u, du), cache.ϕdϕ(u, du), cache.α, ϕ₀, dϕ₀))
 end
+
+"""
+    LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001, sigma_2 = 0.001,
+        eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50)
+
+Derivative-free line search from "A derivative-free line search and global convergence
+of Broyden-like method for nonlinear equations" by Dong-Hui Li & Masao Fukushima. See
+https://doi.org/10.1080/10556780008805782 for details.
+"""
+struct LiFukushimaLineSearch{T} <: AbstractNonlinearSolveLineSearchAlgorithm
+    λ₀::T  # initial trial step length
+    β::T  # factor the trial step is multiplied by on each backtrack
+    σ₁::T  # weight of the `λ² * ‖du‖²` term in the acceptance test
+    σ₂::T  # weight of the `‖du‖²` term in the full-step early-exit test
+    η::T  # slack: acceptance compares against `(1 + η)` times the starting norm
+    ρ::T  # factor applied to the starting norm in the early-exit test
+    nan_max_iter::Int  # max backtracks spent escaping a non-finite residual norm
+    maxiters::Int  # max iterations of the acceptance loop
+end
+
+function LiFukushimaLineSearch(; lambda_0 = 1.0, beta = 0.1, sigma_1 = 0.001,
+    sigma_2 = 0.001, eta = 0.1, rho = 0.9, nan_max_iter = 5, maxiters = 50)
+    # All real-valued settings are stored under one promoted element type
+    T = promote_type(map(typeof, (lambda_0, beta, sigma_1, eta, rho, sigma_2))...)
+    params = (lambda_0, beta, sigma_1, sigma_2, eta, rho, nan_max_iter, maxiters)
+    return LiFukushimaLineSearch{T}(params...)
+end
+
+@concrete mutable struct LiFukushimaLineSearchCache{iip}
+    f  # residual function: called as `f(fu, u, p)` when `iip`, `f(u, p)` otherwise
+    p  # parameters forwarded to `f`
+    u_cache  # scratch buffer for trial points (`nothing` when not `iip`)
+    fu_cache  # scratch buffer for residuals (`nothing` when not `iip`)
+    alg  # the `LiFukushimaLineSearch` settings
+    α  # step returned on the early exits; taken from the wrapping `LineSearch`
+end
+
+function init_linesearch_cache(alg::LiFukushimaLineSearch, ls::LineSearch, f, _u, p, _fu,
+    ::Val{iip}) where {iip}
+    fu = iip ? deepcopy(_fu) : nothing  # private residual buffer, in-place path only
+    u = iip ? deepcopy(_u) : nothing  # private trial-point buffer, in-place path only
+    return LiFukushimaLineSearchCache{iip}(f, p, u, fu, alg, convert(eltype(_u), ls.α))
+end
+
+function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) where {iip}
+    (; β, σ₁, σ₂, η, λ₀, ρ, nan_max_iter, maxiters) = cache.alg
+    # Trial step length, multiplied by β after every rejected trial
+    λ = λ₀
+
+    if iip
+        cache.f(cache.fu_cache, u, cache.p)
+        fx_norm = norm(cache.fu_cache, 2)
+    else
+        fx_norm = norm(cache.f(u, cache.p), 2)
+    end
+
+    # Non-blocking exit: with a NaN/Inf starting norm no test below is meaningful
+    !isfinite(fx_norm) && return cache.α
+
+    # Early termination based on Eq. 2.7: try the full step `u + du` first
+    if iip
+        cache.u_cache .= u .+ du
+        cache.f(cache.fu_cache, cache.u_cache, cache.p)
+        fxλ_norm = norm(cache.fu_cache, 2)
+    else
+        fxλ_norm = norm(cache.f(u .+ du, cache.p), 2)
+    end
+
+    fxλ_norm ≤ ρ * fx_norm - σ₂ * norm(du, 2)^2 && return cache.α
+
+    if iip
+        cache.u_cache .= u .+ λ .* du
+        cache.f(cache.fu_cache, cache.u_cache, cache.p)
+        fxλp_norm = norm(cache.fu_cache, 2)
+    else
+        fxλp_norm = norm(cache.f(u .+ λ .* du, cache.p), 2)
+    end
+
+    if !isfinite(fxλp_norm)
+        # Backtrack a finite number of steps until the residual norm is finite again
+        nan_converged = false
+        for _ in 1:nan_max_iter
+            λ = β * λ
+
+            if iip
+                cache.u_cache .= u .+ λ .* du
+                cache.f(cache.fu_cache, cache.u_cache, cache.p)
+                fxλp_norm = norm(cache.fu_cache, 2)
+            else
+                fxλp_norm = norm(cache.f(u .+ λ .* du, cache.p), 2)
+            end
+
+            nan_converged = isfinite(fxλp_norm)
+            nan_converged && break
+        end
+
+        # Non-blocking exit if the norm is still NaN or Inf
+        !nan_converged && return cache.α
+    end
+
+    for _ in 1:maxiters
+        if iip
+            cache.u_cache .= u .+ λ .* du
+            cache.f(cache.fu_cache, cache.u_cache, cache.p)
+            fxλp_norm = norm(cache.fu_cache, 2)
+        else
+            fxλp_norm = norm(cache.f(u .+ λ .* du, cache.p), 2)
+        end
+
+        converged = fxλp_norm ≤ (1 + η) * fx_norm - σ₁ * λ^2 * norm(du, 2)^2
+
+        converged && break
+        λ = β * λ
+    end
+
+    return λ
+end
diff --git a/src/raphson.jl b/src/raphson.jl
@@ -82,7 +82,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::NewtonRaphso
 
     return NewtonRaphsonCache{iip}(f, alg, u, fu1, fu2, du, p, uf, linsolve, J,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
-        NLStats(1, 0, 0, 0, 0), LineSearchCache(alg.linesearch, f, u, p, fu1, Val(iip)))
+        NLStats(1, 0, 0, 0, 0),
+        init_linesearch_cache(alg.linesearch, f, u, p, fu1, Val(iip)))
 end
 
 function perform_step!(cache::NewtonRaphsonCache{true})
@@ -96,7 +97,7 @@ function perform_step!(cache::NewtonRaphsonCache{true})
 
     # Line Search
     α = perform_linesearch!(cache.lscache, u, du)
-    @. u = u - α * du
+    axpy!(-α, du, u)  # u ← u - α * du, as in the `@.` update this line replaces
     f(cache.fu1, u, p)
 
     cache.internalnorm(fu1) < cache.abstol && (cache.force_stop = true)
diff --git a/src/utils.jl b/src/utils.jl
@@ -35,8 +35,8 @@ function default_adargs_to_adtype(; chunk_size = missing, autodiff = nothing,
     if chunk_size !== missing || standardtag !== missing || diff_type !== missing ||
        autodiff !== missing
         Base.depwarn("`chunk_size`, `standardtag`, `diff_type`, \
-            `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed in\
-             v3. Update your code to directly specify autodiff=<ADTypes>",
+            `autodiff::Union{Val, Bool}` kwargs have been deprecated and will be removed \
+             in v3. Update your code to directly specify autodiff=<ADTypes>",
             :default_adargs_to_adtype)
     end
     chunk_size === missing && (chunk_size = Val{0}())
@@ -211,3 +211,13 @@ function __get_concrete_algorithm(alg, prob)
     end
     return set_ad(alg, ad)
 end
+
+__init_identity_jacobian(u::Number, _) = one(u)  # scalar identity is 1, not the iterate
+function __init_identity_jacobian(u, fu)
+    return convert(parameterless_type(_mutable(u)),
+        Matrix{eltype(u)}(I, length(fu), length(u)))
+end
+function __init_identity_jacobian(u::StaticArray, fu)
+    return convert(MArray{Tuple{length(fu), length(u)}},
+        Matrix{eltype(u)}(I, length(fu), length(u)))
+end
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
diff --git a/test/basictests.jl b/test/basictests.jl