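Fix line search direction for some algorithms, including Broyden, Klement, and limited-memory Broyden, plus the Li–Fukushima line search (`du` now stores the un-negated step and iterates are updated as `u - α * du`)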

avik-pal · avik-pal · commit 3bf5853e75b4 · 2023-11-01T12:13:44.000-04:00
diff --git a/src/broyden.jl b/src/broyden.jl
@@ -61,8 +61,8 @@ get_fu(cache::GeneralBroydenCache) = cache.fu
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyden, args...;
     alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing, internalnorm = DEFAULT_NORM,
-    kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    kwargs...) where {uType, iip, F}
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu = evaluate_f(prob, u)
@@ -71,10 +71,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
                       alg.reset_tolerance
     reset_check = x -> abs(x) ≤ reset_tolerance
 
-    abstol, reltol, termination_condition = _init_termination_elements(abstol,
-        reltol,
-        termination_condition,
-        eltype(u))
+    abstol, reltol, termination_condition = _init_termination_elements(abstol, reltol,
+        termination_condition, eltype(u))
 
     mode = DiffEqBase.get_termination_mode(termination_condition)
 
@@ -83,8 +81,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::GeneralBroyde
     return GeneralBroydenCache{iip}(f, alg, u, zero(u), _mutable_zero(u), fu, zero(fu),
         zero(fu), p, J⁻¹, zero(_reshape(fu, 1, :)), _mutable_zero(u), false, 0,
         alg.max_resets, maxiters, internalnorm, ReturnCode.Default, abstol, reltol,
-        reset_tolerance,
-        reset_check, prob, NLStats(1, 0, 0, 0, 0),
+        reset_tolerance, reset_check, prob, NLStats(1, 0, 0, 0, 0),
         init_linesearch_cache(alg.linesearch, f, u, p, fu, Val(iip)), termination_condition,
         storage)
 end
@@ -95,9 +92,9 @@ function perform_step!(cache::GeneralBroydenCache{true})
     termination_condition = cache.termination_condition(tc_storage)
     T = eltype(u)
 
-    mul!(_vec(du), J⁻¹, -_vec(fu))
+    mul!(_vec(du), J⁻¹, _vec(fu))
     α = perform_linesearch!(cache.lscache, u, du)
-    _axpy!(α, du, u)
+    _axpy!(-α, du, u)
     f(fu2, u, p)
 
     termination_condition(fu2, u, u_prev, cache.abstol, cache.reltol) &&
@@ -119,6 +116,7 @@ function perform_step!(cache::GeneralBroydenCache{true})
         J⁻¹[diagind(J⁻¹)] .= T(1)
         cache.resets += 1
     else
+        du .*= -1
         mul!(_vec(J⁻¹df), J⁻¹, _vec(dfu))
         mul!(J⁻¹₂, _vec(du)', J⁻¹)
         denom = dot(du, J⁻¹df)
@@ -138,9 +136,9 @@ function perform_step!(cache::GeneralBroydenCache{false})
 
     T = eltype(cache.u)
 
-    cache.du = _restructure(cache.du, cache.J⁻¹ * -_vec(cache.fu))
+    cache.du = _restructure(cache.du, cache.J⁻¹ * _vec(cache.fu))
     α = perform_linesearch!(cache.lscache, cache.u, cache.du)
-    cache.u = cache.u .+ α * cache.du
+    cache.u = cache.u .- α * cache.du
     cache.fu2 = f(cache.u, p)
 
     termination_condition(cache.fu2, cache.u, cache.u_prev, cache.abstol, cache.reltol) &&
@@ -160,6 +158,7 @@ function perform_step!(cache::GeneralBroydenCache{false})
         cache.J⁻¹ = __init_identity_jacobian(cache.u, cache.fu)
         cache.resets += 1
     else
+        cache.du = -cache.du
         cache.J⁻¹df = _restructure(cache.J⁻¹df, cache.J⁻¹ * _vec(cache.dfu))
         cache.J⁻¹₂ = _vec(cache.du)' * cache.J⁻¹
         denom = dot(cache.du, cache.J⁻¹df)
diff --git a/src/dfsane.jl b/src/dfsane.jl
@@ -97,8 +97,8 @@ end
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::DFSane, args...;
     alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing, internalnorm = DEFAULT_NORM,
-    kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    kwargs...) where {uType, iip, F}
     uₙ = alias_u0 ? prob.u0 : deepcopy(prob.u0)
 
     p = prob.p
diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl
@@ -49,9 +49,7 @@ end
 function GaussNewton(; concrete_jac = nothing, linsolve = nothing,
     precs = DEFAULT_PRECS, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
-    return GaussNewton{_unwrap_val(concrete_jac)}(ad,
-        linsolve,
-        precs)
+    return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs)
 end
 
 @concrete mutable struct GaussNewtonCache{iip} <: AbstractNonlinearSolveCache{iip}
@@ -84,21 +82,15 @@ end
 
 function SciMLBase.__init(prob::NonlinearLeastSquaresProblem{uType, iip}, alg_::GaussNewton,
     args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing,
-    internalnorm = DEFAULT_NORM,
-    kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    kwargs...) where {uType, iip, F}
     alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
 
     linsolve_with_JᵀJ = Val(_needs_square_A(alg, u0))
 
     u = alias_u0 ? u0 : deepcopy(u0)
-    if iip
-        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
-        f(fu1, u, p)
-    else
-        fu1 = f(u, p)
-    end
+    fu1 = evaluate_f(prob, u)
 
     if SciMLBase._unwrap_val(linsolve_with_JᵀJ)
         uf, linsolve, J, fu2, jac_cache, du, JᵀJ, Jᵀf = jacobian_caches(alg, f, u, p,
diff --git a/src/klement.jl b/src/klement.jl
@@ -70,8 +70,8 @@ get_fu(cache::GeneralKlementCache) = cache.fu
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKlement, args...;
     alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing, internalnorm = DEFAULT_NORM,
-    linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    linsolve_kwargs = (;), kwargs...) where {uType, iip, F}
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     fu = evaluate_f(prob, u)
@@ -89,10 +89,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::GeneralKleme
         linsolve = __setup_linsolve(J, _vec(fu), _vec(du), p, alg)
     end
 
-    abstol, reltol, termination_condition = _init_termination_elements(abstol,
-        reltol,
-        termination_condition,
-        eltype(u))
+    abstol, reltol, termination_condition = _init_termination_elements(abstol, reltol,
+        termination_condition, eltype(u))
 
     mode = DiffEqBase.get_termination_mode(termination_condition)
 
@@ -129,12 +127,12 @@ function perform_step!(cache::GeneralKlementCache{true})
 
     # u = u - J \ fu
     linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
-        b = -_vec(fu), linu = _vec(du), p, reltol = cache.abstol)
+        b = _vec(fu), linu = _vec(du), p, reltol = cache.abstol)
     cache.linsolve = linres.cache
 
     # Line Search
     α = perform_linesearch!(cache.lscache, u, du)
-    _axpy!(α, du, u)
+    _axpy!(-α, du, u)
     f(cache.fu2, u, p)
 
     termination_condition(cache.fu2, u, u_prev, cache.abstol, cache.reltol) &&
@@ -146,6 +144,7 @@ function perform_step!(cache::GeneralKlementCache{true})
     cache.force_stop && return nothing
 
     # Update the Jacobian
+    cache.du .*= -1
     cache.J_cache .= cache.J' .^ 2
     cache.Jdu .= _vec(du) .^ 2
     mul!(cache.Jᵀ²du, cache.J_cache, cache.Jdu)
@@ -186,29 +185,30 @@ function perform_step!(cache::GeneralKlementCache{false})
 
     # u = u - J \ fu
     if linsolve === nothing
-        cache.du = -fu / cache.J
+        cache.du = fu / cache.J
     else
         linres = dolinsolve(alg.precs, linsolve; A = ifelse(fact_done, nothing, J),
-            b = -_vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol)
+            b = _vec(fu), linu = _vec(cache.du), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
 
     # Line Search
     α = perform_linesearch!(cache.lscache, cache.u, cache.du)
-    cache.u = @. cache.u + α * cache.du  # `u` might not support mutation
+    cache.u = @. cache.u - α * cache.du  # `u` might not support mutation
     cache.fu2 = f(cache.u, p)
 
     termination_condition(cache.fu2, cache.u, cache.u_prev, cache.abstol, cache.reltol) &&
         (cache.force_stop = true)
 
-    cache.u_prev = @. cache.u
+    cache.u_prev = cache.u
     cache.stats.nf += 1
     cache.stats.nsolve += 1
     cache.stats.nfactors += 1
 
     cache.force_stop && return nothing
 
     # Update the Jacobian
+    cache.du = -cache.du
     cache.J_cache = cache.J' .^ 2
     cache.Jdu = _vec(cache.du) .^ 2
     cache.Jᵀ²du = cache.J_cache * cache.Jdu
diff --git a/src/lbroyden.jl b/src/lbroyden.jl
@@ -68,8 +68,8 @@ get_fu(cache::LimitedMemoryBroydenCache) = cache.fu
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemoryBroyden,
     args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing, internalnorm = DEFAULT_NORM,
-    kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    kwargs...) where {uType, iip, F}
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
     if u isa Number
@@ -81,15 +81,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::LimitedMemory
     fu = evaluate_f(prob, u)
     threshold = min(alg.threshold, maxiters)
     U, Vᵀ = __init_low_rank_jacobian(u, fu, threshold)
-    du = -fu
+    du = copy(fu)
     reset_tolerance = alg.reset_tolerance === nothing ? sqrt(eps(eltype(u))) :
                       alg.reset_tolerance
     reset_check = x -> abs(x) ≤ reset_tolerance
 
-    abstol, reltol, termination_condition = _init_termination_elements(abstol,
-        reltol,
-        termination_condition,
-        eltype(u))
+    abstol, reltol, termination_condition = _init_termination_elements(abstol, reltol,
+        termination_condition, eltype(u))
 
     mode = DiffEqBase.get_termination_mode(termination_condition)
 
@@ -112,7 +110,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{true})
     termination_condition = cache.termination_condition(tc_storage)
 
     α = perform_linesearch!(cache.lscache, u, du)
-    _axpy!(α, du, u)
+    _axpy!(-α, du, u)
     f(cache.fu2, u, p)
 
     termination_condition(cache.fu2, cache.u, cache.u_prev, cache.abstol, cache.reltol) &&
@@ -134,7 +132,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{true})
         end
         cache.iterations_since_reset = 0
         cache.resets += 1
-        cache.du .= -cache.fu
+        cache.du .= cache.fu
     else
         idx = min(cache.iterations_since_reset, size(cache.U, 1))
         U_part = selectdim(cache.U, 1, 1:idx)
@@ -154,7 +152,6 @@ function perform_step!(cache::LimitedMemoryBroydenCache{true})
         U_part = selectdim(cache.U, 1, 1:idx)
         Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
         __lbroyden_matvec!(_vec(cache.du), cache.Ux, U_part, Vᵀ_part, _vec(cache.fu2))
-        cache.du .*= -1
         cache.iterations_since_reset += 1
     end
 
@@ -172,7 +169,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{false})
     T = eltype(cache.u)
 
     α = perform_linesearch!(cache.lscache, cache.u, cache.du)
-    cache.u = cache.u .+ α * cache.du
+    cache.u = cache.u .- α * cache.du
     cache.fu2 = f(cache.u, p)
 
     termination_condition(cache.fu2, cache.u, cache.u_prev, cache.abstol, cache.reltol) &&
@@ -194,7 +191,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{false})
         end
         cache.iterations_since_reset = 0
         cache.resets += 1
-        cache.du = -cache.fu
+        cache.du = cache.fu
     else
         idx = min(cache.iterations_since_reset, size(cache.U, 1))
         U_part = selectdim(cache.U, 1, 1:idx)
@@ -215,7 +212,7 @@ function perform_step!(cache::LimitedMemoryBroydenCache{false})
         U_part = selectdim(cache.U, 1, 1:idx)
         Vᵀ_part = selectdim(cache.Vᵀ, 2, 1:idx)
         cache.du = _restructure(cache.du,
-            -__lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2)))
+            __lbroyden_matvec(U_part, Vᵀ_part, _vec(cache.fu2)))
         cache.iterations_since_reset += 1
     end
 
diff --git a/src/levenberg.jl b/src/levenberg.jl
@@ -163,9 +163,8 @@ end
 function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip},
         NonlinearLeastSquaresProblem{uType, iip}}, alg_::LevenbergMarquardt,
     args...; alias_u0 = false, maxiters = 1000, abstol = nothing, reltol = nothing,
-    termination_condition = nothing,
-    internalnorm = DEFAULT_NORM,
-    linsolve_kwargs = (;), kwargs...) where {uType, iip}
+    termination_condition = nothing, internalnorm::F = DEFAULT_NORM,
+    linsolve_kwargs = (;), kwargs...) where {uType, iip, F}
     alg = get_concrete_algorithm(alg_, prob)
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
@@ -231,10 +230,8 @@ function SciMLBase.__init(prob::Union{NonlinearProblem{uType, iip},
     end
 
     return LevenbergMarquardtCache{iip, !_unwrap_val(linsolve_with_JᵀJ)}(f, alg, u, copy(u),
-        fu1,
-        fu2, du, p, uf, linsolve, J,
-        jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, reltol, prob,
-        DᵀD,
+        fu1, fu2, du, p, uf, linsolve, J, jac_cache, false, maxiters, internalnorm,
+        ReturnCode.Default, abstol, reltol, prob, DᵀD,
         JᵀJ, λ, λ_factor, damping_increase_factor, damping_decrease_factor, h, α_geodesic,
         b_uphill, min_damping_D, v, a, tmp_vec, v_old, loss, δ, loss, make_new_J, fu_tmp,
         zero(u), zero(fu1), mat_tmp, rhs_tmp, J², NLStats(1, 0, 0, 0, 0),
@@ -321,11 +318,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true, fastls}) where {fast
         if (1 - β)^b_uphill * loss ≤ loss_old
             # Accept step.
             cache.u .+= δ
-            if termination_condition(cache.fu_tmp,
-                cache.u,
-                u_prev,
-                cache.abstol,
-                cache.reltol)
+            if termination_condition(cache.fu_tmp, u, u_prev, cache.abstol, cache.reltol)
                 cache.force_stop = true
                 return nothing
             end
diff --git a/src/linesearch.jl b/src/linesearch.jl
@@ -228,21 +228,21 @@ function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) wher
 
     # Early Terminate based on Eq. 2.7
     if iip
-        cache.u_cache .= u .+ du
+        cache.u_cache .= u .- du
         cache.f(cache.fu_cache, cache.u_cache, cache.p)
         fxλ_norm = norm(cache.fu_cache, 2)
     else
-        fxλ_norm = norm(cache.f(u .+ du, cache.p), 2)
+        fxλ_norm = norm(cache.f(u .- du, cache.p), 2)
     end
 
     fxλ_norm ≤ ρ * fx_norm - σ₂ * norm(du, 2)^2 && return cache.α
 
     if iip
-        cache.u_cache .= u .+ λ₂ .* du
+        cache.u_cache .= u .- λ₂ .* du
         cache.f(cache.fu_cache, cache.u_cache, cache.p)
         fxλp_norm = norm(cache.fu_cache, 2)
     else
-        fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+        fxλp_norm = norm(cache.f(u .- λ₂ .* du, cache.p), 2)
     end
 
     if !isfinite(fxλp_norm)
@@ -252,11 +252,11 @@ function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) wher
             λ₁, λ₂ = λ₂, β * λ₂
 
             if iip
-                cache.u_cache .= u .+ λ₂ .* du
+                cache.u_cache .= u .- λ₂ .* du
                 cache.f(cache.fu_cache, cache.u_cache, cache.p)
                 fxλp_norm = norm(cache.fu_cache, 2)
             else
-                fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+                fxλp_norm = norm(cache.f(u .- λ₂ .* du, cache.p), 2)
             end
 
             nan_converged = isfinite(fxλp_norm)
@@ -269,11 +269,11 @@ function perform_linesearch!(cache::LiFukushimaLineSearchCache{iip}, u, du) wher
 
     for _ in 1:maxiters
         if iip
-            cache.u_cache .= u .+ λ₂ .* du
+            cache.u_cache .= u .- λ₂ .* du
             cache.f(cache.fu_cache, cache.u_cache, cache.p)
             fxλp_norm = norm(cache.fu_cache, 2)
         else
-            fxλp_norm = norm(cache.f(u .+ λ₂ .* du, cache.p), 2)
+            fxλp_norm = norm(cache.f(u .- λ₂ .* du, cache.p), 2)
         end
 
         converged = fxλp_norm ≤ (1 + η) * fx_norm - σ₁ * λ₂^2 * norm(du, 2)^2
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
@@ -86,12 +86,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg_::PseudoTransi
 
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
-    if iip
-        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
-        f(fu1, u, p)
-    else
-        fu1 = _mutable(f(u, p))
-    end
+    fu1 = evaluate_f(prob, u)
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
     alpha = convert(eltype(u), alg.alpha_initial)
diff --git a/src/raphson.jl b/src/raphson.jl