Skip to content

Commit d87f299

Browse files
committed
feat: add SimpleDFSane
1 parent 5b51678 commit d87f299

File tree

3 files changed

+175
-1
lines changed

3 files changed

+175
-1
lines changed

lib/SimpleNonlinearSolve/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "1.13.0"
55

66
[deps]
77
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
8+
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
89
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
910
BracketingNonlinearSolve = "70df07ce-3d50-431d-a3e7-ca6ddb60ac1e"
1011
CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"

lib/SimpleNonlinearSolve/src/SimpleNonlinearSolve.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
module SimpleNonlinearSolve
22

3+
using Accessors: @reset
34
using CommonSolve: CommonSolve, solve
45
using ConcreteStructs: @concrete
56
using FastClosures: @closure
@@ -10,7 +11,7 @@ using PrecompileTools: @compile_workload, @setup_workload
1011
using Reexport: @reexport
1112
@reexport using SciMLBase # I don't like this but needed to avoid a breaking change
1213
using SciMLBase: AbstractNonlinearAlgorithm, NonlinearProblem, ReturnCode
13-
using StaticArraysCore: StaticArray
14+
using StaticArraysCore: StaticArray, SVector
1415

1516
# AD Dependencies
1617
using ADTypes: AbstractADType, AutoFiniteDiff, AutoForwardDiff, AutoPolyesterForwardDiff
@@ -81,7 +82,9 @@ function solve_adjoint_internal end
8182

8283
algs = [
8384
SimpleBroyden(),
85+
# SimpleDFSane(),
8486
SimpleKlement(),
87+
# SimpleLimitedMemoryBroyden(),
8588
SimpleHalley(),
8689
SimpleNewtonRaphson(),
8790
SimpleTrustRegion()
@@ -100,6 +103,7 @@ export AutoFiniteDiff, AutoForwardDiff, AutoPolyesterForwardDiff
100103
export Alefeld, Bisection, Brent, Falsi, ITP, Ridder
101104

102105
export SimpleBroyden, SimpleKlement
106+
export SimpleDFSane
103107
export SimpleGaussNewton, SimpleNewtonRaphson, SimpleTrustRegion
104108
export SimpleHalley
105109

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,170 @@
1+
"""
    SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
        M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
        nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 ./ k^2)

A low-overhead implementation of the df-sane method for solving large-scale nonlinear
systems of equations. For in depth information about all the parameters and the algorithm,
see [la2006spectral](@citet).

### Keyword Arguments

  - `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the
    step size in the algorithm. Defaults to `1e-10`.
  - `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the
    step size in the algorithm. Defaults to `1e10`.
  - `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
    size in the algorithm. Defaults to `1.0`.
  - `M`: The monotonicity of the algorithm is determined by this positive integer, which
    may be passed either as an `Int` or as a `Val`. A value of 1 for `M` would result in
    strict monotonicity in the decrease of the L2-norm of the function `f`. However,
    higher values allow for more flexibility in this reduction. Despite this, the
    algorithm still ensures global convergence through the use of a non-monotone
    line-search algorithm that adheres to the Grippo-Lampariello-Lucidi condition. Values
    in the range of 5 to 20 are usually sufficient, but some cases may call for a higher
    value of `M`. The default setting is 10.
  - `γ`: a parameter that influences if a proposed step will be accepted. Higher value of
    `γ` will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
  - `τ_min`: if a step is rejected the new step size will get multiplied by a factor, and
    this parameter is the minimum value of that factor. Defaults to `0.1`.
  - `τ_max`: if a step is rejected the new step size will get multiplied by a factor, and
    this parameter is the maximum value of that factor. Defaults to `0.5`.
  - `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
    `nexp ∈ {1,2}`. Defaults to `2`.
  - `η_strategy`: function to determine the parameter `η_k`, which enables growth
    of ``||F||^2``. Called as `η_k = η_strategy(f_1, k, x, F)` with `f_1` initialized as
    ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
    `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
    ``f_1 / k^2``.
"""
@concrete struct SimpleDFSane <: AbstractSimpleNonlinearSolveAlgorithm
    # Bounds and initial value for the spectral coefficient `σ_k`.
    σ_min
    σ_max
    σ_1
    # Step-acceptance parameter of the line search.
    γ
    # Lower and upper bound of the factor applied to a rejected step size.
    τ_min
    τ_max
    # Exponent applied to the residual norm when forming the merit value.
    nexp::Int
    # Callable producing `η_k`; invoked as `η_strategy(f_1, k, x, F)`.
    η_strategy
    # Length of the merit-value history, stored in the type domain.
    M <: Val
end
50+
51+
# XXX[breaking]: we should change the names to not have unicode
# Keyword constructor; see the `SimpleDFSane` docstring for the meaning of each option.
function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
        M::Union{Int, Val} = Val(10), γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
        nexp::Int = 2, η_strategy::F = (f_1, k, x, F) -> f_1 ./ k^2) where {F}
    # Lift an integer history length into the type domain; a `Val` passes through unchanged.
    history_len = M isa Val ? M : Val(M)
    return SimpleDFSane(
        σ_min, σ_max, σ_1, γ, τ_min, τ_max, nexp, η_strategy, history_len)
end
58+
59+
# Solve `prob` with the spectral residual iteration configured by `alg`.
#
# Keyword arguments used here:
#   - `abstol`, `reltol`, `termination_condition`: forwarded to
#     `NonlinearSolveBase.init_termination_cache`.
#   - `maxiters`: upper bound on the counter `k`, which is advanced once per outer
#     iteration and once per backtracking round of the inner line search.
#   - `alias_u0`: forwarded, together with `prob.u0`, to `Utils.maybe_unaliased`.
#
# Returns the value of `SciMLBase.build_solution`: with the return code reported by
# `Utils.check_termination` when it signals success, otherwise with
# `ReturnCode.MaxIters` once `k` reaches `maxiters`.
function SciMLBase.__solve(prob::ImmutableNonlinearProblem, alg::SimpleDFSane, args...;
        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
        termination_condition = nothing, kwargs...)
    x = Utils.maybe_unaliased(prob.u0, alias_u0)
    fx = Utils.get_fx(prob, x)
    fx = Utils.eval_f(prob, fx, x)
    T = promote_type(eltype(fx), eltype(x))

    # Convert every scalar algorithm parameter to the working element type.
    σ_min = T(alg.σ_min)
    σ_max = T(alg.σ_max)
    σ_k = T(alg.σ_1)

    (; nexp, η_strategy, M) = alg
    γ = T(alg.γ)
    τ_min = T(alg.τ_min)
    τ_max = T(alg.τ_max)

    abstol, reltol, tc_cache = NonlinearSolveBase.init_termination_cache(
        prob, abstol, reltol, fx, x, termination_condition, Val(:simple))

    # Merit value ||F(x)||^nexp at the initial point; `f_1` is kept for `η_strategy`.
    fx_norm = L2_NORM(fx)^nexp
    α_1 = one(T)
    f_1 = fx_norm

    # Last `M` accepted merit values, all initialised to the starting value.
    history_f_k = dfsane_history_vec(x, fx_norm, M)

    # Generate the cache
    @bb x_cache = similar(x)
    @bb d = copy(x)
    @bb xo = copy(x)
    @bb δx = copy(x)
    @bb δf = copy(fx)   # holds the previous residual until it is overwritten below

    k = 0
    while k < maxiters
        # Spectral parameter range check
        σ_k = sign(σ_k) * clamp(abs(σ_k), σ_min, σ_max)

        # Line search direction
        @bb @. d = -σ_k * fx

        η = η_strategy(f_1, k + 1, x, fx)
        f_bar = maximum(history_f_k)
        α_p = α_1
        α_m = α_1

        @bb @. x_cache = x + α_p * d

        fx = Utils.eval_f(prob, fx, x_cache)
        fx_norm_new = L2_NORM(fx)^nexp

        # Non-monotone line search: try `x + α_p * d`, then `x - α_m * d`, and shrink
        # both step lengths until one trial point satisfies the acceptance test.
        # NOTE(review): if this loop exits because `k` reached `maxiters`, the last
        # trial point is still copied into `x` below — confirm this is intended.
        while k < maxiters
            (fx_norm_new ≤ (f_bar + η - γ * α_p^2 * fx_norm)) && break

            α_tp = α_p^2 * fx_norm / (fx_norm_new + (T(2) * α_p - T(1)) * fx_norm)
            @bb @. x_cache = x - α_m * d

            fx = Utils.eval_f(prob, fx, x_cache)
            fx_norm_new = L2_NORM(fx)^nexp

            (fx_norm_new ≤ (f_bar + η - γ * α_m^2 * fx_norm)) && break

            α_tm = α_m^2 * fx_norm / (fx_norm_new + (T(2) * α_m - T(1)) * fx_norm)
            # Keep each new step length within [τ_min, τ_max] times the previous one.
            α_p = clamp(α_tp, τ_min * α_p, τ_max * α_p)
            α_m = clamp(α_tm, τ_min * α_m, τ_max * α_m)
            @bb @. x_cache = x + α_p * d

            fx = Utils.eval_f(prob, fx, x_cache)
            fx_norm_new = L2_NORM(fx)^nexp

            k += 1
        end

        @bb copyto!(x, x_cache)

        # `x` is the new iterate here, `xo` still the previous one.
        solved, retcode, fx_sol, x_sol = Utils.check_termination(tc_cache, fx, x, xo, prob)
        solved && return SciMLBase.build_solution(prob, alg, x_sol, fx_sol; retcode)

        # Update spectral parameter
        @bb @. δx = x - xo
        @bb @. δf = fx - δf

        σ_k = dot(δx, δx) / dot(δx, δf)

        # Take step
        @bb copyto!(xo, x)
        @bb copyto!(δf, fx)
        fx_norm = fx_norm_new

        # Store function value; `mod1` cycles the write slot through 1:M.
        idx = mod1(k, SciMLBase._unwrap_val(M))
        if history_f_k isa SVector
            history_f_k = Base.setindex(history_f_k, fx_norm_new, idx)
        elseif history_f_k isa NTuple
            @reset history_f_k[idx] = fx_norm_new
        else
            history_f_k[idx] = fx_norm_new
        end
        k += 1
    end

    return SciMLBase.build_solution(prob, alg, x, fx; retcode = ReturnCode.MaxIters)
end
162+
163+
# History container for a `StaticArray` iterate: an `SVector` with `M` entries, each
# equal to `fx_norm`.
function dfsane_history_vec(x::StaticArray, fx_norm, ::Val{M}) where {M}
    unit_history = ones(SVector{M, eltype(x)})
    return unit_history .* fx_norm
end
166+
167+
# Generic history container holding `M` copies of `fx_norm`: a `Vector` built with
# `fill` when `M ≥ 11`, otherwise a tuple built with `ntuple`. The choice is made on
# the static parameter `M` while the method body is generated.
@generated function dfsane_history_vec(x, fx_norm, ::Val{M}) where {M}
    M ≥ 11 && return :(fill(fx_norm, $(M))) # Julia can't specialize here
    return :(ntuple(Returns(fx_norm), $(M)))
end

0 commit comments

Comments
 (0)