-
-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathtrust_region.jl
More file actions
229 lines (196 loc) · 8.74 KB
/
trust_region.jl
File metadata and controls
229 lines (196 loc) · 8.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""
SimpleTrustRegion(;
autodiff = AutoForwardDiff(), max_trust_radius = 0.0,
initial_trust_radius = 0.0, step_threshold = nothing,
shrink_threshold = nothing, expand_threshold = nothing,
shrink_factor = 0.25, expand_factor = 2.0, max_shrink_times::Int = 32,
nlsolve_update_rule = Val(false)
)
A low-overhead implementation of a trust-region solver. This method is non-allocating on
scalar and static array problems.
### Keyword Arguments
- `autodiff`: determines the backend used for the Jacobian. Defaults to `nothing` (i.e.
automatic backend selection). Valid choices include jacobian backends from
`DifferentiationInterface.jl`.
- `max_trust_radius`: the maximum radius of the trust region. Defaults to
`max(norm(f(u0)), maximum(u0) - minimum(u0))`.
- `initial_trust_radius`: the initial trust region radius. Defaults to
`max_trust_radius / 11`.
- `step_threshold`: the threshold for taking a step. In every iteration, the threshold is
compared with a value `r`, which is the actual reduction in the objective function
divided by the predicted reduction. If `step_threshold > r` the model is not a good
approximation, and the step is rejected. Defaults to `0.0001`. For more details, see
[Rahpeymaii, F.](https://link.springer.com/article/10.1007/s40096-020-00339-4)
- `shrink_threshold`: the threshold for shrinking the trust region radius. In every
iteration, the threshold is compared with a value `r` which is the actual reduction in
the objective function divided by the predicted reduction. If `shrink_threshold > r` the
trust region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details,
see [Rahpeymaii, F.](https://link.springer.com/article/10.1007/s40096-020-00339-4)
- `expand_threshold`: the threshold for expanding the trust region radius. If a step is
taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is
also made to see if `expand_threshold < r`. If that is true, the trust region radius is
expanded by `expand_factor`. Defaults to `0.75`.
- `shrink_factor`: the factor to shrink the trust region radius with if
`shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`.
- `expand_factor`: the factor to expand the trust region radius with if
`expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`.
- `max_shrink_times`: the maximum number of times to shrink the trust region radius in a
row, `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`.
- `nlsolve_update_rule`: If set to `Val(true)`, updates the trust region radius using the
update rule from NLSolve.jl. Defaults to `Val(false)`. If set to `Val(true)`, few of the
radius update parameters -- `step_threshold = 0.05`, `expand_threshold = 0.9`, and
`shrink_factor = 0.5` -- have different defaults.
"""
@kwdef @concrete struct SimpleTrustRegion <: AbstractSimpleNonlinearSolveAlgorithm
autodiff = nothing
max_trust_radius = 0.0
initial_trust_radius = 0.0
step_threshold = 0.0001
shrink_threshold = nothing
expand_threshold = nothing
shrink_factor = nothing
expand_factor = 2.0
max_shrink_times::Int = 32
nlsolve_update_rule = Val(false)
end
function SciMLBase.__solve(
prob::Union{ImmutableNonlinearProblem, NonlinearLeastSquaresProblem},
alg::SimpleTrustRegion, args...;
abstol = nothing, reltol = nothing, maxiters = 1000,
alias::Union{Nothing, SciMLBase.NonlinearAliasSpecifier} = nothing,
alias_u0 = false,
termination_condition = nothing, kwargs...
)
# Extract alias_u0: if alias struct provided, use it; otherwise use alias_u0 kwarg
_alias_u0 = alias === nothing ? alias_u0 : Utils.get_alias_u0(alias, alias_u0)
x = NLBUtils.maybe_unaliased(prob.u0, _alias_u0)
T = eltype(x)
Δₘₐₓ = T(alg.max_trust_radius)
Δ = T(alg.initial_trust_radius)
η₁ = T(alg.step_threshold)
if alg.shrink_threshold === nothing
η₂ = T(ifelse(NLBUtils.unwrap_val(alg.nlsolve_update_rule), 0.05, 0.25))
else
η₂ = T(alg.shrink_threshold)
end
if alg.expand_threshold === nothing
η₃ = T(ifelse(NLBUtils.unwrap_val(alg.nlsolve_update_rule), 0.9, 0.75))
else
η₃ = T(alg.expand_threshold)
end
if alg.shrink_factor === nothing
t₁ = T(ifelse(NLBUtils.unwrap_val(alg.nlsolve_update_rule), 0.5, 0.25))
else
t₁ = T(alg.shrink_factor)
end
t₂ = T(alg.expand_factor)
max_shrink_times = alg.max_shrink_times
autodiff = SciMLBase.has_jac(prob.f) ? alg.autodiff :
NonlinearSolveBase.select_jacobian_autodiff(prob, alg.autodiff)
fx = NLBUtils.evaluate_f(prob, x)
norm_fx = L2_NORM(fx)
@bb xo = copy(x)
fx_cache = Utils.should_cache_fx(prob, prob.f) ?
NLBUtils.safe_similar(fx) : fx
jac_cache = Utils.prepare_jacobian(prob, autodiff, fx_cache, x)
J = Utils.compute_jacobian!!(nothing, prob, autodiff, fx_cache, x, jac_cache)
abstol, reltol,
tc_cache = NonlinearSolveBase.init_termination_cache(
prob, abstol, reltol, fx, x, termination_condition, Val(:simple)
)
# Set default trust region radius if not specified by user.
iszero(Δₘₐₓ) && (Δₘₐₓ = max(L2_NORM(fx), maximum(x) - minimum(x)))
if iszero(Δ)
if NLBUtils.unwrap_val(alg.nlsolve_update_rule)
norm_x = L2_NORM(x)
Δ = T(ifelse(norm_x > 0, norm_x, 1))
else
Δ = T(Δₘₐₓ / 11)
end
end
fₖ = 0.5 * norm_fx^2
H = transpose(J) * J
g = NLBUtils.restructure(x, J' * NLBUtils.safe_vec(fx))
shrink_counter = 0
@bb δsd = copy(x)
@bb δN_δsd = copy(x)
@bb δN = copy(x)
@bb Hδ = copy(x)
dogleg_cache = (; δsd, δN_δsd, δN)
solved, retcode, fx_sol, x_sol = Utils.check_termination(
tc_cache, fx, x, xo, prob
)
solved && return SciMLBase.build_solution(prob, alg, x_sol, fx_sol; retcode)
for _ in 1:maxiters
# Solve the trust region subproblem.
δ = dogleg_method!!(dogleg_cache, J, fx, g, Δ)
@bb @. x = xo + δ
fx = NLBUtils.evaluate_f!!(prob, fx, x)
fₖ₊₁ = L2_NORM(fx)^2 / T(2)
# Compute the ratio of the actual to predicted reduction.
@bb Hδ = H × vec(δ)
r = (fₖ₊₁ - fₖ) / (dot(δ, g) + (dot(δ, Hδ) / T(2)))
# Update the trust region radius.
if r ≥ η₂
shrink_counter = 0
else
Δ = t₁ * Δ
shrink_counter += 1
shrink_counter > max_shrink_times && return SciMLBase.build_solution(
prob, alg, x, fx; retcode = ReturnCode.ShrinkThresholdExceeded
)
end
if r ≥ η₁
# Termination Checks
solved, retcode, fx_sol,
x_sol = Utils.check_termination(
tc_cache, fx, x, xo, prob
)
solved && return SciMLBase.build_solution(prob, alg, x_sol, fx_sol; retcode)
# Take the step.
@bb copyto!(xo, x)
J = Utils.compute_jacobian!!(J, prob, autodiff, fx_cache, x, jac_cache)
fx = NLBUtils.evaluate_f!!(prob, fx, x)
# Update the trust region radius.
if !NLBUtils.unwrap_val(alg.nlsolve_update_rule) && r > η₃
Δ = min(t₂ * Δ, Δₘₐₓ)
end
fₖ = fₖ₊₁
@bb H = transpose(J) × J
@bb g = transpose(J) × vec(fx)
end
if NLBUtils.unwrap_val(alg.nlsolve_update_rule)
if r > η₃
Δ = t₂ * L2_NORM(δ)
elseif r > 0.5
Δ = max(Δ, t₂ * L2_NORM(δ))
end
end
end
return SciMLBase.build_solution(prob, alg, x, fx; retcode = ReturnCode.MaxIters)
end
function dogleg_method!!(cache, J, f::F, g, Δ) where {F}
(; δsd, δN_δsd, δN) = cache
# Compute the Newton step
@bb δN .= NLBUtils.restructure(δN, J \ NLBUtils.safe_vec(f))
@bb δN .*= -1
# Test if the full step is within the trust region
(L2_NORM(δN) ≤ Δ) && return δN
# Calculate Cauchy point, optimum along the steepest descent direction
@bb δsd .= g
@bb @. δsd *= -1
norm_δsd = L2_NORM(δsd)
if (norm_δsd ≥ Δ)
@bb @. δsd *= Δ / norm_δsd
return δsd
end
# Find the intersection point on the boundary
@bb @. δN_δsd = δN - δsd
dot_δN_δsd = dot(δN_δsd, δN_δsd)
dot_δsd_δN_δsd = dot(δsd, δN_δsd)
dot_δsd = dot(δsd, δsd)
fact = dot_δsd_δN_δsd^2 - dot_δN_δsd * (dot_δsd - Δ^2)
tau = (-dot_δsd_δN_δsd + sqrt(fact)) / dot_δN_δsd
@bb @. δsd += tau * δN_δsd
return δsd
end