|
1 | 1 |
|
| 2 | +# """ |
| 3 | +# SimpleTrustRegion(; autodiff = nothing, max_trust_radius = 0.0, |
| 4 | +# initial_trust_radius = 0.0, step_threshold = 0.0001, |
| 5 | +# shrink_threshold = nothing, expand_threshold = nothing, |
| 6 | +# shrink_factor = nothing, expand_factor = 2.0, max_shrink_times::Int = 32, |
| 7 | +# nlsolve_update_rule = Val(false)) |
| 8 | + |
| 9 | +# A low-overhead implementation of a trust-region solver. This method is non-allocating on |
| 10 | +# scalar and static array problems. |
| 11 | + |
| 12 | +# ### Keyword Arguments |
| 13 | + |
| 14 | +# - `autodiff`: determines the backend used for the Jacobian. Defaults to `nothing` (i.e. |
| 15 | +# automatic backend selection). Valid choices include jacobian backends from |
| 16 | +# `DifferentiationInterface.jl`. |
| 17 | +# - `max_trust_radius`: the maximum radius of the trust region. Defaults to |
| 18 | +# `max(norm(f(u0)), maximum(u0) - minimum(u0))`. |
| 19 | +# - `initial_trust_radius`: the initial trust region radius. Defaults to |
| 20 | +# `max_trust_radius / 11`, or to `norm(u0)` (`1` if zero) under `nlsolve_update_rule`. |
| 21 | +# - `step_threshold`: the threshold for taking a step. In every iteration, the threshold is |
| 22 | +# compared with a value `r`, which is the actual reduction in the objective function divided |
| 23 | +# by the predicted reduction. If `step_threshold > r` the model is not a good approximation, |
| 24 | +# and the step is rejected. Defaults to `0.0001`. For more details, see |
| 25 | +# [Rahpeymaii, F.](https://link.springer.com/article/10.1007/s40096-020-00339-4) |
| 26 | +# - `shrink_threshold`: the threshold for shrinking the trust region radius. In every |
| 27 | +# iteration, the threshold is compared with a value `r` which is the actual reduction in the |
| 28 | +# objective function divided by the predicted reduction. If `shrink_threshold > r` the trust |
| 29 | +# region radius is shrunk by `shrink_factor`. Defaults to `0.25`. For more details, see |
| 30 | +# [Rahpeymaii, F.](https://link.springer.com/article/10.1007/s40096-020-00339-4) |
| 31 | +# - `expand_threshold`: the threshold for expanding the trust region radius. If a step is |
| 32 | +# taken, i.e `step_threshold < r` (with `r` defined in `shrink_threshold`), a check is also |
| 33 | +# made to see if `expand_threshold < r`. If that is true, the trust region radius is |
| 34 | +# expanded by `expand_factor`. Defaults to `0.75`. |
| 35 | +# - `shrink_factor`: the factor to shrink the trust region radius with if |
| 36 | +# `shrink_threshold > r` (with `r` defined in `shrink_threshold`). Defaults to `0.25`. |
| 37 | +# - `expand_factor`: the factor to expand the trust region radius with if |
| 38 | +# `expand_threshold < r` (with `r` defined in `shrink_threshold`). Defaults to `2.0`. |
| 39 | +# - `max_shrink_times`: the maximum number of times to shrink the trust region radius in a |
| 40 | +# row. If `max_shrink_times` is exceeded, the algorithm returns. Defaults to `32`. |
| 41 | +# - `nlsolve_update_rule`: If set to `Val(true)`, updates the trust region radius using the |
| 42 | +# update rule from NLSolve.jl. Defaults to `Val(false)`. If set to `Val(true)`, a few of the |
| 43 | +# radius update parameters -- `shrink_threshold = 0.05`, `expand_threshold = 0.9`, and |
| 44 | +# `shrink_factor = 0.5` -- have different defaults. |
| 45 | +# """ |
# Keyword-constructible option set for the simple trust-region solver. A value of `nothing`
# or `0.0` in a field means "let `SciMLBase.__solve` pick the default" as described per
# field below.
@kwdef @concrete struct SimpleTrustRegion <: AbstractSimpleNonlinearSolveAlgorithm
    # Jacobian backend. When the problem function has no user Jacobian, `__solve` passes
    # this through `NonlinearSolveBase.select_jacobian_autodiff`.
    autodiff = nothing
    # Upper bound on the radius; `0.0` is replaced by
    # `max(norm(f(u0)), maximum(u0) - minimum(u0))`.
    max_trust_radius = 0.0
    # Starting radius; `0.0` is replaced by `max_trust_radius / 11`, or by `norm(u0)`
    # (`1` if that is zero) when `nlsolve_update_rule` is `Val(true)`.
    initial_trust_radius = 0.0
    # A trial step is accepted when the reduction ratio `r` is at least this value.
    step_threshold = 0.0001
    # The radius is shrunk when `r` is below this value; `nothing` selects `0.25`
    # (`0.05` with `nlsolve_update_rule`).
    shrink_threshold = nothing
    # The radius is expanded when `r` exceeds this value; `nothing` selects `0.75`
    # (`0.9` with `nlsolve_update_rule`).
    expand_threshold = nothing
    # Multiplier applied to the radius on a shrink; `nothing` selects `0.25`
    # (`0.5` with `nlsolve_update_rule`).
    shrink_factor = nothing
    # Multiplier used when the radius is expanded.
    expand_factor = 2.0
    # Number of consecutive shrinks tolerated before the solve returns
    # `ReturnCode.ShrinkThresholdExceeded`.
    max_shrink_times::Int = 32
    # `Val(true)` switches to the alternative radius update branch in `__solve`.
    nlsolve_update_rule = Val(false)
end
| 58 | + |
# Solve `prob` with `SimpleTrustRegion`: a dogleg trust-region iteration on the merit
# function `‖f(x)‖² / 2`.
#
# Keyword arguments:
#   - `abstol`, `reltol`, `termination_condition`: forwarded to
#     `NonlinearSolveBase.init_termination_cache`.
#   - `maxiters`: maximum number of trust-region iterations (accepted or rejected).
#   - `alias_u0`: forwarded, together with `prob.u0`, to `Utils.maybe_unaliased`.
#
# Returns the result of `SciMLBase.build_solution`. The retcode is
# `ReturnCode.ShrinkThresholdExceeded` when the radius was shrunk more than
# `alg.max_shrink_times` times in a row, `ReturnCode.MaxIters` when the iteration budget
# runs out, and otherwise whatever `Utils.check_termination` reports.
function SciMLBase.__solve(prob::ImmutableNonlinearProblem, alg::SimpleTrustRegion,
        args...; abstol = nothing, reltol = nothing, maxiters = 1000,
        alias_u0 = false, termination_condition = nothing, kwargs...)
    x = Utils.maybe_unaliased(prob.u0, alias_u0)
    T = eltype(x)
    Δₘₐₓ = T(alg.max_trust_radius)
    Δ = T(alg.initial_trust_radius)
    η₁ = T(alg.step_threshold)

    # Unset thresholds/factors fall back to defaults that depend on the update rule.
    if alg.shrink_threshold === nothing
        η₂ = T(ifelse(SciMLBase._unwrap_val(alg.nlsolve_update_rule), 0.05, 0.25))
    else
        η₂ = T(alg.shrink_threshold)
    end

    if alg.expand_threshold === nothing
        η₃ = T(ifelse(SciMLBase._unwrap_val(alg.nlsolve_update_rule), 0.9, 0.75))
    else
        η₃ = T(alg.expand_threshold)
    end

    if alg.shrink_factor === nothing
        t₁ = T(ifelse(SciMLBase._unwrap_val(alg.nlsolve_update_rule), 0.5, 0.25))
    else
        t₁ = T(alg.shrink_factor)
    end

    t₂ = T(alg.expand_factor)
    max_shrink_times = alg.max_shrink_times

    autodiff = SciMLBase.has_jac(prob.f) ? alg.autodiff :
               NonlinearSolveBase.select_jacobian_autodiff(prob, alg.autodiff)

    fx = Utils.get_fx(prob, x)
    fx = Utils.eval_f(prob, fx, x)
    norm_fx = L2_NORM(fx)

    # `xo` / `fxo` always hold the last *accepted* iterate and its residual, while `x` /
    # `fx` hold the current trial point. Keeping `fxo` separate matters after a rejected
    # step: `fx` then belongs to the rejected trial point, whereas `J`, `g` and `H` still
    # belong to `xo`, so the subproblem must be built from `fxo`.
    @bb xo = copy(x)
    @bb fxo = copy(fx)
    fx_cache = (SciMLBase.isinplace(prob) && !SciMLBase.has_jac(prob.f)) ? similar(fx) :
               nothing
    jac_cache = Utils.prepare_jacobian(prob, autodiff, fx_cache, x)
    J = Utils.compute_jacobian!!(nothing, prob, autodiff, fx_cache, x, jac_cache)

    abstol, reltol, tc_cache = NonlinearSolveBase.init_termination_cache(
        prob, abstol, reltol, fx, x, termination_condition, Val(:simple))

    # Set default trust region radius if not specified by user.
    iszero(Δₘₐₓ) && (Δₘₐₓ = max(L2_NORM(fx), maximum(x) - minimum(x)))
    if iszero(Δ)
        if SciMLBase._unwrap_val(alg.nlsolve_update_rule)
            norm_x = L2_NORM(x)
            Δ = T(ifelse(norm_x > 0, norm_x, 1))
        else
            Δ = T(Δₘₐₓ / 11)
        end
    end

    # Divide by `T(2)` (not `0.5 *`) so `fₖ` has the same type as `fₖ₊₁` for every
    # element type, keeping the loop type-stable.
    fₖ = norm_fx^2 / T(2)
    H = transpose(J) * J
    g = Utils.restructure(x, J' * Utils.safe_vec(fx))
    shrink_counter = 0

    @bb δsd = copy(x)
    @bb δN_δsd = copy(x)
    @bb δN = copy(x)
    @bb Hδ = copy(x)
    dogleg_cache = (; δsd, δN_δsd, δN)

    for _ in 1:maxiters
        # Solve the trust region subproblem around the last accepted iterate.
        δ = dogleg_method!!(dogleg_cache, J, fxo, g, Δ)
        @bb @. x = xo + δ

        fx = Utils.eval_f(prob, fx, x)

        fₖ₊₁ = L2_NORM(fx)^2 / T(2)

        # Compute the ratio of the actual to predicted reduction.
        # NOTE(review): a zero step (e.g. the initial guess already satisfies `fx == 0`)
        # makes this 0/0 = NaN, which fails both comparisons below and is therefore
        # handled as a rejected step -- confirm this is intended.
        @bb Hδ = H × vec(δ)
        r = (fₖ₊₁ - fₖ) / (dot(δ, g) + (dot(δ, Hδ) / T(2)))

        # Update the trust region radius.
        if r ≥ η₂
            shrink_counter = 0
        else
            Δ = t₁ * Δ
            shrink_counter += 1
            shrink_counter > max_shrink_times && return SciMLBase.build_solution(
                prob, alg, x, fx; retcode = ReturnCode.ShrinkThresholdExceeded)
        end

        if r ≥ η₁
            # Termination Checks
            solved, retcode, fx_sol, x_sol = Utils.check_termination(
                tc_cache, fx, x, xo, prob)
            solved && return SciMLBase.build_solution(prob, alg, x_sol, fx_sol; retcode)

            # Take the step.
            @bb copyto!(xo, x)

            J = Utils.compute_jacobian!!(J, prob, autodiff, fx_cache, x, jac_cache)
            fx = Utils.eval_f(prob, fx, x)
            # Record the residual of the newly accepted iterate for later subproblems.
            @bb copyto!(fxo, fx)

            # Update the trust region radius.
            if !SciMLBase._unwrap_val(alg.nlsolve_update_rule) && r > η₃
                Δ = min(t₂ * Δ, Δₘₐₓ)
            end
            fₖ = fₖ₊₁

            @bb H = transpose(J) × J
            @bb g = transpose(J) × vec(fx)
        end

        # Alternative radius update, driven by the length of the step just tried.
        if SciMLBase._unwrap_val(alg.nlsolve_update_rule)
            if r > η₃
                Δ = t₂ * L2_NORM(δ)
            elseif r > 0.5
                Δ = max(Δ, t₂ * L2_NORM(δ))
            end
        end
    end

    return SciMLBase.build_solution(prob, alg, x, fx; retcode = ReturnCode.MaxIters)
end
| 183 | + |
# Choose a trust-region step of length at most `Δ` with a dogleg-style rule.
#
# Arguments:
#   - `cache`: named tuple providing the work variables `δsd`, `δN_δsd` and `δN`.
#   - `J`, `f`: used to form the Newton step `-(J \ f)`.
#   - `g`: the steepest-descent step is taken as `-g`.
#   - `Δ`: trust-region radius.
#
# Returns the Newton step when its norm is at most `Δ`; otherwise `-g` rescaled to length
# `Δ` when `‖g‖ ≥ Δ`; otherwise the point on the segment from `-g` towards the Newton step
# whose norm equals `Δ`.
function dogleg_method!!(cache, J, f, g, Δ)
    newton = cache.δN
    descent = cache.δsd
    gap = cache.δN_δsd

    # Newton step; accepted outright when it fits inside the trust region.
    @bb newton .= Utils.restructure(newton, J \ Utils.safe_vec(f))
    @bb newton .*= -1
    if L2_NORM(newton) ≤ Δ
        return newton
    end

    # Steepest-descent step; clipped to the boundary when it reaches or crosses it.
    @bb descent .= g
    @bb @. descent *= -1
    descent_norm = L2_NORM(descent)
    if descent_norm ≥ Δ
        @bb @. descent *= Δ / descent_norm
        return descent
    end

    # Solve ‖descent + τ * gap‖² = Δ² for τ (a quadratic a τ² + 2 b τ + c = 0) and move
    # to that boundary point.
    @bb @. gap = newton - descent
    a = dot(gap, gap)
    b = dot(descent, gap)
    c = dot(descent, descent) - Δ^2
    discriminant = b^2 - a * c
    τ = (-b + sqrt(discriminant)) / a
    @bb @. descent += τ * gap
    return descent
end
0 commit comments