diff --git a/.gitignore b/.gitignore
index 2cd4ac3f..98474722 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 docs/build
 docs/Manifest.toml
+
+Manifest.toml
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 908e4d8d..4792db91 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,10 +19,11 @@ NLPModels = "0.13"
 julia = "^1.3.0"
 
 [extras]
+Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["NLPModels", "LinearAlgebra", "Test", "Logging"]
+test = ["Krylov", "NLPModels", "LinearAlgebra", "Test", "Logging"]
diff --git a/docs/src/api.md b/docs/src/api.md
index 1585a3af..1a75842e 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -18,13 +18,23 @@ project_step!
 ## Line-Search
 
 ```@docs
-LineModel
+linesearch
+LineSearchOutput
+armijo!
+armijo_wolfe!
+```
+
+## Merit
+
+```@docs
+AbstractMeritModel
 obj
-grad
-grad!
-hess
-redirect!
-armijo_wolfe
+dualobj
+primalobj
+derivative
+L1Merit
+AugLagMerit
+UncMerit
 ```
 
 ## Stats
@@ -38,13 +48,7 @@ show_statuses
 
 ```@docs
 TrustRegionException
-SolverTools.AbstractTrustRegion
-aredpred
-acceptable
-reset!
-get_property
-set_property!
-update!
-TrustRegion
-TRONTrustRegion
+trust_region
+basic_trust_region!
+tron_trust_region!
 ```
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 2dec9b13..b0d59bcf 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -1,4 +1,12 @@
 # Reference
 
-```@index
+## Contents
+
+```@contents
+Pages = ["reference.md"]
 ```
+
+## Index
+
+```@index
+```
\ No newline at end of file
diff --git a/src/SolverTools.jl b/src/SolverTools.jl
index ed6ecde2..5947635b 100644
--- a/src/SolverTools.jl
+++ b/src/SolverTools.jl
@@ -16,6 +16,7 @@ include("auxiliary/logger.jl")
 include("stats/stats.jl")
 
 # Algorithmic components.
+include("merit/merit.jl")
 include("linesearch/linesearch.jl")
 include("trust-region/trust-region.jl")
 
diff --git a/src/linesearch/armijo.jl b/src/linesearch/armijo.jl
new file mode 100644
index 00000000..c97e2224
--- /dev/null
+++ b/src/linesearch/armijo.jl
@@ -0,0 +1,57 @@
+export armijo!
+
+"""
+    lso = armijo!(ϕ, x, d, xt; kwargs...)
+
+Performs a line search from `x` along the direction `d` for the `AbstractMeritModel` `ϕ`.
+The steplength is chosen trying to satisfy the Armijo condition
+```math
+ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; d, η),
+```
+using backtracking.
+
+The output is a `LineSearchOutput`. The following specific information is also available in
+`lso.specific`:
+- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e.,
+  the number of backtracks.
+
+The following keyword arguments can be provided:
+- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`);
+- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`);
+- `t`: starting steplength (default `1`);
+- `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`);
+- `σ`: backtracking decrease factor (default `0.4`);
+- `bk_max`: maximum number of backtracks (default `10`).
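+
+For example, a minimal call could look as follows (a sketch: `nlp` is assumed to be an
+unconstrained `AbstractNLPModel`, and `d` a descent direction at `x`):
+
+    ϕ = UncMerit(nlp)
+    lso = armijo!(ϕ, x, d, copy(x), update_obj_at_x=true, update_derivative_at_x=true)
+    lso.t, lso.ϕt  # accepted steplength and merit value at x + t d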
+""" +function armijo!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + σ :: T=T(0.4), + bk_max :: Int=10 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + + nbk = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) + + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + while !(ϕt ≤ ϕx + τ₀ * t * slope) && (nbk < bk_max) + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + + nbk += 1 + end + + return LineSearchOutput(t, xt, ϕt, specific=(nbk=nbk,)) +end diff --git a/src/linesearch/armijo_wolfe.jl b/src/linesearch/armijo_wolfe.jl index 14b3348e..aae84d77 100644 --- a/src/linesearch/armijo_wolfe.jl +++ b/src/linesearch/armijo_wolfe.jl @@ -1,84 +1,91 @@ -export armijo_wolfe +export armijo_wolfe! """ - t, good_grad, ht, nbk, nbW = armijo_wolfe(h, h₀, slope, g) + lso = armijo_wolfe!(ϕ, x, d, xt; kwargs...) -Performs a line search from `x` along the direction `d` as defined by the -`LineModel` ``h(t) = f(x + t d)``, where -`h₀ = h(0) = f(x)`, `slope = h'(0) = ∇f(x)ᵀd` and `g` is a vector that will be -overwritten with the gradient at various points. On exit, if `good_grad=true`, -`g` contains the gradient at the final step length. -The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. The Armijo condition is +Performs a line search from `x` along the direction `d` for `AbstractMeritModel` `ϕ`. +The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. +The Armijo condition is ```math -h(t) ≤ h₀ + τ₀ t h'(0) +ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; d, η) ``` and the Wolfe condition is ```math -h'(t) ≤ τ₁ h'(0). +Dϕ(x + t d; d, η) ≤ τ₁ Dϕ(x; d, η). ``` -Initially the step is increased trying to satisfy the Wolfe condition. -Afterwards, only backtracking is performed in order to try to satisfy the Armijo condition. -The final steplength may only satisfy Armijo's condition. +Initially the step is increased trying to satisfy the Wolfe condition. Afterwards, only backtracking +is performed in order to try to satisfy the Armijo condition. The final steplength may only satisfy +Armijo's condition. -The output is the following: -- t: the step length; -- good_grad: whether `g` is the gradient at `x + t * d`; -- ht: the model value at `t`, i.e., `f(x + t * d)`; -- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., number of backtracks; +The output is a `LineSearchOutput`. The following specific information are also available in +`lso.specific`: +- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., + number of backtracks; - nbW: the number of times the steplength was increased to satisfy the Wolfe condition. The following keyword arguments can be provided: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`); - `t`: starting steplength (default `1`); - `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`); - `τ₁`: slope factor in the Wolfe condition. 
It should satisfy `τ₁ > τ₀` (default `0.9999`); +- `σ`: backtracking decrease factor (default `0.4`); - `bk_max`: maximum number of backtracks (default `10`); -- `bW_max`: maximum number of increases (default `5`); -- `verbose`: whether to print information (default `false`). +- `bW_max`: maximum number of increases (default `5`). """ -function armijo_wolfe(h :: LineModel, - h₀ :: T, - slope :: T, - g :: Array{T,1}; - t :: T=one(T), - τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), - τ₁ :: T=T(0.9999), - bk_max :: Int=10, - bW_max :: Int=5, - verbose :: Bool=false) where T <: AbstractFloat +function armijo_wolfe!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + τ₁ :: T=T(0.9999), + σ :: T=T(0.4), + bk_max :: Int=10, + bW_max :: Int=5 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} # Perform improved Armijo linesearch. nbk = 0 nbW = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) # First try to increase t to satisfy loose Wolfe condition - ht = obj(h, t) - slope_t = grad!(h, t, g) - while (slope_t < τ₁*slope) && (ht <= h₀ + τ₀ * t * slope) && (nbW < bW_max) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) + while (slope_t < τ₁*slope) && (ϕt <= ϕx + τ₀ * t * slope) && (nbW < bW_max) t *= 5 - ht = obj(h, t) - slope_t = grad!(h, t, g) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) nbW += 1 end - hgoal = h₀ + slope * t * τ₀; + ϕgoal = ϕx + slope * t * τ₀; fact = -T(0.8) ϵ = eps(T)^T(3/5) # Enrich Armijo's condition with Hager & Zhang numerical trick - Armijo = (ht <= hgoal) || ((ht <= h₀ + ϵ * abs(h₀)) && (slope_t <= fact * slope)) + Armijo = (ϕt <= ϕgoal) || ((ϕt <= ϕx + ϵ * abs(ϕx)) && (slope_t <= fact * slope)) good_grad = true while !Armijo && (nbk < bk_max) - t *= T(0.4) - ht = obj(h, t) - hgoal = h₀ + slope * t * τ₀; + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + ϕgoal = ϕx + slope * t * τ₀; # avoids unused grad! calls Armijo = false good_grad = false - if ht <= hgoal + if ϕt <= ϕgoal Armijo = true - elseif ht <= h₀ + ϵ * abs(h₀) - slope_t = grad!(h, t, g) + elseif ϕt <= ϕx + ϵ * abs(ϕx) + slope_t = derivative!(ϕ, xt, d) good_grad = true if slope_t <= fact * slope Armijo = true @@ -88,7 +95,5 @@ function armijo_wolfe(h :: LineModel, nbk += 1 end - verbose && @printf(" %4d %4d\n", nbk, nbW); - - return (t, good_grad, ht, nbk, nbW) + return LineSearchOutput(t, xt, ϕt, good_grad=good_grad, gt=ϕ.gx, specific=(nbk=nbk, nbW=nbW)) end diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl deleted file mode 100644 index 54f21426..00000000 --- a/src/linesearch/line_model.jl +++ /dev/null @@ -1,87 +0,0 @@ -import NLPModels: obj, grad, grad!, hess - -export LineModel -export obj, grad, derivative, grad!, derivative!, hess, redirect! - -"""A type to represent the restriction of a function to a direction. -Given f : R → Rⁿ, x ∈ Rⁿ and a nonzero direction d ∈ Rⁿ, - - ϕ = LineModel(nlp, x, d) - -represents the function ϕ : R → R defined by - - ϕ(t) := f(x + td). 
-""" -mutable struct LineModel <: AbstractNLPModel - meta :: NLPModelMeta - counters :: Counters - nlp :: AbstractNLPModel - x :: AbstractVector - d :: AbstractVector -end - -function LineModel(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector) - meta = NLPModelMeta(1, x0=zeros(eltype(x), 1), name="LineModel to $(nlp.meta.name))") - return LineModel(meta, Counters(), nlp, x, d) -end - -"""`redirect!(ϕ, x, d)` - -Change the values of x and d of the LineModel ϕ, but retains the counters. -""" -function redirect!(ϕ :: LineModel, x :: AbstractVector, d :: AbstractVector) - ϕ.x, ϕ.d = x, d - return ϕ -end - -"""`obj(f, t)` evaluates the objective of the `LineModel` - - ϕ(t) := f(x + td). -""" -function obj(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_obj) - return obj(f.nlp, f.x + t * f.d) -end - -"""`grad(f, t)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. -""" -function grad(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_grad) - return dot(grad(f.nlp, f.x + t * f.d), f.d) -end -derivative(f :: LineModel, t :: AbstractFloat) = grad(f, t) - -"""`grad!(f, t, g)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. - -The gradient ∇f(x + td) is stored in `g`. -""" -function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) - NLPModels.increment!(f, :neval_grad) - return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) -end -derivative!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) = grad!(f, t, g) - -"""Evaluate the second derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ"(t) = dᵀ∇²f(x + td)d. -""" -function hess(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_hess) - return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) -end diff --git a/src/linesearch/linesearch.jl b/src/linesearch/linesearch.jl index 69680d00..70332a16 100644 --- a/src/linesearch/linesearch.jl +++ b/src/linesearch/linesearch.jl @@ -1,2 +1,46 @@ -include("line_model.jl") +export linesearch, linesearch! + +include("linesearchoutput.jl") +include("armijo.jl") include("armijo_wolfe.jl") + +const ls_dict = Dict( + :armijo => armijo!, + :armijo_wolfe => armijo_wolfe! +) + +""" + lso = linesearch(ϕ, x, d; method=:armijo, kwargs...) + lso = linesearch!(ϕ, x, d, xt; method=:armijo, kwargs...) + lso = linesearch(nlp, x, d; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + lso = linesearch!(nlp, x, d, xt; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Perform a line search for unconstrained or bound-constrained problem `nlp` or merit model `ϕ` starting at `x` on direction `d`, storing the result in `xt`, for the in place version. +Returns a `LineSearchOutput`. +The `method` keyword argument can be any of the following: + +- `:armijo`: Calls the simple Armijo line search, performing backtracking. +- `:armijo_wolfe`: Calls the Armijo-Wolfe line search, that tries to satisfy the Wolfe conditions in addition to the Armijo conditions. + +When the input is `ϕ`, a merit model, the following keyword arguments are available for any method: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad`. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ = 0`). 
+In such case a `UncMerit` is created using the keyword arguments `fx` and `gx`. **Be warned** that `gx` will be overwritten.
+"""
+function linesearch end
+
+function linesearch(nlp_or_ϕ, x, d; kwargs...)
+  xt = copy(x)
+  linesearch!(nlp_or_ϕ, x, d, xt; kwargs...)
+end
+
+function linesearch!(nlp :: AbstractNLPModel, x :: AbstractVector, args...; fx=obj(nlp, x), gx=grad(nlp, x), kwargs...)
+  ϕ = UncMerit(nlp, fx=fx, gx=gx)
+  linesearch!(ϕ, x, args...; kwargs...)
+end
+
+function linesearch!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:armijo, kwargs...)
+  SolverTools.ls_dict[method](ϕ, x, args...; kwargs...)
+end
\ No newline at end of file
diff --git a/src/linesearch/linesearchoutput.jl b/src/linesearch/linesearchoutput.jl
new file mode 100644
index 00000000..8eb28e98
--- /dev/null
+++ b/src/linesearch/linesearchoutput.jl
@@ -0,0 +1,35 @@
+export LineSearchOutput
+
+# TODO: Generalize AbstractExecutionStats?
+
+"""
+    LineSearchOutput
+
+Structure to store the output of a line search algorithm.
+The following variables are stored:
+- `t`: The final step length;
+- `xt`: The final iterate ``xt = x + t d``;
+- `ϕt`: The final merit function value, i.e., `ϕ(xt)`;
+- `good_grad`: Whether the gradient was computed at `xt`;
+- `gt`: If `good_grad` is `true`, the gradient at `xt`;
+- `specific`: A NamedTuple storing method-specific information. Check the specific method for this information.
+"""
+struct LineSearchOutput{T <: Real, V <: AbstractVector{<: T}}
+  t :: T
+  xt :: V
+  ϕt :: T
+  good_grad :: Bool
+  gt :: V
+  specific :: NamedTuple
+end
+
+function LineSearchOutput(
+  t :: T,
+  xt :: V,
+  ϕt :: T;
+  good_grad :: Bool=false,
+  gt :: V=T[],
+  specific :: NamedTuple = NamedTuple()
+) where {T <: Real, V <: AbstractVector{<: T}}
+  LineSearchOutput(t, xt, ϕt, good_grad, gt, specific)
+end
\ No newline at end of file
diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl
new file mode 100644
index 00000000..ab8f16f7
--- /dev/null
+++ b/src/merit/auglagmerit.jl
@@ -0,0 +1,113 @@
+export AugLagMerit
+
+@doc raw"""
+    AugLagMerit(nlp, η; kwargs...)
+
+Creates an augmented Lagrangian merit function for the equality constrained problem
+```math
+\min f(x) \quad \text{s.t.} \quad c(x) = 0
+```
+defined by
+```math
+\phi(x, yₖ; η) = f(x) + yₖᵀc(x) + η ½\|c(x)\|²
+```
+
+In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also
+accepts the argument `y`.
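+
+For example, a minimal usage sketch (assuming an equality-constrained `nlp`, a point `x`, a
+direction `d` and a multiplier estimate `y` of length `nlp.meta.ncon` are given):
+
+    ϕ = AugLagMerit(nlp, 1.0, y=y)
+    ϕx = obj(ϕ, x)            # updates ϕ.fx and ϕ.cx
+    Dϕx = derivative(ϕ, x, d) # updates ϕ.gx and ϕ.Ad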
+""" +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V + y⁺ :: V + Jᵀy⁺ :: V + Jv :: V + JᵀJv :: V +end + +function AugLagMerit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon), + y⁺ :: V = fill(T(Inf), nlp.meta.ncon), # y + η * c(x) + Jᵀy⁺ :: V = fill(T(Inf), nlp.meta.nvar), + Jv :: V = fill(T(Inf), nlp.meta.ncon), + JᵀJv :: V = fill(T(Inf), nlp.meta.nvar) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + meta = NLPModelMeta(nlp.meta.nvar) + AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) +end + +function dualobj(merit :: AugLagMerit) + merit.fx + dot(merit.y, merit.cx) +end + +function primalobj(merit :: AugLagMerit) + dot(merit.cx, merit.cx) / 2 +end + +function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end + +function NLPModels.grad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_grad) + if update + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + g .= merit.gx .+ merit.Jᵀy⁺ + return g +end + +function NLPModels.objgrad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_obj) + NLPModels.increment!(merit, :neval_grad) + if update + merit.fx = obj(merit.nlp, x) + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + f = merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + g .= merit.gx .+ merit.Jᵀy⁺ + return f, g +end + +function NLPModels.hprod!(merit :: AugLagMerit, x :: AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Float64 = 1.0) + @lencheck merit.meta.nvar x v Hv + NLPModels.increment!(merit, :neval_hprod) + if update + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + end + jprod!(merit.model, x, v, merit.Jv) + jtprod!(merit.model, x, merit.Jv, merit.JᵀJv) + hprod!(merit.model, x, merit.y⁺, v, Hv, obj_weight = obj_weight) + Hv .+= merit.η * merit.JᵀJv + return Hv +end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl new file mode 100644 index 00000000..52136f07 --- /dev/null +++ b/src/merit/l1merit.jl @@ -0,0 +1,66 @@ +export L1Merit + +@doc raw""" + L1Merit(nlp, η; kwargs...) 
+
+Creates an ℓ₁ merit function for the equality constrained problem
+```math
+\min f(x) \quad \text{s.t.} \quad c(x) = 0
+```
+defined by
+```math
+\phi_1(x; η) = f(x) + η\|c(x)\|₁
+```
+"""
+mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V}
+  meta :: NLPModelMeta
+  counters :: Counters
+  nlp :: M
+  η :: T
+  fx :: T
+  gx :: V
+  cx :: V
+  Ad :: V
+end
+
+function L1Merit(
+  nlp :: M,
+  η :: T;
+  fx :: T = T(Inf),
+  gx :: V = fill(T(Inf), nlp.meta.nvar),
+  cx :: V = fill(T(Inf), nlp.meta.ncon),
+  Ad :: V = fill(T(Inf), nlp.meta.ncon)
+) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}}
+  meta = NLPModelMeta(nlp.meta.nvar)
+  L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad)
+end
+
+function dualobj(merit :: L1Merit)
+  merit.fx
+end
+
+function primalobj(merit :: L1Merit)
+  norm(merit.cx, 1)
+end
+
+function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true)
+  @lencheck merit.meta.nvar x d
+  if update
+    grad!(merit.nlp, x, merit.gx)
+    merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad)
+  end
+  if merit.nlp.meta.ncon == 0
+    return dot(merit.gx, d)
+  else
+    return dot(merit.gx, d) + merit.η * sum(
+      if ci > 0
+        Adi
+      elseif ci < 0
+        -Adi
+      else
+        abs(Adi)
+      end
+      for (ci, Adi) in zip(merit.cx, merit.Ad)
+    )
+  end
+end
\ No newline at end of file
diff --git a/src/merit/merit.jl b/src/merit/merit.jl
new file mode 100644
index 00000000..da8b9229
--- /dev/null
+++ b/src/merit/merit.jl
@@ -0,0 +1,73 @@
+using NLPModels
+export AbstractMeritModel, obj, derivative, dualobj, primalobj
+
+"""
+    AbstractMeritModel
+
+Model for merit functions. All models should store
+- `nlp`: The NLP with the corresponding problem.
+- `η`: The merit parameter.
+- `fx`: The objective at some point.
+- `cx`: The constraints vector at some point.
+- `gx`: The gradient vector at some point.
+- `Ad`: The Jacobian-direction product.
+
+All models allow a constructor of the form
+
+    Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf])
+
+Additional arguments and constructors may be provided.
+
+An AbstractMeritModel is an AbstractNLPModel, but the API may not be completely implemented. For
+instance, the `L1Merit` model doesn't provide any gradient function, but it provides a directional
+derivative function.
+
+Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used
+to determine whether the internal stored values should be updated or not.
+"""
+abstract type AbstractMeritModel{M,T,V} <: AbstractNLPModel end
+
+"""
+    dualobj(merit)
+
+Return the dual part of `merit`, i.e., the part without `η`, at its current values.
+No updates or internal function calls are made.
+"""
+function dualobj end
+
+"""
+    primalobj(merit)
+
+Return the primal part of `merit`, i.e., the part multiplied by `η`, at its current values.
+No updates or internal function calls are made.
+"""
+function primalobj end
+
+"""
+    obj(merit, x; update=true)
+
+Evaluates the `merit` model at `x`.
+This will call `obj` and `cons!` to update the internal values of `fx` and `cx`, unless `update=false`.
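+
+For example (a sketch; `merit` and `x` assumed given):
+
+    ϕx = obj(merit, x)               # evaluates f(x) and c(x) and stores them in merit
+    ϕx = obj(merit, x, update=false) # reuses merit.fx and merit.cx, no extra evaluations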
+""" +function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=true) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + dualobj(merit) + merit.η * primalobj(merit) +end + +""" + derivative(merit, x, d; update=true) + +Computes the directional derivative of `merit` at `x` on direction `d`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, unless `update=false`. +This function assumes that `cx` is already computed, though. +""" +function derivative end + +include("auglagmerit.jl") +include("l1merit.jl") +include("uncmerit.jl") \ No newline at end of file diff --git a/src/merit/uncmerit.jl b/src/merit/uncmerit.jl new file mode 100644 index 00000000..d3e7a869 --- /dev/null +++ b/src/merit/uncmerit.jl @@ -0,0 +1,47 @@ +export UncMerit + +@doc raw""" + UncMerit(nlp; kwargs...) + UncMerit(nlp, η; kwargs...) # η is ignored + +Creates a merit wrapper for unconstrained or bound-countrained problems. +Formally accepts the same constructor as other merit functions, but in practice it ignores any constrained-related value, even if `nlp` is constrained. +""" +mutable struct UncMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T # ignored in practice + fx :: T + gx :: V + cx :: V # ignored in practice + Ad :: V # ignored in practice +end + +function UncMerit( + nlp :: M, + η :: T = 0.0; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = T[], + Ad :: V = T[], +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + meta = NLPModelMeta(nlp.meta.nvar) + UncMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) +end + +function dualobj(merit :: UncMerit) + merit.fx +end + +function primalobj(merit :: UncMerit) + zero(eltype(merit.η)) +end + +function derivative(merit :: UncMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + end + return dot(merit.gx, d) +end \ No newline at end of file diff --git a/src/trust-region/basic-trust-region.jl b/src/trust-region/basic-trust-region.jl index 5523437e..28f8bd53 100644 --- a/src/trust-region/basic-trust-region.jl +++ b/src/trust-region/basic-trust-region.jl @@ -1,40 +1,72 @@ -export TrustRegion - -"""Basic trust region type.""" -mutable struct TrustRegion <: AbstractTrustRegion - initial_radius :: AbstractFloat - radius :: AbstractFloat - max_radius :: AbstractFloat - acceptance_threshold :: AbstractFloat - increase_threshold :: AbstractFloat - decrease_factor :: AbstractFloat - increase_factor :: AbstractFloat - ratio :: AbstractFloat - - function TrustRegion(initial_radius :: T; - max_radius :: T=one(T)/sqrt(eps(T)), - acceptance_threshold :: T=T(1.0e-4), - increase_threshold :: T=T(0.95), - decrease_factor :: T=one(T)/3, - increase_factor :: T=3 * one(T) / 2) where T <: Number - - initial_radius > 0 || (initial_radius = one(T)) - max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius")) - (0 < acceptance_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds")) - (0 < decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors")) - - return new(initial_radius, initial_radius, max_radius, - acceptance_threshold, increase_threshold, - 
-               decrease_factor, increase_factor, 0)
-  end
-end
-
-function update!(tr :: TrustRegion, step_norm :: AbstractFloat)
-  if tr.ratio < tr.acceptance_threshold
-    tr.radius = tr.decrease_factor * step_norm
-  elseif tr.ratio >= tr.increase_threshold
-    tr.radius = min(tr.max_radius,
-                    max(tr.radius, tr.increase_factor * step_norm))
-  end
-  return tr
-end
+export basic_trust_region!
+
+"""
+    tro = basic_trust_region!(ϕ, x, d, xt, Δq, Ad, Δ; kwargs...)
+
+Update `xt` and `Δ` using a trust region strategy. See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy.
+
+This strategy corresponds to the keyword `method=:basic` on `trust_region`.
+
+Given `ρ = ared / pred`,
+- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = σdec‖d‖`;
+- if `ηacc ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = Δₖ`;
+- otherwise, `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, σinc‖d‖))`.
+"""
+function basic_trust_region!(
+  ϕ :: AbstractMeritModel{M,T,V},
+  x :: V,
+  d :: V,
+  xt :: V,
+  Δq :: T,
+  Ad :: V,
+  Δ :: T;
+  update_obj_at_x :: Bool=false,
+  penalty_update :: Symbol=:basic,
+  max_radius :: T=one(T)/sqrt(eps(T)),
+  ηacc :: Real=T(1.0e-4),
+  ηinc :: Real=T(0.95),
+  σdec :: Real=one(T) / 3,
+  σinc :: Real=3 * one(T) / 2,
+) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}}
+  (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds"))
+  (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors"))
+  ϕx = obj(ϕ, x, update=update_obj_at_x)
+  ϕxf = dualobj(ϕ)
+  ϕxc = primalobj(ϕ)
+  ϕ.fx += Δq
+  ϕ.cx .+= Ad
+  mf = dualobj(ϕ)
+  mc = primalobj(ϕ)
+
+  if penalty_update == :basic
+    while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0
+      ϕ.η *= 2
+    end
+  else
+    throw(TrustRegionException("Unidentified penalty_update $penalty_update"))
+  end
+
+  @. xt = x + d
+  ϕt = obj(ϕ, xt)
+  ϕtf = dualobj(ϕ)
+  ϕtc = primalobj(ϕ)
+
+  m = mf + ϕ.η * mc
+  ared = ϕx - ϕt
+  pred = ϕx - m
+  ρ = ared / pred
+
+  normd = norm(d)
+  status = if ρ > ηinc
+    Δ = min(max_radius, max(Δ, σinc * normd))
+    :great
+  elseif ρ < ηacc
+    Δ = σdec * normd
+    xt .= x
+    :bad
+  else
+    :good
+  end
+
+  return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt)
+end
\ No newline at end of file
diff --git a/src/trust-region/tron-trust-region.jl b/src/trust-region/tron-trust-region.jl
index 57ff961b..b25fb375 100644
--- a/src/trust-region/tron-trust-region.jl
+++ b/src/trust-region/tron-trust-region.jl
@@ -1,59 +1,87 @@
-export TRONTrustRegion
-
-"""Trust region used by TRON"""
-mutable struct TRONTrustRegion <: AbstractTrustRegion
-  initial_radius :: AbstractFloat
-  radius :: AbstractFloat
-  max_radius :: AbstractFloat
-  acceptance_threshold :: AbstractFloat
-  decrease_threshold :: AbstractFloat
-  increase_threshold :: AbstractFloat
-  large_decrease_factor :: AbstractFloat
-  small_decrease_factor :: AbstractFloat
-  increase_factor :: AbstractFloat
-  ratio :: AbstractFloat
-  quad_min :: AbstractFloat
-
-  function TRONTrustRegion(initial_radius :: T;
-                           max_radius :: T=one(T)/sqrt(eps(T)),
-                           acceptance_threshold :: T=T(1.0e-4),
-                           decrease_threshold :: T=T(0.25),
-                           increase_threshold :: T=T(0.75),
-                           large_decrease_factor :: T=T(0.25),
-                           small_decrease_factor :: T=T(0.5),
-                           increase_factor :: T=T(4)) where T <: AbstractFloat
-
-    initial_radius > 0 || (initial_radius = one(T))
-    max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius"))
-    (0 < acceptance_threshold < decrease_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds"))
-    (0 < large_decrease_factor < small_decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors"))
-
-    return new(initial_radius, initial_radius, max_radius,
-               acceptance_threshold, decrease_threshold, increase_threshold,
-               large_decrease_factor, small_decrease_factor, increase_factor,
-               zero(T), zero(T))
-  end
-end
-
+export tron_trust_region!
+
+"""
+    tro = tron_trust_region!(ϕ, x, d, xt, Δq, Ad, Δ; kwargs...)
+
+Update `xt` and `Δ` using a trust region strategy. See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy.
+
+This strategy corresponds to the keyword `method=:tron` on `trust_region`.
+In addition to the default keywords, the following keywords are also available:
+- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `true`);
+- `ηdec`: threshold for decreasing Δ (default: 0.25);
+- `σlarge_dec`: factor used in the decrease heuristics (default: 0.25).
+
+Given `ρ = ared / pred` and `slope = Dϕ(x; d)`,
+- compute `γ = ϕt - ϕx - slope`;
+- compute `α = γ ≤ 0 ? σinc : max(σlarge_dec, -slope / 2γ)`;
+- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = min(max(α, σlarge_dec)‖d‖, σdec Δₖ)`;
+- if `ηacc ≤ ρ < ηdec`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_dec Δₖ, min(α‖d‖, σdec Δₖ))`;
+- if `ηdec ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_dec Δₖ, min(α‖d‖, σinc Δₖ))`;
+- otherwise, `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, min(α‖d‖, σinc Δₖ)))`.
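+
+For example, a solver iteration might use this strategy through `trust_region!` as follows
+(a sketch: `ϕ`, `x`, `d`, `xt`, the predicted decrease `Δq`, the product `Ad = J(x) d` and the
+current radius `Δ` are assumed to be given):
+
+    tro = trust_region!(ϕ, x, d, xt, Δq, Ad, Δ, method=:tron)
+    Δ = tro.Δ
+    tro.success && (x .= tro.xt)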
+""" +function tron_trust_region!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V, + Δq :: T, + Ad :: V, + Δ :: T; + update_obj_at_x :: Bool=false, + update_derivative_at_x :: Bool=true, + penalty_update :: Symbol=:basic, + max_radius :: T=one(T)/sqrt(eps(T)), + ηacc :: Real=T(1.0e-4), + ηdec :: Real=T(0.25), + ηinc :: Real=T(0.95), + σlarge_dec :: Real=T(0.25), + σdec :: Real=T(0.5), + σinc :: Real=3 * one(T) / 2, +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds")) + (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors")) + ϕx = obj(ϕ, x, update=update_obj_at_x) + slope = derivative(ϕ, x, d, update=update_derivative_at_x) + ϕxf = dualobj(ϕ) + ϕxc = primalobj(ϕ) + ϕ.fx += Δq + ϕ.cx .+= Ad + mf = dualobj(ϕ) + mc = primalobj(ϕ) + + if penalty_update == :basic + while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0 + ϕ.η *= 2 + end + else + throw(TrustRegionException("Unidentified penalty_update $penalty_update")) end -end - -function aredpred(tr :: TRONTrustRegion, nlp :: AbstractNLPModel, f :: T, - f_trial :: T, Δm :: T, x_trial :: Vector{T}, - step :: Vector{T}, slope :: T) where T <: AbstractFloat - ared, pred = aredpred(nlp, f, f_trial, Δm, x_trial, step, slope) - γ = f_trial - f - slope - quad_min = γ <= 0 ? tr.increase_factor : max(tr.large_decrease_factor, -slope / γ / 2) - return ared, pred, quad_min -end - -function update!(tr :: TRONTrustRegion, step_norm :: AbstractFloat) - α, σ₁, σ₂, σ₃ = tr.quad_min, tr.large_decrease_factor, tr.small_decrease_factor, tr.increase_factor - tr.radius = if tr.ratio < tr.acceptance_threshold - min(max(α, σ₁) * step_norm, σ₂ * tr.radius) - elseif tr.ratio < tr.decrease_threshold - max(σ₁ * tr.radius, min(α * step_norm, σ₂ * tr.radius)) - elseif tr.ratio < tr.increase_threshold - max(σ₁ * tr.radius, min(α * step_norm, σ₃ * tr.radius)) + + @. xt = x + d + ϕt = obj(ϕ, xt) + ϕtf = dualobj(ϕ) + ϕtc = primalobj(ϕ) + + m = mf + ϕ.η * mc + ared = ϕx - ϕt + pred = ϕx - m + ρ = ared / pred + + γ = ϕt - ϕx - slope + α = γ ≤ 0 ? 
σinc : max(σlarge_dec, -slope / 2γ) + + normd = norm(d) + Δ, status = if ρ < ηacc + x .= xt + min(max(α, σlarge_dec) * normd, σdec * Δ), :bad + elseif ρ < ηdec + max(σlarge_dec * Δ, min(α * normd, σdec * Δ)), :good + elseif ρ < ηinc + max(σlarge_dec * Δ, min(α * normd, σinc * Δ)), :good else - min(tr.max_radius, max(tr.radius, min(α * step_norm, σ₃ * tr.radius))) + min(max_radius, max(Δ, min(α * normd, σinc * Δ))), :great end - return tr -end + + return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt) +end \ No newline at end of file diff --git a/src/trust-region/trust-region-output.jl b/src/trust-region/trust-region-output.jl new file mode 100644 index 00000000..dbddbe17 --- /dev/null +++ b/src/trust-region/trust-region-output.jl @@ -0,0 +1,25 @@ +export TrustRegionOutput + +struct TrustRegionOutput{T <: Real, V <: AbstractVector} + status :: Symbol + ared :: T + pred :: T + ρ :: T + success :: Bool + Δ :: T + xt :: V + specific :: NamedTuple +end + +function TrustRegionOutput( + status :: Symbol, + ared :: T, + pred :: T, + ρ :: T, + success :: Bool, + Δ :: T, + xt :: V; + specific :: NamedTuple = NamedTuple() +) where {T <: Real, V <: AbstractVector{<: T}} + TrustRegionOutput(status, ared, pred, ρ, success, Δ, xt, specific) +end \ No newline at end of file diff --git a/src/trust-region/trust-region.jl b/src/trust-region/trust-region.jl index a1ec4a79..20f47ffb 100644 --- a/src/trust-region/trust-region.jl +++ b/src/trust-region/trust-region.jl @@ -1,98 +1,69 @@ # A trust-region type and basic utility functions. -import NLPModels: reset! -export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! +export TrustRegionException +export trust_region, trust_region! +# import NLPModels: reset! +# export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! + +include("trust-region-output.jl") +include("basic-trust-region.jl") +include("tron-trust-region.jl") + +const tr_dict = Dict( + :basic => basic_trust_region!, + :tron => tron_trust_region!, +) "Exception type raised in case of error." mutable struct TrustRegionException <: Exception msg :: String end -"""`AbstractTrustRegion` - -An abstract trust region type so that specific trust regions define update -rules differently. Child types must have at least the following fields: - -- `acceptance_threshold :: AbstractFloat` -- `initial_radius :: AbstractFloat` -- `radius :: AbstractFloat` -- `ratio :: AbstractFloat` - -and the following function: - -- `update!(tr, step_norm)` -""" -abstract type AbstractTrustRegion end - -"""`ared, pred = aredpred(nlp, f, f_trial, Δm, x_trial, step, slope)` - -Compute the actual and predicted reductions `∆f` and `Δm`, where -`Δf = f_trial - f` is the actual reduction is an objective/merit/penalty function, -`Δm = m_trial - m` is the reduction predicted by the model `m` of `f`. -We assume that `m` is being minimized, and therefore that `Δm < 0`. 
-""" -function aredpred(nlp :: AbstractNLPModel, f :: T, f_trial :: T, Δm :: T, - x_trial :: Vector{T}, step :: Vector{T}, slope :: T) where T <: AbstractFloat - absf = abs(f) - ϵ = eps(T) - pred = Δm - max(one(T), absf) * 10 * ϵ - - ared = f_trial - f + max(one(T), absf) * 10 * ϵ - if (abs(Δm) < 10_000 * ϵ) || (abs(ared) < 10_000 * ϵ * absf) - # correct for roundoff error - g_trial = grad(nlp, x_trial) - slope_trial = dot(g_trial, step) - ared = (slope_trial + slope) / 2 - end - return ared, pred -end - - -"""`acceptable(tr)` - -Return `true` if a step is acceptable """ -function acceptable(tr :: AbstractTrustRegion) - return tr.ratio >= tr.acceptance_threshold -end - -"""`reset!(tr)` - -Reset the trust-region radius to its initial value + tro = trust_region(ϕ, x, d, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region!(ϕ, x, d, xt, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region(nlp, x, d, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + tro = trust_region!(nlp, x, d, xt, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Update `xt` and `Δ` using a trust region strategy according to the `method` keyword. +The output is a `TrustRegionOutput`. + +The actual reduction of the model is given by +``` +ared = ϕx - ϕt, +``` +where `ϕx = obj(nlp, x; update=update_obj_at_x)` and `ϕt = obj(nlp, xt)`. The predicted reduction is +given by +``` +pred = ϕx - (ϕx + Δq) - ϕ.η P(ϕ.cx + Ad), +``` +where `P` is the penalty function used by `ϕ` and `Ad` is the Jacobian times `d`. + +The following keyword argument are available: +- `update_obj_at_x`: Whether to update `ϕ`'s objective at `x` (default: `false`); +- `penalty_update`: Strategy to update the parameter `ϕ.η`. (default: `:basic`); +- `max_radius`: Maximum value for `Δ` (default 1/√ϵ(T)); +- `ηacc`: threshold for successful step (default: 1.0e-4); +- `ηinc`: threshold for great step (default: 0.95), +- `σdec`: factor used in the radius decrease heuristics (default: 1/3); +- `σinc`: factor used in the radius increase heuristics (default: 3/2); + +Some methods have additional keyword arguments. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ += 0`). In such case a `UncMerit` is created using the keyword argument `fx`. """ -function reset!(tr :: AbstractTrustRegion) - tr.radius = tr.initial_radius - return tr -end +function trust_region end - -"""A basic getter for `AbstractTrustRegion` instances. -Should be overhauled when it's possible to overload `getfield()` -and `setfield!()`. See -https://github.com/JuliaLang/julia/issues/1974 -""" -function get_property(tr :: AbstractTrustRegion, prop :: Symbol) - # All fields are gettable. - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - getfield(tr, prop) +function trust_region(nlp_or_ϕ, x, d, args...; kwargs...) + xt = copy(x) + trust_region!(nlp_or_ϕ, x, d, xt, args...; kwargs...) end -"""A basic setter for `AbstractTrustRegion` instances. -""" -function set_property!(tr :: AbstractTrustRegion, prop :: Symbol, value :: Any) - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - setfield!(tr, prop, value) +function trust_region!(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector, xt :: AbstractVector, Δq :: Real, Δ :: Real; fx=obj(nlp, x), kwargs...) 
+ ϕ = UncMerit(nlp, fx=fx) + trust_region!(ϕ, x, d, xt, Δq, eltype(x)[], Δ; kwargs...) end -"""`update!(tr, step_norm)` - -Update the trust-region radius based on the ratio of actual vs. predicted reduction -and the step norm. -""" -function update!(tr :: AbstractTrustRegion, ratio :: AbstractFloat, step_norm :: AbstractFloat) - throw(NotImplementedError("`update!` not implemented for this TrustRegion type")) -end - -include("basic-trust-region.jl") -include("tron-trust-region.jl") +function trust_region!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:basic, kwargs...) + SolverTools.tr_dict[method](ϕ, x, args...; kwargs...) +end \ No newline at end of file diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl new file mode 100644 index 00000000..87ed14e5 --- /dev/null +++ b/test/dummy_linesearch_solver.jl @@ -0,0 +1,88 @@ +function dummy_linesearch_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = copy(nlp.meta.x0), + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + ls_method :: Symbol = :armijo, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + xt = copy(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar) + y = -Jx' \ gx + Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x) + + dual = gx + Jx' * y + + iter = 0 + + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + @info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T]) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? 
hess(nlp, x, y) : hess(nlp, x) + W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L) + Δxy = -W \ [dual; cx] + Δx = Δxy[1:nvar] + Δy = Δxy[nvar+1:end] + + ϕ.fx = fx + ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad) + Dϕx = derivative(ϕ, x, Δx, update=false) + while Dϕx ≥ 0 + ϕ.η *= 2 + Dϕx = derivative(ϕ, x, Δx, update=false) + end + ϕx = obj(ϕ, x, update=false) + + lso = linesearch!(ϕ, x, Δx, xt, method=ls_method) + x .= xt + t = lso.t + y .+= t * Δy + + grad!(nlp, x, gx) # Updates ϕ.gx + if ncon > 0 + Jx = jac(nlp, x) + end + dual = gx + Jx' * y + elapsed_time = time() - start_time + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + iter += 1 + fx = obj(nlp, x) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + end + + status = if solved + :first_order + elseif elapsed_time > max_time + :max_time + else + :max_eval + end + + return GenericExecutionStats(status, nlp, + objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), + multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), + elapsed_time=elapsed_time, solution=x, iter=iter + ) +end diff --git a/test/dummy_solver.jl b/test/dummy_solver.jl index 8e29e260..466bdb2e 100644 --- a/test/dummy_solver.jl +++ b/test/dummy_solver.jl @@ -1,5 +1,5 @@ function dummy_solver(nlp :: AbstractNLPModel; - x :: AbstractVector = nlp.meta.x0, + x :: AbstractVector = copy(nlp.meta.x0), atol :: Real = sqrt(eps(eltype(x))), rtol :: Real = sqrt(eps(eltype(x))), max_eval :: Int = 1000, @@ -61,7 +61,7 @@ function dummy_solver(nlp :: AbstractNLPModel; :max_eval end - return GenericExecutionStats(:unknown, nlp, + return GenericExecutionStats(status, nlp, objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), elapsed_time=elapsed_time, solution=x, iter=iter diff --git a/test/dummy_trust_region_solver.jl b/test/dummy_trust_region_solver.jl new file mode 100644 index 00000000..284ab000 --- /dev/null +++ b/test/dummy_trust_region_solver.jl @@ -0,0 +1,99 @@ +using Krylov + +function dummy_trust_region_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = copy(nlp.meta.x0), + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + tr_method :: Symbol = :basic, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + evals(nlp) = neval_obj(nlp) + neval_cons(nlp) + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + xt = copy(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar) + y = cgls(Jx', -gx)[1] + + dual = gx + Jx' * y + + iter = 0 + + Δ = 100.0 + ϕ = merit_constructor(nlp, 1e-3, cx=copy(cx), gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + ϕ.fx = fx + @info log_header([:iter, :nfc, :f, :c, :dual, :time, :Δ, :η, :iterkind], [Int, Int, T, T, T, T, T, T, String]) + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, "Initial"]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = evals(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? 
hess_op(nlp, x, y) : hess_op(nlp, x)
+    local Δx
+    if ncon > 0
+      # Normal step
+      v = cgls(Jx, -cx, radius=0.8Δ)[1]
+      # Tangent step
+      Z = nullspace(Jx)
+      Δx = v + Z * cg(Z' * Hxy * Z, -Z' * (dual + Hxy * v), radius=sqrt(Δ^2 - dot(v, v)))[1]
+    else
+      Δx = cg(Hxy, -dual, radius=Δ)[1]
+    end
+
+    Δq = dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2
+    Ad = Jx * Δx
+    tro = trust_region!(ϕ, x, Δx, xt, Δq, Ad, Δ, method=tr_method, update_obj_at_x=false)
+
+    Δ = tro.Δ
+    x .= tro.xt
+
+    if tro.success
+      fx = ϕ.fx
+      cx .= ϕ.cx
+      grad!(nlp, x, gx)
+      if ncon > 0
+        Jx = jac(nlp, x)
+      end
+      y = cgls(Jx', -gx)[1]
+      dual = gx + Jx' * y
+    else
+      ϕ.fx = fx
+      ϕ.cx .= cx
+    end
+
+    elapsed_time = time() - start_time
+    solved = norm(dual) < ϵd && norm(cx) < ϵp
+    tired = evals(nlp) > max_eval || elapsed_time > max_time
+
+    iter += 1
+    @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, tro.status])
+  end
+
+  status = if solved
+    :first_order
+  elseif elapsed_time > max_time
+    :max_time
+  else
+    :max_eval
+  end
+
+  return GenericExecutionStats(status, nlp,
+    objective=fx, dual_feas=norm(dual), primal_feas=norm(cx),
+    multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar),
+    elapsed_time=elapsed_time, solution=x, iter=iter
+  )
+end
diff --git a/test/merit.jl b/test/merit.jl
new file mode 100644
index 00000000..43ab2deb
--- /dev/null
+++ b/test/merit.jl
@@ -0,0 +1,139 @@
+function test_merit(merit_constructor, ϕ)
+  @testset "Consistency" begin
+    nlp = ADNLPModel(x -> dot(x, x), zeros(5), x -> [sum(x.^2) - 5; sum(x) - 1; prod(x)], zeros(3), zeros(3))
+
+    for x in (zeros(5), ones(5), -ones(5), BigFloat.([π, -2π, 3π, -4π, 0.0])),
+        d in (-ones(5), ones(5))
+      η = one(eltype(x))
+      merit = merit_constructor(nlp, η)
+      ϕx = obj(merit, x)
+      @test ϕx ≈ ϕ(nlp, x, η)
+      @test obj(nlp, x) == merit.fx
+      @test cons(nlp, x) == merit.cx
+
+      Dϕx = derivative(merit, x, d)
+      ϵ = √eps(eltype(η))
+      Dϕxa = (ϕ(nlp, x + ϵ * d, η) - ϕ(nlp, x, η)) / ϵ
+      @test isapprox(Dϕx, Dϕxa, rtol=1e-6)
+      @test grad(nlp, x) == merit.gx
+      @test jac(nlp, x) * d == merit.Ad
+
+      ϕt = obj(merit, x + d)
+      @test ϕt ≈ ϕ(nlp, x + d, η)
+      @test obj(nlp, x + d) == merit.fx
+      @test cons(nlp, x + d) == merit.cx
+    end
+  end
+
+  @testset "Simple line search" begin
+    nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0])
+
+    sol = [0.5; 0.5]
+    x = [-1.0; 1.0]
+    d = 30 * (sol - x)
+    η = 1.0
+
+    merit = merit_constructor(nlp, η)
+
+    @assert obj(nlp, x + d) > obj(nlp, x)
+    @assert norm(cons(nlp, x + d)) > norm(cons(nlp, x))
+
+    reset!(nlp)
+    bk = 0
+    ϕx = obj(merit, x)
+    obj(merit, x, update=false)
+    Dϕx = derivative(merit, x, d)
+    derivative(merit, x, d, update=false)
+    @test Dϕx < 0 # If d is not a descent direction for your merit, change your parameters
+    t = 1.0
+    ϕt = obj(merit, x + d)
+    while ϕt > ϕx + 0.5 * t * Dϕx
+      t /= 2
+      ϕt = obj(merit, x + t * d)
+      if t < 1e-16
+        error("Failure")
+      end
+      bk += 1
+    end
+    @test t < 1
+    @test neval_obj(nlp) == 2 + bk
+    @test neval_cons(nlp) == 2 + bk
+    @test neval_grad(nlp) == 1
+    @test neval_jprod(nlp) == 1
+  end
+
+  @testset "Separate parts" begin
+    nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [x[1]^2 + x[2]^2 - 1], [0.0], [0.0])
+    ϕ = merit_constructor(nlp, 0.0)
+    for i = 1:2
+      x = randn(nlp.meta.nvar)
+      d = randn(nlp.meta.nvar)
+      ϕ.η = 0.0
+      manual_dualobj = obj(ϕ, x)
+      ϕ.η = 1.0
+      manual_primalobj = obj(ϕ, x) - manual_dualobj
+      @test dualobj(ϕ) ≈ manual_dualobj
+      @test primalobj(ϕ) ≈ manual_primalobj
+    end
+  end
+
+  @testset "Safe for
unconstrained" begin + nlp = ADNLPModel(x -> dot(x, x), ones(2)) + x = nlp.meta.x0 + d = ones(2) + + merit = merit_constructor(nlp, 1.0) + @test obj(merit, x) == obj(nlp, x) + @test derivative(merit, x, d) == dot(grad(nlp, x), d) + end + + @testset "Use on solver" begin + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 + end +end + +@testset "Merit functions" begin + merits = [ # Name, Constructor, ϕ(nlp, x, η) + ( + "L1Merit", + L1Merit, + (nlp, x, η) -> obj(nlp, x) + η * norm(cons(nlp, x), 1) + ), + ( + "AugLagMerit", + (nlp, η; kwargs...) 
-> AugLagMerit(nlp, η, y=-ones(typeof(η), nlp.meta.ncon); kwargs...),
+      (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (-1,…,-1)ᵀ
+    )
+  ]
+  for (name, merit, ϕ) in merits
+    @testset "Merit $name" begin
+      test_merit(merit, ϕ)
+    end
+  end
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index e56c4737..e4d59e1b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,9 +8,12 @@ using NLPModels
 using LinearAlgebra, Logging, Test
 
 include("dummy_solver.jl")
+include("dummy_linesearch_solver.jl")
+include("dummy_trust_region_solver.jl")
 
 include("test_auxiliary.jl")
 include("test_linesearch.jl")
 include("test_logging.jl")
+include("merit.jl")
 include("test_stats.jl")
-include("test_trust_region.jl")
+include("test_trust_region.jl")
\ No newline at end of file
diff --git a/test/test_linesearch.jl b/test/test_linesearch.jl
index 4e519255..cebd4db9 100644
--- a/test/test_linesearch.jl
+++ b/test/test_linesearch.jl
@@ -1,63 +1,81 @@
 @testset "Linesearch" begin
-  @testset "LineModel" begin
-    nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2))
-    x = nlp.meta.x0
-    d = -ones(2)
-    lm = LineModel(nlp, x, d)
-    g = zeros(2)
+  ls_methods = [:armijo, :armijo_wolfe]
+  @testset "Unconstrained problems" begin
+    specific_tests = Dict(
+      :armijo => Dict(:nbk => [0, 0, 1, 0]),
+      :armijo_wolfe => Dict(:nbk => [0, 0, 1, 5], :nbW => [0, 0, 0, 1]),
+    )
+    for ls_method in ls_methods
+      @testset "Method $ls_method" begin
+        nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2))
+        x = nlp.meta.x0
+        d = -ones(2)
+        lso = linesearch(nlp, x, d, method=ls_method)
+        @test lso.t == 1
+        @test lso.ϕt == 0.0
+        for (k,v) in specific_tests[ls_method]
+          @test lso.specific[k] == v[1]
+        end
 
-    @test obj(lm, 0.0) == obj(nlp, x)
-    @test grad(lm, 0.0) == dot(grad(nlp, x), d)
-    @test grad!(lm, 0.0, g) == dot(grad(nlp, x), d)
-    @test g == grad(nlp, x)
-    @test derivative(lm, 0.0) == dot(grad(nlp, x), d)
-    @test derivative!(lm, 0.0, g) == dot(grad(nlp, x), d)
-    @test g == grad(nlp, x)
-    @test hess(lm, 0.0) == dot(d, Symmetric(hess(nlp, x), :L) * d)
+        d = -ones(2) / 2
+        lso = linesearch(nlp, x, d, method=ls_method)
+        @test lso.t == 1
+        @test lso.ϕt == 1.25
+        for (k,v) in specific_tests[ls_method]
+          @test lso.specific[k] == v[2]
+        end
 
-    @test obj(lm, 1.0) == 0.0
-    @test grad(lm, 1.0) == 0.0
-    @test hess(lm, 1.0) == 2d[1]^2 + 8d[2]^2
+        d = -2 * ones(2)
+        lso = linesearch(nlp, x, d, method=ls_method)
+        @test lso.t < 1
+        for (k,v) in specific_tests[ls_method]
+          @test lso.specific[k] == v[3]
+        end
 
-    redirect!(lm, zeros(2), ones(2))
-    @test obj(lm, 0.0) == 0.0
-    @test grad(lm, 0.0) == 0.0
-    @test hess(lm, 0.0) == 10.0
+        nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2))
+        x = nlp.meta.x0
+        d = [1.7; 3.2]
+        lso = linesearch(nlp, x, d, method=ls_method)
+        for (k,v) in specific_tests[ls_method]
+          @test lso.specific[k] == v[4]
+        end
+      end # @testset
+    end # for
+  end # @testset
 
-    @test neval_obj(lm) == 3
-    @test neval_grad(lm) == 6
-    @test neval_hess(lm) == 3
-  end
+  @testset "Constrained problems" begin
+    specific_tests = Dict(
+      :armijo => Dict(:nbk => [0, 0, 1]),
+      :armijo_wolfe => Dict(:nbk => [0, 0, 1]),
+    )
+    for ls_method in ls_methods
+      @testset "Method $ls_method" begin
+        nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0])
+        x = nlp.meta.x0
+        ϕ = L1Merit(nlp, 1.0)
+        d = -ones(2)
+        lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true)
+        @test lso.t == 1
+        @test lso.ϕt == 0.0
+        for (k,v)
in specific_tests[ls_method] + @test lso.specific[k] == v[1] + end - @testset "Armijo-Wolfe" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - lm = LineModel(nlp, nlp.meta.x0, -ones(2)) - g = zeros(2) + d = -ones(2) / 2 + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t == 1 + @test lso.ϕt == 1.25 + 1.5 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[2] + end - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 0.0 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -ones(2) / 2) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 1.25 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -2 * ones(2)) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW == 0 - - nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2)) - lm = LineModel(nlp, nlp.meta.x0, [1.7; 3.2]) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW > 0 - end -end + d = -2 * ones(2) + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t < 1 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[3] + end + end # @testset + end # for + end # @testset +end \ No newline at end of file diff --git a/test/test_trust_region.jl b/test/test_trust_region.jl index bd543b5c..2940dd3e 100644 --- a/test/test_trust_region.jl +++ b/test/test_trust_region.jl @@ -1,62 +1,67 @@ @testset "Trust Region" begin - @testset "aredpred" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - x = nlp.meta.x0 - d = -ones(2) - xt = x + d - f = obj(nlp, x) - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - tr = TRONTrustRegion(100.0) - ared, pred, qmin = aredpred(tr, nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - d = -1e-12 * ones(2) - xt = x + d - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - end + tr_methods = [:basic, :tron] + + @testset "Unconstrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) + x = nlp.meta.x0 + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success - @testset "BasicTrustRegion" begin - Δ₀ = 10.0 - tr = TrustRegion(Δ₀) - tr.ratio = 1.0 - @test acceptable(tr) == true + Δq = -tro.ared / 0.25 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = 10.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ - reset!(tr) - tr.ratio = -1.0 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) + Δq = -tro.ared / 1e-6 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end - @testset "TRONTrustRegion" begin - Δ₀ = 10.0 - tr = TRONTrustRegion(Δ₀) - 
tr.ratio = 1.0 - @test acceptable(tr) == true + @testset "Constrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0]) + x = nlp.meta.x0 + ϕ = L1Merit(nlp, 1.0) + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + Ad = jprod(nlp, x, d) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success + + Δq = -tro.ared / 0.25 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = tr.acceptance_threshold - 1 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.acceptance_threshold + tr.decrease_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.decrease_threshold + tr.increase_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = tr.increase_threshold + 1 - tr.quad_min = 2.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ + Δq = -tro.ared / 1e-6 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end end