From c6a2913c5904bb46fd531b62777b91ada5b104c0 Mon Sep 17 00:00:00 2001 From: Abel Soares Siqueira Date: Fri, 7 Aug 2020 10:48:03 -0300 Subject: [PATCH 1/7] First draft of merit function --- .gitignore | 2 + src/SolverTools.jl | 1 + src/linesearch/line_model.jl | 10 ++- src/merit/auglagmerit.jl | 43 +++++++++++++ src/merit/l1merit.jl | 50 +++++++++++++++ src/merit/merit.jl | 35 ++++++++++ test/dummy_linesearch_solver.jl | 90 ++++++++++++++++++++++++++ test/merit.jl | 110 ++++++++++++++++++++++++++++++++ test/runtests.jl | 4 +- 9 files changed, 338 insertions(+), 7 deletions(-) create mode 100644 src/merit/auglagmerit.jl create mode 100644 src/merit/l1merit.jl create mode 100644 src/merit/merit.jl create mode 100644 test/dummy_linesearch_solver.jl create mode 100644 test/merit.jl diff --git a/.gitignore b/.gitignore index 2cd4ac3f..98474722 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ docs/build docs/Manifest.toml + +Manifest.toml \ No newline at end of file diff --git a/src/SolverTools.jl b/src/SolverTools.jl index ed6ecde2..5947635b 100644 --- a/src/SolverTools.jl +++ b/src/SolverTools.jl @@ -16,6 +16,7 @@ include("auxiliary/logger.jl") include("stats/stats.jl") # Algorithmic components. +include("merit/merit.jl") include("linesearch/linesearch.jl") include("trust-region/trust-region.jl") diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl index 54f21426..08e4b78b 100644 --- a/src/linesearch/line_model.jl +++ b/src/linesearch/line_model.jl @@ -1,5 +1,3 @@ -import NLPModels: obj, grad, grad!, hess - export LineModel export obj, grad, derivative, grad!, derivative!, hess, redirect! @@ -38,7 +36,7 @@ end ϕ(t) := f(x + td). """ -function obj(f :: LineModel, t :: AbstractFloat) +function NLPModels.obj(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_obj) return obj(f.nlp, f.x + t * f.d) end @@ -51,7 +49,7 @@ i.e., ϕ'(t) = ∇f(x + td)ᵀd. """ -function grad(f :: LineModel, t :: AbstractFloat) +function NLPModels.grad(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_grad) return dot(grad(f.nlp, f.x + t * f.d), f.d) end @@ -67,7 +65,7 @@ i.e., The gradient ∇f(x + td) is stored in `g`. """ -function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) +function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) NLPModels.increment!(f, :neval_grad) return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) end @@ -81,7 +79,7 @@ i.e., ϕ"(t) = dᵀ∇²f(x + td)d. 
""" -function hess(f :: LineModel, t :: AbstractFloat) +function NLPModels.hess(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_hess) return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) end diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl new file mode 100644 index 00000000..93aa2a12 --- /dev/null +++ b/src/merit/auglagmerit.jl @@ -0,0 +1,43 @@ +export AugLagMerit + +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V +end + +function AugLagMerit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) +end + +function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +end + +function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl new file mode 100644 index 00000000..853b4e74 --- /dev/null +++ b/src/merit/l1merit.jl @@ -0,0 +1,50 @@ +export L1Merit + +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V +end + +function L1Merit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) +end + +function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end +end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl new file mode 100644 index 00000000..68d6ff6e --- /dev/null +++ b/src/merit/merit.jl @@ -0,0 +1,35 @@ +import NLPModels.obj +export AbstractMeritModel, obj, derivative + +""" + AbstractMeritModel + +Model for merit functions. All models should store +- `nlp`: The NLP with the corresponding problem. +- `fx`: The objective at some point. +- `cx`: The constraints vector at some point. +- `gx`: The constraints vector at some point. +- `Ad`: The Jacobian-direction product. 
+""" +abstract type AbstractMeritModel end + +""" + obj(merit, x; update=true) + +Computes the `merit` function value at `x`. +This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function obj end + +""" + derivative(merit, x, d; update=true) + +Computes the derivative derivative of `merit` at `x` on direction `d`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function derivative! end + +include("auglagmerit.jl") +include("l1merit.jl") diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl new file mode 100644 index 00000000..19374908 --- /dev/null +++ b/test/dummy_linesearch_solver.jl @@ -0,0 +1,90 @@ +function dummy_linesearch_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = nlp.meta.x0, + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar) + y = -Jx' \ gx + Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x) + + dual = gx + Jx' * y + + iter = 0 + + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + @info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T]) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? 
hess(nlp, x, y) : hess(nlp, x) + W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L) + Δxy = -W \ [dual; cx] + Δx = Δxy[1:nvar] + Δy = Δxy[nvar+1:end] + + ϕ.fx = fx + ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad) + Dϕx = derivative(ϕ, x, Δx, update=false) + while Dϕx ≥ 0 + ϕ.η *= 2 + Dϕx = derivative(ϕ, x, Δx, update=false) + end + ϕx = obj(ϕ, x, update=false) + + t = 1.0 + ϕt = obj(ϕ, x + Δx) # Updates cx + while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx) + t /= 2 + ϕt = obj(ϕ, x + t * Δx) # Updates cx + end + x += t * Δx + y += t * Δy + + grad!(nlp, x, gx) # Updates ϕ.gx + if ncon > 0 + Jx = jac(nlp, x) + end + dual = gx + Jx' * y + elapsed_time = time() - start_time + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + iter += 1 + fx = obj(nlp, x) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + end + + status = if solved + :first_order + elseif elapsed_time > max_time + :max_time + else + :max_eval + end + + return GenericExecutionStats(:unknown, nlp, + objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), + multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), + elapsed_time=elapsed_time, solution=x, iter=iter + ) +end diff --git a/test/merit.jl b/test/merit.jl new file mode 100644 index 00000000..b8c44085 --- /dev/null +++ b/test/merit.jl @@ -0,0 +1,110 @@ +function test_merit(merit_constructor, ϕ) + @testset "Consistency" begin + nlp = ADNLPModel(x -> dot(x, x), zeros(5), x -> [sum(x.^2) - 5; sum(x) - 1; prod(x)], zeros(3), zeros(3)) + + for x in (zeros(5), ones(5), -ones(5), BigFloat.([π, -2π, 3π, -4π, 0.0])), + d in (-ones(5), ones(5)) + η = one(eltype(x)) + merit = merit_constructor(nlp, η) + ϕx = obj(merit, x) + @test ϕx ≈ ϕ(nlp, x, η) + @test obj(nlp, x) == merit.fx + @test cons(nlp, x) == merit.cx + + Dϕx = derivative(merit, x, d) + ϵ = √eps(eltype(η)) + Dϕxa = (ϕ(nlp, x + ϵ * d, η) - ϕ(nlp, x, η)) / ϵ + @test isapprox(Dϕx, Dϕxa, rtol=1e-6) + @test grad(nlp, x) == merit.gx + @test jac(nlp, x) * d == merit.Ad + + ϕt = obj(merit, x + d) + @test ϕt ≈ ϕ(nlp, x + d, η) + @test obj(nlp, x + d) == merit.fx + @test cons(nlp, x + d) == merit.cx + end + end + + @testset "Simple line search" begin + nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0]) + + sol = [0.5; 0.5] + x = [-1.0; 1.0] + d = 30 * (sol - x) + η = 1.0 + + merit = merit_constructor(nlp, η) + + @assert obj(nlp, x + d) > obj(nlp, x) + @assert norm(cons(nlp, x + d)) > norm(cons(nlp, x)) + + reset!(nlp) + bk = 0 + ϕx = obj(merit, x) + obj(merit, x, update=false) + Dϕx = derivative(merit, x, d) + derivative(merit, x, d, update=false) + @test Dϕx < 0 # If d is not a descent direction for your merit, change your parameters + t = 1.0 + ϕt = obj(merit, x + d) + while ϕt > ϕx + 0.5 * t * Dϕx + t /= 2 + ϕt = obj(merit, x + t * d) + if t < 1e-16 + error("Failure") + end + bk += 1 + end + @test t < 1 + @test neval_obj(nlp) == 2 + bk + @test neval_cons(nlp) == 2 + bk + @test neval_grad(nlp) == 1 + @test neval_jprod(nlp) == 1 + end + + @testset "Safe for unconstrained" begin + nlp = ADNLPModel(x -> dot(x, x), ones(2)) + x = nlp.meta.x0 + d = ones(2) + + merit = merit_constructor(nlp, 1.0) + @test obj(merit, x) == obj(nlp, x) + @test derivative(merit, x, d) == dot(grad(nlp, x), d) + end + + @testset "Use on solver" begin + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test 
isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 + end +end + +@testset "Merit functions" begin + merits = [ # Name, Constructor, ϕ(nlp, x, η) + ( + "L1Merit", + L1Merit, + (nlp, x, η) -> obj(nlp, x) + η * norm(cons(nlp, x), 1) + ), + ( + "AugLagMerit", + (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...), + (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ + ) + ] + for (name, merit, ϕ) in merits + @testset "Merit $name" begin + test_merit(merit, ϕ) + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e56c4737..f4b46944 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,9 +8,11 @@ using NLPModels using LinearAlgebra, Logging, Test include("dummy_solver.jl") +include("dummy_linesearch_solver.jl") include("test_auxiliary.jl") include("test_linesearch.jl") include("test_logging.jl") +include("merit.jl") include("test_stats.jl") -include("test_trust_region.jl") +include("test_trust_region.jl") \ No newline at end of file From 80ad155898d60b80292aa2e54474d56cb32c073b Mon Sep 17 00:00:00 2001 From: Abel Date: Fri, 7 Aug 2020 14:43:47 -0300 Subject: [PATCH 2/7] Merit docs --- docs/src/api.md | 11 +++++++++++ docs/src/reference.md | 10 +++++++++- src/merit/auglagmerit.jl | 15 +++++++++++++++ src/merit/l1merit.jl | 12 ++++++++++++ src/merit/merit.jl | 17 ++++++++++++++--- 5 files changed, 61 insertions(+), 4 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 1585a3af..a0e4d821 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -27,6 +27,17 @@ redirect! armijo_wolfe ``` +## Merit + +See also [`obj`](@ref). + +```@docs +AbstractMeritModel +derivative +L1Merit +AugLagMerit +``` + ## Stats ```@docs diff --git a/docs/src/reference.md b/docs/src/reference.md index 2dec9b13..b0d59bcf 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -1,4 +1,12 @@ # Reference -```@index +## Contents + +```@contents +Pages = ["reference.md"] ``` + +## Index + +```@index +``` \ No newline at end of file diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index 93aa2a12..cbb05c12 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -1,5 +1,20 @@ export AugLagMerit +@doc raw""" + AugLagMerit(nlp, η; kwargs...) + +Creates an augmented Lagrangian merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|² +``` + +In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also +accepts the argument `y`. +""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel nlp :: M η :: T diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 853b4e74..754a07d1 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -1,5 +1,17 @@ export L1Merit +@doc raw""" + L1Merit(nlp, η; kwargs...) 
+ +Creates a ℓ₁ merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi_1(x; η) = f(x) + η\|c(x)\|₁ +``` +""" mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel nlp :: M η :: T diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 68d6ff6e..787cb21a 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -1,4 +1,4 @@ -import NLPModels.obj +using NLPModels export AbstractMeritModel, obj, derivative """ @@ -6,10 +6,17 @@ export AbstractMeritModel, obj, derivative Model for merit functions. All models should store - `nlp`: The NLP with the corresponding problem. +- `η`: The merit parameter. - `fx`: The objective at some point. - `cx`: The constraints vector at some point. - `gx`: The constraints vector at some point. - `Ad`: The Jacobian-direction product. + +All models allow a constructor of form + + Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf]) + +Additional arguments and constructors may be provided. """ abstract type AbstractMeritModel end @@ -20,7 +27,11 @@ Computes the `merit` function value at `x`. This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ -function obj end +function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector) + # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using + # only `function obj end` overwrites the docstring + throw(MethodError(NLPModels.obj, (merit, x))) +end """ derivative(merit, x, d; update=true) @@ -29,7 +40,7 @@ Computes the derivative derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ -function derivative! end +function derivative end include("auglagmerit.jl") include("l1merit.jl") From c689b3f8c733adefc24b5e70dfdb3ca65610f3fa Mon Sep 17 00:00:00 2001 From: Abel Date: Fri, 7 Aug 2020 17:18:16 -0300 Subject: [PATCH 3/7] Change merit to an NLPModel --- src/merit/auglagmerit.jl | 119 +++++++++++++++++++++++++++++---------- src/merit/l1merit.jl | 78 +++++++++++++------------ src/merit/merit.jl | 20 +++---- test/merit.jl | 4 +- 4 files changed, 139 insertions(+), 82 deletions(-) diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index cbb05c12..f2663dc6 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -1,7 +1,7 @@ export AugLagMerit @doc raw""" - AugLagMerit(nlp, η; kwargs...) + AugLagMerit(nlp, η; kwargs...) Creates an augmented Lagrangian merit function for the equality constrained problem ```math @@ -9,50 +9,107 @@ Creates an augmented Lagrangian merit function for the equality constrained prob ``` defined by ```math -\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|² +\phi(x, yₖ; η) = f(x) + yₖᵀc(x) + η ½\|c(x)\|² ``` In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also accepts the argument `y`. 
""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V - y :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V + y⁺ :: V + Jᵀy⁺ :: V + Jv :: V + JᵀJv :: V end function AugLagMerit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon), - y :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon), + y⁺ :: V = fill(T(Inf), nlp.meta.ncon), # y + η * c(x) + Jᵀy⁺ :: V = fill(T(Inf), nlp.meta.nvar), + Jv :: V = fill(T(Inf), nlp.meta.ncon), + JᵀJv :: V = fill(T(Inf), nlp.meta.nvar) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) + meta = NLPModelMeta(nlp.meta.nvar) + AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) end function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 end function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end + +function NLPModels.grad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_grad) + if update + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + g .= merit.gx .+ merit.Jᵀy⁺ + return g +end + +function NLPModels.objgrad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_obj) + NLPModels.increment!(merit, :neval_grad) + if update + merit.fx = obj(merit.nlp, x) + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + f = merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + g .= merit.gx .+ merit.Jᵀy⁺ + return f, g +end + +function NLPModels.hprod!(merit :: AugLagMerit, x :: 
AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Float64 = 1.0) + @lencheck merit.meta.nvar x v Hv + NLPModels.increment!(merit, :neval_hprod) + if update + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + end + jprod!(merit.model, x, v, merit.Jv) + jtprod!(merit.model, x, merit.Jv, merit.JᵀJv) + hprod!(merit.model, x, merit.y⁺, v, Hv, obj_weight = obj_weight) + Hv .+= merit.η * merit.JᵀJv + return Hv end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 754a07d1..6a09da2c 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -13,50 +13,56 @@ defined by ``` """ mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V end function L1Merit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) + meta = NLPModelMeta(nlp.meta.nvar) + L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + merit.η * norm(merit.cx, 1) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) end function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) + merit.η * sum( - if ci > 0 - Adi - elseif ci < 0 - -Adi - else - abs(Adi) - end - for (ci, Adi) in zip(merit.cx, merit.Ad) - ) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 787cb21a..c4869a9d 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -17,26 +17,20 @@ All models allow a constructor of form Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf]) Additional arguments and constructors may be provided. -""" -abstract type AbstractMeritModel end -""" - obj(merit, x; update=true) +An AbstractMeritModel is an AbstractNLPModel, but the API may not be completely implemented. For +instance, the `L1Merit` model doesn't provide any gradient function, but it provides a directional +derivative function. -Computes the `merit` function value at `x`. 
-This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. -The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used +to determine whether the internal stored values should be updated or not. """ -function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector) - # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using - # only `function obj end` overwrites the docstring - throw(MethodError(NLPModels.obj, (merit, x))) -end +abstract type AbstractMeritModel <: AbstractNLPModel end """ derivative(merit, x, d; update=true) -Computes the derivative derivative of `merit` at `x` on direction `d`. +Computes the directional derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ diff --git a/test/merit.jl b/test/merit.jl index b8c44085..8c11e6a6 100644 --- a/test/merit.jl +++ b/test/merit.jl @@ -98,8 +98,8 @@ end ), ( "AugLagMerit", - (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...), - (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ + (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=-ones(typeof(η), nlp.meta.ncon); kwargs...), + (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (-1,…,-1)ᵀ ) ] for (name, merit, ϕ) in merits From 22672d17ffc4e7136911301982ba801dfebace7d Mon Sep 17 00:00:00 2001 From: Abel Date: Tue, 11 Aug 2020 17:07:53 -0300 Subject: [PATCH 4/7] Change linesearch to use merit models --- docs/src/api.md | 15 ++-- src/linesearch/armijo.jl | 57 +++++++++++++ src/linesearch/armijo_wolfe.jl | 97 +++++++++++---------- src/linesearch/line_model.jl | 85 ------------------- src/linesearch/linesearch.jl | 46 +++++++++- src/linesearch/linesearchoutput.jl | 35 ++++++++ src/merit/auglagmerit.jl | 2 +- src/merit/l1merit.jl | 2 +- src/merit/merit.jl | 17 +++- src/merit/uncmerit.jl | 48 +++++++++++ test/dummy_linesearch_solver.jl | 14 ++-- test/test_linesearch.jl | 130 ++++++++++++++++------------- 12 files changed, 338 insertions(+), 210 deletions(-) create mode 100644 src/linesearch/armijo.jl delete mode 100644 src/linesearch/line_model.jl create mode 100644 src/linesearch/linesearchoutput.jl create mode 100644 src/merit/uncmerit.jl diff --git a/docs/src/api.md b/docs/src/api.md index a0e4d821..8abb451e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -18,24 +18,21 @@ project_step! ## Line-Search ```@docs -LineModel -obj -grad -grad! -hess -redirect! -armijo_wolfe +linesearch +LineSearchOutput +armijo! +armijo_wolfe! ``` ## Merit -See also [`obj`](@ref). - ```@docs AbstractMeritModel +obj derivative L1Merit AugLagMerit +UncMerit ``` ## Stats diff --git a/src/linesearch/armijo.jl b/src/linesearch/armijo.jl new file mode 100644 index 00000000..c97e2224 --- /dev/null +++ b/src/linesearch/armijo.jl @@ -0,0 +1,57 @@ +export armijo! + +""" + lso = armijo!(ϕ, x, d, xt; kwargs...) + +Performs a line search from `x` along the direction `d` for `AbstractMeritModel` `ϕ`. +The steplength is chosen trying to satisfy the Armijo condition +```math +ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; η), +``` +using backtracking. + +The keyword aguments are: +- ... + +The output is a `LineSearchOutput`. 
The following specific information are also available in +`lso.specific`: +- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., + number of backtracks; + +The following keyword arguments can be provided: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`); +- `t`: starting steplength (default `1`); +- `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`); +- `σ`: backtracking decrease factor (default `0.4`); +- `bk_max`: maximum number of backtracks (default `10`). +""" +function armijo!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + σ :: T=T(0.4), + bk_max :: Int=10 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + + nbk = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) + + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + while !(ϕt ≤ ϕx + τ₀ * t * slope) && (nbk < bk_max) + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + + nbk += 1 + end + + return LineSearchOutput(t, xt, ϕt, specific=(nbk=nbk,)) +end diff --git a/src/linesearch/armijo_wolfe.jl b/src/linesearch/armijo_wolfe.jl index 14b3348e..aae84d77 100644 --- a/src/linesearch/armijo_wolfe.jl +++ b/src/linesearch/armijo_wolfe.jl @@ -1,84 +1,91 @@ -export armijo_wolfe +export armijo_wolfe! """ - t, good_grad, ht, nbk, nbW = armijo_wolfe(h, h₀, slope, g) + lso = armijo_wolfe!(ϕ, x, d, xt; kwargs...) -Performs a line search from `x` along the direction `d` as defined by the -`LineModel` ``h(t) = f(x + t d)``, where -`h₀ = h(0) = f(x)`, `slope = h'(0) = ∇f(x)ᵀd` and `g` is a vector that will be -overwritten with the gradient at various points. On exit, if `good_grad=true`, -`g` contains the gradient at the final step length. -The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. The Armijo condition is +Performs a line search from `x` along the direction `d` for `AbstractMeritModel` `ϕ`. +The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. +The Armijo condition is ```math -h(t) ≤ h₀ + τ₀ t h'(0) +ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; d, η) ``` and the Wolfe condition is ```math -h'(t) ≤ τ₁ h'(0). +Dϕ(x + t d; d, η) ≤ τ₁ Dϕ(x; d, η). ``` -Initially the step is increased trying to satisfy the Wolfe condition. -Afterwards, only backtracking is performed in order to try to satisfy the Armijo condition. -The final steplength may only satisfy Armijo's condition. +Initially the step is increased trying to satisfy the Wolfe condition. Afterwards, only backtracking +is performed in order to try to satisfy the Armijo condition. The final steplength may only satisfy +Armijo's condition. -The output is the following: -- t: the step length; -- good_grad: whether `g` is the gradient at `x + t * d`; -- ht: the model value at `t`, i.e., `f(x + t * d)`; -- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., number of backtracks; +The output is a `LineSearchOutput`. 
The following specific information are also available in +`lso.specific`: +- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., + number of backtracks; - nbW: the number of times the steplength was increased to satisfy the Wolfe condition. The following keyword arguments can be provided: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`); - `t`: starting steplength (default `1`); - `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`); - `τ₁`: slope factor in the Wolfe condition. It should satisfy `τ₁ > τ₀` (default `0.9999`); +- `σ`: backtracking decrease factor (default `0.4`); - `bk_max`: maximum number of backtracks (default `10`); -- `bW_max`: maximum number of increases (default `5`); -- `verbose`: whether to print information (default `false`). +- `bW_max`: maximum number of increases (default `5`). """ -function armijo_wolfe(h :: LineModel, - h₀ :: T, - slope :: T, - g :: Array{T,1}; - t :: T=one(T), - τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), - τ₁ :: T=T(0.9999), - bk_max :: Int=10, - bW_max :: Int=5, - verbose :: Bool=false) where T <: AbstractFloat +function armijo_wolfe!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + τ₁ :: T=T(0.9999), + σ :: T=T(0.4), + bk_max :: Int=10, + bW_max :: Int=5 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} # Perform improved Armijo linesearch. nbk = 0 nbW = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) # First try to increase t to satisfy loose Wolfe condition - ht = obj(h, t) - slope_t = grad!(h, t, g) - while (slope_t < τ₁*slope) && (ht <= h₀ + τ₀ * t * slope) && (nbW < bW_max) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) + while (slope_t < τ₁*slope) && (ϕt <= ϕx + τ₀ * t * slope) && (nbW < bW_max) t *= 5 - ht = obj(h, t) - slope_t = grad!(h, t, g) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) nbW += 1 end - hgoal = h₀ + slope * t * τ₀; + ϕgoal = ϕx + slope * t * τ₀; fact = -T(0.8) ϵ = eps(T)^T(3/5) # Enrich Armijo's condition with Hager & Zhang numerical trick - Armijo = (ht <= hgoal) || ((ht <= h₀ + ϵ * abs(h₀)) && (slope_t <= fact * slope)) + Armijo = (ϕt <= ϕgoal) || ((ϕt <= ϕx + ϵ * abs(ϕx)) && (slope_t <= fact * slope)) good_grad = true while !Armijo && (nbk < bk_max) - t *= T(0.4) - ht = obj(h, t) - hgoal = h₀ + slope * t * τ₀; + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + ϕgoal = ϕx + slope * t * τ₀; # avoids unused grad! 
calls Armijo = false good_grad = false - if ht <= hgoal + if ϕt <= ϕgoal Armijo = true - elseif ht <= h₀ + ϵ * abs(h₀) - slope_t = grad!(h, t, g) + elseif ϕt <= ϕx + ϵ * abs(ϕx) + slope_t = derivative!(ϕ, xt, d) good_grad = true if slope_t <= fact * slope Armijo = true @@ -88,7 +95,5 @@ function armijo_wolfe(h :: LineModel, nbk += 1 end - verbose && @printf(" %4d %4d\n", nbk, nbW); - - return (t, good_grad, ht, nbk, nbW) + return LineSearchOutput(t, xt, ϕt, good_grad=good_grad, gt=ϕ.gx, specific=(nbk=nbk, nbW=nbW)) end diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl deleted file mode 100644 index 08e4b78b..00000000 --- a/src/linesearch/line_model.jl +++ /dev/null @@ -1,85 +0,0 @@ -export LineModel -export obj, grad, derivative, grad!, derivative!, hess, redirect! - -"""A type to represent the restriction of a function to a direction. -Given f : R → Rⁿ, x ∈ Rⁿ and a nonzero direction d ∈ Rⁿ, - - ϕ = LineModel(nlp, x, d) - -represents the function ϕ : R → R defined by - - ϕ(t) := f(x + td). -""" -mutable struct LineModel <: AbstractNLPModel - meta :: NLPModelMeta - counters :: Counters - nlp :: AbstractNLPModel - x :: AbstractVector - d :: AbstractVector -end - -function LineModel(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector) - meta = NLPModelMeta(1, x0=zeros(eltype(x), 1), name="LineModel to $(nlp.meta.name))") - return LineModel(meta, Counters(), nlp, x, d) -end - -"""`redirect!(ϕ, x, d)` - -Change the values of x and d of the LineModel ϕ, but retains the counters. -""" -function redirect!(ϕ :: LineModel, x :: AbstractVector, d :: AbstractVector) - ϕ.x, ϕ.d = x, d - return ϕ -end - -"""`obj(f, t)` evaluates the objective of the `LineModel` - - ϕ(t) := f(x + td). -""" -function NLPModels.obj(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_obj) - return obj(f.nlp, f.x + t * f.d) -end - -"""`grad(f, t)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. -""" -function NLPModels.grad(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_grad) - return dot(grad(f.nlp, f.x + t * f.d), f.d) -end -derivative(f :: LineModel, t :: AbstractFloat) = grad(f, t) - -"""`grad!(f, t, g)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. - -The gradient ∇f(x + td) is stored in `g`. -""" -function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) - NLPModels.increment!(f, :neval_grad) - return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) -end -derivative!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) = grad!(f, t, g) - -"""Evaluate the second derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ"(t) = dᵀ∇²f(x + td)d. -""" -function NLPModels.hess(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_hess) - return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) -end diff --git a/src/linesearch/linesearch.jl b/src/linesearch/linesearch.jl index 69680d00..70332a16 100644 --- a/src/linesearch/linesearch.jl +++ b/src/linesearch/linesearch.jl @@ -1,2 +1,46 @@ -include("line_model.jl") +export linesearch, linesearch! + +include("linesearchoutput.jl") +include("armijo.jl") include("armijo_wolfe.jl") + +const ls_dict = Dict( + :armijo => armijo!, + :armijo_wolfe => armijo_wolfe! +) + +""" + lso = linesearch(ϕ, x, d; method=:armijo, kwargs...) + lso = linesearch!(ϕ, x, d, xt; method=:armijo, kwargs...) 
+ lso = linesearch(nlp, x, d; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + lso = linesearch!(nlp, x, d, xt; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Perform a line search for unconstrained or bound-constrained problem `nlp` or merit model `ϕ` starting at `x` on direction `d`, storing the result in `xt`, for the in place version. +Returns a `LineSearchOutput`. +The `method` keyword argument can be any of the following: + +- `:armijo`: Calls the simple Armijo line search, performing backtracking. +- `:armijo_wolfe`: Calls the Armijo-Wolfe line search, that tries to satisfy the Wolfe conditions in addition to the Armijo conditions. + +When the input is `ϕ`, a merit model, the following keyword arguments are available for any method: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad`. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ = 0`). +In such case a `UncMerit` is created using the keyword arguments `fx` and `gx`. **Be warned** that `gx` will be overwritten. +""" +function linesearch end + +function linesearch(nlp_or_ϕ, x, d; kwargs...) + xt = copy(x) + linesearch!(nlp_or_ϕ, x, d, xt; kwargs...) +end + +function linesearch!(nlp :: AbstractNLPModel, x :: AbstractVector, args...; fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + ϕ = UncMerit(nlp, fx=fx, gx=gx) + linesearch!(ϕ, x, args...; kwargs...) +end + +function linesearch!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:armijo, kwargs...) + SolverTools.ls_dict[method](ϕ, x, args...; kwargs...) +end \ No newline at end of file diff --git a/src/linesearch/linesearchoutput.jl b/src/linesearch/linesearchoutput.jl new file mode 100644 index 00000000..8eb28e98 --- /dev/null +++ b/src/linesearch/linesearchoutput.jl @@ -0,0 +1,35 @@ +export LineSearchOutput + +# TODO: Generalize AbstractExecutionStats? + +""" + LineSearchOutput + +Structure to store the output of a line search algorithm. +The following variables are stored: +- `t`: The final step length; +- `xt`: The final iterate ``xt = x + t d``; +- `ϕt`: The final merit function value, i.e., `ϕ(xt)`; +- `good_grad`: Whether the gradient was computed at `xt`; +- `gt`: If `good_grad` is `true`, the gradient at `xt`; +- `specific`: A NamedTuple object storing specific method information. Check the specifc method for this information. +""" +struct LineSearchOutput{T <: Real, V <: AbstractVector{<: T}} + t :: T + xt :: V + ϕt :: T + good_grad :: Bool + gt :: V + specific :: NamedTuple +end + +function LineSearchOutput( + t :: T, + xt :: V, + ϕt :: T; + good_grad :: Bool=false, + gt :: V=T[], + specific :: NamedTuple = NamedTuple() +) where {T <: Real, V <: AbstractVector{<: T}} + LineSearchOutput(t, xt, ϕt, good_grad, gt, specific) +end \ No newline at end of file diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index f2663dc6..76b58685 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -15,7 +15,7 @@ defined by In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also accepts the argument `y`. 
""" -mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} meta :: NLPModelMeta counters :: Counters nlp :: M diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 6a09da2c..b1a148f0 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -12,7 +12,7 @@ defined by \phi_1(x; η) = f(x) + η\|c(x)\|₁ ``` """ -mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} meta :: NLPModelMeta counters :: Counters nlp :: M diff --git a/src/merit/merit.jl b/src/merit/merit.jl index c4869a9d..878bfed4 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -25,16 +25,27 @@ derivative function. Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used to determine whether the internal stored values should be updated or not. """ -abstract type AbstractMeritModel <: AbstractNLPModel end +abstract type AbstractMeritModel{M,T,V} <: AbstractNLPModel end + +""" + obj(merit, x; update=true) + +Evaluates the `merit` model at `x`. +This will call `obj` and `cons!` to update the internal values of `fx` and `cx`, unless `update=false`. +""" +function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=false) + throw(MethorError(obj, (merit, x))) +end """ derivative(merit, x, d; update=true) Computes the directional derivative of `merit` at `x` on direction `d`. -This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. -The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, unless `update=false`. +This function assumes that `cx` is correct, though. """ function derivative end include("auglagmerit.jl") include("l1merit.jl") +include("uncmerit.jl") \ No newline at end of file diff --git a/src/merit/uncmerit.jl b/src/merit/uncmerit.jl new file mode 100644 index 00000000..96d76752 --- /dev/null +++ b/src/merit/uncmerit.jl @@ -0,0 +1,48 @@ +export UncMerit + +@doc raw""" + UncMerit(nlp; kwargs...) + UncMerit(nlp, η; kwargs...) # η is ignored + +Creates a merit wrapper for unconstrained or bound-countrained problems. +Formally accepts the same constructor as other merit functions, but in practice it ignores any constrained-related value, even if `nlp` is constrained. 
+""" +mutable struct UncMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T # ignored in practice + fx :: T + gx :: V + cx :: V # ignored in practice + Ad :: V # ignored in practice +end + +function UncMerit( + nlp :: M, + η :: T = 0.0; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = T[], + Ad :: V = T[], +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + meta = NLPModelMeta(nlp.meta.nvar) + UncMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: UncMerit, x :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + end + return merit.fx +end + +function derivative(merit :: UncMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + end + return dot(merit.gx, d) +end \ No newline at end of file diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl index 19374908..415878fd 100644 --- a/test/dummy_linesearch_solver.jl +++ b/test/dummy_linesearch_solver.jl @@ -5,6 +5,7 @@ function dummy_linesearch_solver( rtol :: Real = 1e-6, max_eval :: Int = 1000, max_time :: Float64 = 30.0, + ls_method :: Symbol = :armijo, merit_constructor = L1Merit ) @@ -13,6 +14,7 @@ function dummy_linesearch_solver( nvar, ncon = nlp.meta.nvar, nlp.meta.ncon T = eltype(x) + xt = copy(x) cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) gx = grad(nlp, x) @@ -51,14 +53,10 @@ function dummy_linesearch_solver( end ϕx = obj(ϕ, x, update=false) - t = 1.0 - ϕt = obj(ϕ, x + Δx) # Updates cx - while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx) - t /= 2 - ϕt = obj(ϕ, x + t * Δx) # Updates cx - end - x += t * Δx - y += t * Δy + lso = linesearch!(ϕ, x, Δx, xt, method=ls_method) + x .= xt + t = lso.t + y .+= t * Δy grad!(nlp, x, gx) # Updates ϕ.gx if ncon > 0 diff --git a/test/test_linesearch.jl b/test/test_linesearch.jl index 4e519255..cebd4db9 100644 --- a/test/test_linesearch.jl +++ b/test/test_linesearch.jl @@ -1,63 +1,81 @@ @testset "Linesearch" begin - @testset "LineModel" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - x = nlp.meta.x0 - d = -ones(2) - lm = LineModel(nlp, x, d) - g = zeros(2) + ls_methods = [:armijo, :armijo_wolfe] + @testset "Unconstrained problems" begin + specific_tests = Dict( + :armijo => Dict(:nbk => [0, 0, 1, 0]), + :armijo_wolfe => Dict(:nbk => [0, 0, 1, 5], :nbW => [0, 0, 0, 1]), + ) + for ls_method in ls_methods + @testset "Method $ls_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) + x = nlp.meta.x0 + d = -ones(2) + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t == 1 + @test lso.ϕt == 0.0 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[1] + end - @test obj(lm, 0.0) == obj(nlp, x) - @test grad(lm, 0.0) == dot(grad(nlp, x), d) - @test grad!(lm, 0.0, g) == dot(grad(nlp, x), d) - @test g == grad(nlp, x) - @test derivative(lm, 0.0) == dot(grad(nlp, x), d) - @test derivative!(lm, 0.0, g) == dot(grad(nlp, x), d) - @test g == grad(nlp, x) - @test hess(lm, 0.0) == dot(d, Symmetric(hess(nlp, x), :L) * d) + d = -ones(2) / 2 + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t == 1 + @test lso.ϕt == 1.25 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[2] + end - @test obj(lm, 1.0) == 0.0 - @test grad(lm, 1.0) == 0.0 - @test hess(lm, 
1.0) == 2d[1]^2 + 8d[2]^2 + d = -2 * ones(2) + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t < 1 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[3] + end - redirect!(lm, zeros(2), ones(2)) - @test obj(lm, 0.0) == 0.0 - @test grad(lm, 0.0) == 0.0 - @test hess(lm, 0.0) == 10.0 + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2)) + x = nlp.meta.x0 + d = [1.7; 3.2] + lso = linesearch(nlp, x, d, method=ls_method) + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[4] + end + end # @testset + end # for + end # @testset - @test neval_obj(lm) == 3 - @test neval_grad(lm) == 6 - @test neval_hess(lm) == 3 - end + @testset "Constrainted problems" begin + specific_tests = Dict( + :armijo => Dict(:nbk => [0, 0, 1]), + :armijo_wolfe => Dict(:nbk => [0, 0, 1]), + ) + for ls_method in ls_methods + @testset "Method $ls_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0]) + x = nlp.meta.x0 + ϕ = L1Merit(nlp, 1.0) + d = -ones(2) + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t == 1 + @test lso.ϕt == 0.0 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[1] + end - @testset "Armijo-Wolfe" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - lm = LineModel(nlp, nlp.meta.x0, -ones(2)) - g = zeros(2) + d = -ones(2) / 2 + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t == 1 + @test lso.ϕt == 1.25 + 1.5 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[2] + end - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 0.0 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -ones(2) / 2) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 1.25 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -2 * ones(2)) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW == 0 - - nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2)) - lm = LineModel(nlp, nlp.meta.x0, [1.7; 3.2]) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW > 0 - end -end + d = -2 * ones(2) + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t < 1 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[3] + end + end # @testset + end # for + end # @testset +end \ No newline at end of file From 5a9d2d285bb2916b0458703421137d77c8d85c30 Mon Sep 17 00:00:00 2001 From: Abel Date: Tue, 18 Aug 2020 23:32:08 -0300 Subject: [PATCH 5/7] Draft of trust region with merit --- Project.toml | 3 +- src/merit/auglagmerit.jl | 14 ++-- src/merit/l1merit.jl | 14 ++-- src/merit/merit.jl | 30 ++++++- test/dummy_linesearch_solver.jl | 4 +- test/dummy_solver.jl | 4 +- test/dummy_trust_region_solver.jl | 128 ++++++++++++++++++++++++++++++ test/merit.jl | 29 +++++++ test/runtests.jl | 1 + 9 files changed, 202 insertions(+), 25 deletions(-) create mode 100644 test/dummy_trust_region_solver.jl diff --git a/Project.toml b/Project.toml index 908e4d8d..4792db91 100644 --- a/Project.toml +++ b/Project.toml @@ -19,10 +19,11 @@ NLPModels = "0.13" julia = "^1.3.0" [extras] +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = 
"37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["NLPModels", "LinearAlgebra", "Test", "Logging"] +test = ["Krylov", "NLPModels", "LinearAlgebra", "Test", "Logging"] diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index 76b58685..ab8f16f7 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -48,14 +48,12 @@ function AugLagMerit( AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) end -function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +function dualobj(merit :: AugLagMerit) + merit.fx + dot(merit.y, merit.cx) +end + +function primalobj(merit :: AugLagMerit) + dot(merit.cx, merit.cx) / 2 end function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index b1a148f0..52136f07 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -35,14 +35,12 @@ function L1Merit( L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end -function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + merit.η * norm(merit.cx, 1) +function dualobj(merit :: L1Merit) + merit.fx +end + +function primalobj(merit :: L1Merit) + norm(merit.cx, 1) end function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 878bfed4..da8b9229 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -1,5 +1,5 @@ using NLPModels -export AbstractMeritModel, obj, derivative +export AbstractMeritModel, obj, derivative, dualobj, primalobj """ AbstractMeritModel @@ -27,14 +27,36 @@ to determine whether the internal stored values should be updated or not. """ abstract type AbstractMeritModel{M,T,V} <: AbstractNLPModel end +""" + dualobj(merit) + +Return the dual part of `merit`, i.e., the part without `η`, at its current values. +No updates or internal function calls are made. +""" +function dualobj end + +""" + primalobj(merit) + +Return the primal part of `merit`, i.e., the part with `η`, at its current values. +No updates or internal function calls are made. +""" +function primalobj end + """ obj(merit, x; update=true) Evaluates the `merit` model at `x`. This will call `obj` and `cons!` to update the internal values of `fx` and `cx`, unless `update=false`. 
""" -function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=false) - throw(MethorError(obj, (merit, x))) +function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=true) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + dualobj(merit) + merit.η * primalobj(merit) end """ @@ -42,7 +64,7 @@ end Computes the directional derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, unless `update=false`. -This function assumes that `cx` is correct, though. +This function assumes that `cx` is already computed, though. """ function derivative end diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl index 415878fd..87ed14e5 100644 --- a/test/dummy_linesearch_solver.jl +++ b/test/dummy_linesearch_solver.jl @@ -1,6 +1,6 @@ function dummy_linesearch_solver( nlp :: AbstractNLPModel; - x :: AbstractVector = nlp.meta.x0, + x :: AbstractVector = copy(nlp.meta.x0), atol :: Real = 1e-6, rtol :: Real = 1e-6, max_eval :: Int = 1000, @@ -80,7 +80,7 @@ function dummy_linesearch_solver( :max_eval end - return GenericExecutionStats(:unknown, nlp, + return GenericExecutionStats(status, nlp, objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), elapsed_time=elapsed_time, solution=x, iter=iter diff --git a/test/dummy_solver.jl b/test/dummy_solver.jl index 8e29e260..466bdb2e 100644 --- a/test/dummy_solver.jl +++ b/test/dummy_solver.jl @@ -1,5 +1,5 @@ function dummy_solver(nlp :: AbstractNLPModel; - x :: AbstractVector = nlp.meta.x0, + x :: AbstractVector = copy(nlp.meta.x0), atol :: Real = sqrt(eps(eltype(x))), rtol :: Real = sqrt(eps(eltype(x))), max_eval :: Int = 1000, @@ -61,7 +61,7 @@ function dummy_solver(nlp :: AbstractNLPModel; :max_eval end - return GenericExecutionStats(:unknown, nlp, + return GenericExecutionStats(status, nlp, objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), elapsed_time=elapsed_time, solution=x, iter=iter diff --git a/test/dummy_trust_region_solver.jl b/test/dummy_trust_region_solver.jl new file mode 100644 index 00000000..0acb5842 --- /dev/null +++ b/test/dummy_trust_region_solver.jl @@ -0,0 +1,128 @@ +using Krylov + +function dummy_trust_region_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = copy(nlp.meta.x0), + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + # ls_method :: Symbol = :armijo, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + evals(nlp) = neval_obj(nlp) + neval_cons(nlp) + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + xt = copy(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? 
jac(nlp, x) : zeros(T, 0, nvar) + y = cgls(Jx', -gx)[1] + + dual = gx + Jx' * y + + iter = 0 + + Δ = 100.0 + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + ϕ.fx = fx + @info log_header([:iter, :nfc, :f, :c, :dual, :time, :Δ, :η, :iterkind], [Int, Int, T, T, T, T, T, T, String]) + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, "Initial"]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = evals(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? hess_op(nlp, x, y) : hess_op(nlp, x) + local Δx + ϕx = obj(ϕ, x, update=false) + ϕxprimal = primalobj(ϕ) + ϕxdual = dualobj(ϕ) + if ncon > 0 + # Normal step + v = cgls(Jx, -cx, radius=0.8Δ)[1] + # Tangent step + Z = nullspace(Jx) + Δx = v + Z * cg(Z' * Hxy * Z, -Z' * (dual + Hxy * v), radius=sqrt(Δ^2 - dot(v, v)))[1] + else + Δx = cg(Hxy, -dual, radius=Δ)[1] + end + + # Workaround to be agnostic to the merit function + # Pretend fx is the quadratic approximation to obj(nlp, x + Δx) + # Pretend cx is que linear approximation to cons(nlp, x + Δx) + ϕ.fx += dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 + Ad = Jx * Δx + cpAd = cx .+ Ad + ct = ϕ.cx + ϕ.cx = cpAd + ϕtprimal = primalobj(ϕ) + ϕtdual = dualobj(ϕ) + + xt = x + Δx + ϕt = obj(ϕ, xt, update=true) + ft = ϕ.fx + + @assert ϕxprimal ≥ ϕtprimal + while ϕxdual - ϕtdual < -0.1 * ϕ.η * (ϕxprimal - ϕtprimal) < 0 # For unconstrained problems, right side is 0. + ϕ.η *= 2 + end + + Ared = ϕx - ϕt + Pred = ϕxdual - ϕtdual + ϕ.η * (ϕxprimal - ϕtprimal) + + ρ = Ared / Pred + iter_kind = if ρ > 1e-2 # accept + x .= xt + fx = ft + cx = ϕ.cx + grad!(nlp, x, gx) # Updates ϕ.gx + if ncon > 0 + Jx = jac(nlp, x) + end + y = cgls(Jx', -gx)[1] + dual = gx + Jx' * y + if ρ > 0.75 && norm(Δx) > 0.9Δ + Δ *= 2 + :great + else + :good + end + else + ϕ.fx = fx + ϕ.cx = cx + Δ /= 4 + :bad + end + + elapsed_time = time() - start_time + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = evals(nlp) > max_eval || elapsed_time > max_time + + iter += 1 + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, iter_kind]) + end + + status = if solved + :first_order + elseif elapsed_time > max_time + :max_time + else + :max_eval + end + + return GenericExecutionStats(status, nlp, + objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), + multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), + elapsed_time=elapsed_time, solution=x, iter=iter + ) +end diff --git a/test/merit.jl b/test/merit.jl index 8c11e6a6..43ab2deb 100644 --- a/test/merit.jl +++ b/test/merit.jl @@ -62,6 +62,21 @@ function test_merit(merit_constructor, ϕ) @test neval_jprod(nlp) == 1 end + @testset "Separate parts" begin + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [x[1]^2 + x[2]^2 - 1], [0.0], [0.0]) + ϕ = merit_constructor(nlp, 0.0) + for i = 1:2 + x = randn(nlp.meta.nvar) + d = randn(nlp.meta.nvar) + ϕ.η = 0.0 + manual_dualobj = obj(ϕ, x) + ϕ.η = 1.0 + manual_primalobj = obj(ϕ, x) - manual_dualobj + @test dualobj(ϕ) ≈ manual_dualobj + @test primalobj(ϕ) ≈ manual_primalobj + end + end + @testset "Safe for unconstrained" begin nlp = ADNLPModel(x -> dot(x, x), ones(2)) x = nlp.meta.x0 @@ -86,6 +101,20 @@ function test_merit(merit_constructor, ϕ) @test output.objective < 1e-6 @test output.primal_feas < 1e-6 @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = 
dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 end end diff --git a/test/runtests.jl b/test/runtests.jl index f4b46944..e4d59e1b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ using LinearAlgebra, Logging, Test include("dummy_solver.jl") include("dummy_linesearch_solver.jl") +include("dummy_trust_region_solver.jl") include("test_auxiliary.jl") include("test_linesearch.jl") From dca394983084d525c35eb8e4365ee40de6c944ef Mon Sep 17 00:00:00 2001 From: Abel Date: Wed, 19 Aug 2020 20:30:28 -0300 Subject: [PATCH 6/7] Complete change of trust region --- docs/src/api.md | 14 +-- src/merit/uncmerit.jl | 13 +-- src/trust-region/basic-trust-region.jl | 108 +++++++++++------- src/trust-region/tron-trust-region.jl | 140 ++++++++++++++---------- src/trust-region/trust-region-output.jl | 25 +++++ src/trust-region/trust-region.jl | 139 ++++++++++------------- test/test_trust_region.jl | 113 ++++++++++--------- 7 files changed, 304 insertions(+), 248 deletions(-) create mode 100644 src/trust-region/trust-region-output.jl diff --git a/docs/src/api.md b/docs/src/api.md index 8abb451e..1a75842e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -29,6 +29,8 @@ armijo_wolfe! ```@docs AbstractMeritModel obj +dualobj +primalobj derivative L1Merit AugLagMerit @@ -46,13 +48,7 @@ show_statuses ```@docs TrustRegionException -SolverTools.AbstractTrustRegion -aredpred -acceptable -reset! -get_property -set_property! -update! -TrustRegion -TRONTrustRegion +trust_region +basic_trust_region! +tron_trust_region! 
``` diff --git a/src/merit/uncmerit.jl index 96d76752..d3e7a869 100644 --- a/src/merit/uncmerit.jl +++ b/src/merit/uncmerit.jl @@ -30,13 +30,12 @@ function UncMerit( UncMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end -function NLPModels.obj(merit :: UncMerit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - end - return merit.fx +function dualobj(merit :: UncMerit) + merit.fx +end + +function primalobj(merit :: UncMerit) + zero(eltype(merit.η)) end function derivative(merit :: UncMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/trust-region/basic-trust-region.jl index 5523437e..28f8bd53 100644 --- a/src/trust-region/basic-trust-region.jl +++ b/src/trust-region/basic-trust-region.jl @@ -1,40 +1,72 @@ -export TrustRegion - -"""Basic trust region type.""" -mutable struct TrustRegion <: AbstractTrustRegion - initial_radius :: AbstractFloat - radius :: AbstractFloat - max_radius :: AbstractFloat - acceptance_threshold :: AbstractFloat - increase_threshold :: AbstractFloat - decrease_factor :: AbstractFloat - increase_factor :: AbstractFloat - ratio :: AbstractFloat - - function TrustRegion(initial_radius :: T; - max_radius :: T=one(T)/sqrt(eps(T)), - acceptance_threshold :: T=T(1.0e-4), - increase_threshold :: T=T(0.95), - decrease_factor :: T=one(T)/3, - increase_factor :: T=3 * one(T) / 2) where T <: Number - - initial_radius > 0 || (initial_radius = one(T)) - max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius")) - (0 < acceptance_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds")) - (0 < decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors")) - - return new(initial_radius, initial_radius, max_radius, - acceptance_threshold, increase_threshold, - decrease_factor, increase_factor, 0) +export basic_trust_region! + +""" + tro = basic_trust_region!(ϕ, x, Δx, xt, Δq, Δc, Δ; kwargs...) + +Update `xt` and `Δ` using a trust region strategy. See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy. + +This strategy corresponds to the keyword `method=:basic` on `trust_region`. + +Given `ρ = ared / pred`, +- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = σdec‖d‖`; +- if `ηacc ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = Δₖ`; +- otherwise, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, σinc‖d‖))`.
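+
+For example, a small sketch mirroring the new tests in `test/test_trust_region.jl`, called through the `trust_region` front-end (assumes `LinearAlgebra` for `dot` and `norm`, and `ADNLPModel` as in the tests):
+
+```julia
+nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2))
+x = nlp.meta.x0
+d = -ones(2)
+Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2  # change in the quadratic model along d
+tro = trust_region(nlp, x, d, Δq, norm(d), method=:basic, update_obj_at_x=true)
+tro.status, tro.Δ, tro.success  # :great/:good/:bad, updated radius, acceptance flag
+```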
+""" +function basic_trust_region!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V, + Δq :: T, + Ad :: V, + Δ :: T; + update_obj_at_x :: Bool=false, + penalty_update :: Symbol=:basic, + max_radius :: T=one(T)/sqrt(eps(T)), + ηacc :: Real=T(1.0e-4), + ηinc :: Real=T(0.95), + σdec :: Real=one(T) / 3, + σinc :: Real=3 * one(T) / 2, +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds")) + (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors")) + ϕx = obj(ϕ, x, update=update_obj_at_x) + ϕxf = dualobj(ϕ) + ϕxc = primalobj(ϕ) + ϕ.fx += Δq + ϕ.cx .+= Ad + mf = dualobj(ϕ) + mc = primalobj(ϕ) + + if penalty_update == :basic + while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0 + ϕ.η *= 2 + end + else + throw(TrustRegionException("Unidentified penalty_update $penalty_update")) end -end - -function update!(tr :: TrustRegion, step_norm :: AbstractFloat) - if tr.ratio < tr.acceptance_threshold - tr.radius = tr.decrease_factor * step_norm - elseif tr.ratio >= tr.increase_threshold - tr.radius = min(tr.max_radius, - max(tr.radius, tr.increase_factor * step_norm)) + + @. xt = x + d + ϕt = obj(ϕ, xt) + ϕtf = dualobj(ϕ) + ϕtc = primalobj(ϕ) + + m = mf + ϕ.η * mc + ared = ϕx - ϕt + pred = ϕx - m + ρ = ared / pred + + normd = norm(d) + status = if ρ > ηinc + Δ = min(max_radius, max(Δ, σinc * normd)) + :great + elseif ρ < ηacc + Δ = σdec * normd + xt .= x + :bad + else + :good end - return tr -end + + return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt) +end \ No newline at end of file diff --git a/src/trust-region/tron-trust-region.jl b/src/trust-region/tron-trust-region.jl index 57ff961b..b25fb375 100644 --- a/src/trust-region/tron-trust-region.jl +++ b/src/trust-region/tron-trust-region.jl @@ -1,59 +1,87 @@ -export TRONTrustRegion - -"""Trust region used by TRON""" -mutable struct TRONTrustRegion <: AbstractTrustRegion - initial_radius :: AbstractFloat - radius :: AbstractFloat - max_radius :: AbstractFloat - acceptance_threshold :: AbstractFloat - decrease_threshold :: AbstractFloat - increase_threshold :: AbstractFloat - large_decrease_factor :: AbstractFloat - small_decrease_factor :: AbstractFloat - increase_factor :: AbstractFloat - ratio :: AbstractFloat - quad_min :: AbstractFloat - - function TRONTrustRegion(initial_radius :: T; - max_radius :: T=one(T)/sqrt(eps(T)), - acceptance_threshold :: T=T(1.0e-4), - decrease_threshold :: T=T(0.25), - increase_threshold :: T=T(0.75), - large_decrease_factor :: T=T(0.25), - small_decrease_factor :: T=T(0.5), - increase_factor :: T=T(4)) where T <: AbstractFloat - - initial_radius > 0 || (initial_radius = one(T)) - max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius")) - (0 < acceptance_threshold < decrease_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds")) - (0 < large_decrease_factor < small_decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors")) - - return new(initial_radius, initial_radius, max_radius, - acceptance_threshold, decrease_threshold, increase_threshold, - large_decrease_factor, small_decrease_factor, increase_factor, - zero(T), zero(T)) +export tron_trust_region! + +""" + tro = tron_trust_region!(ϕ, x, Δx, xt, Δq, Δc, Δ; kwargs...) + + +Update `xt` and `Δ` using a trust region strategy. 
See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy. + +This strategy corresponds to the keyword `method=:tron` on `trust_region`. +In addition to the default keywords, the following keywords are also available: +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `true`); +- `ηdec`: threshold for decreasing Δ (default: 0.25) +- `σlarge_dec`: factor used in the decrease heuristics (default: 0.25) + +Given `ρ = ared / pred` and the directional derivative `slope = Dϕ(x, d)`, +- Compute `γ = ϕt - ϕx - slope` +- Compute `α = γ ≤ 0 ? σinc : max(σlarge_dec, -slope / 2γ)` +- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = min(max(α, σlarge_dec)‖d‖, σdecΔₖ)`; +- if `ηacc ≤ ρ < ηdec`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_decΔₖ, min(α‖d‖, σdecΔₖ))`; +- if `ηdec ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_decΔₖ, min(α‖d‖, σincΔₖ))`; +- otherwise, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, min(α‖d‖, σincΔₖ)))`. +""" +function tron_trust_region!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V, + Δq :: T, + Ad :: V, + Δ :: T; + update_obj_at_x :: Bool=false, + update_derivative_at_x :: Bool=true, + penalty_update :: Symbol=:basic, + max_radius :: T=one(T)/sqrt(eps(T)), + ηacc :: Real=T(1.0e-4), + ηdec :: Real=T(0.25), + ηinc :: Real=T(0.95), + σlarge_dec :: Real=T(0.25), + σdec :: Real=T(0.5), + σinc :: Real=3 * one(T) / 2, +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds")) + (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors")) + ϕx = obj(ϕ, x, update=update_obj_at_x) + slope = derivative(ϕ, x, d, update=update_derivative_at_x) + ϕxf = dualobj(ϕ) + ϕxc = primalobj(ϕ) + ϕ.fx += Δq + ϕ.cx .+= Ad + mf = dualobj(ϕ) + mc = primalobj(ϕ) + + if penalty_update == :basic + while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0 + ϕ.η *= 2 + end + else + throw(TrustRegionException("Unidentified penalty_update $penalty_update")) end + + @. xt = x + d + ϕt = obj(ϕ, xt) + ϕtf = dualobj(ϕ) + ϕtc = primalobj(ϕ) + + m = mf + ϕ.η * mc + ared = ϕx - ϕt + pred = ϕx - m + ρ = ared / pred + + γ = ϕt - ϕx - slope + α = γ ≤ 0 ?
σinc : max(σlarge_dec, -slope / 2γ) + + normd = norm(d) + Δ, status = if ρ < ηacc + xt .= x + min(max(α, σlarge_dec) * normd, σdec * Δ), :bad + elseif ρ < ηdec + max(σlarge_dec * Δ, min(α * normd, σdec * Δ)), :good + elseif ρ < ηinc + max(σlarge_dec * Δ, min(α * normd, σinc * Δ)), :good else - min(tr.max_radius, max(tr.radius, min(α * step_norm, σ₃ * tr.radius))) + min(max_radius, max(Δ, min(α * normd, σinc * Δ))), :great end - return tr -end + + return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt) +end \ No newline at end of file diff --git a/src/trust-region/trust-region-output.jl new file mode 100644 index 00000000..dbddbe17 --- /dev/null +++ b/src/trust-region/trust-region-output.jl @@ -0,0 +1,25 @@ +export TrustRegionOutput + +struct TrustRegionOutput{T <: Real, V <: AbstractVector} + status :: Symbol + ared :: T + pred :: T + ρ :: T + success :: Bool + Δ :: T + xt :: V + specific :: NamedTuple +end + +function TrustRegionOutput( + status :: Symbol, + ared :: T, + pred :: T, + ρ :: T, + success :: Bool, + Δ :: T, + xt :: V; + specific :: NamedTuple = NamedTuple() +) where {T <: Real, V <: AbstractVector{<: T}} + TrustRegionOutput(status, ared, pred, ρ, success, Δ, xt, specific) +end \ No newline at end of file diff --git a/src/trust-region/trust-region.jl index a1ec4a79..20f47ffb 100644 --- a/src/trust-region/trust-region.jl +++ b/src/trust-region/trust-region.jl @@ -1,98 +1,69 @@ # A trust-region type and basic utility functions. -import NLPModels: reset! -export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! +export TrustRegionException +export trust_region, trust_region! +# import NLPModels: reset! +# export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! + +include("trust-region-output.jl") +include("basic-trust-region.jl") +include("tron-trust-region.jl") + +const tr_dict = Dict( + :basic => basic_trust_region!, + :tron => tron_trust_region!, +) "Exception type raised in case of error." mutable struct TrustRegionException <: Exception msg :: String end -"""`AbstractTrustRegion` - -An abstract trust region type so that specific trust regions define update -rules differently. Child types must have at least the following fields: - -- `acceptance_threshold :: AbstractFloat` -- `initial_radius :: AbstractFloat` -- `radius :: AbstractFloat` -- `ratio :: AbstractFloat` - -and the following function: - -- `update!(tr, step_norm)` -""" -abstract type AbstractTrustRegion end - -"""`ared, pred = aredpred(nlp, f, f_trial, Δm, x_trial, step, slope)` - -Compute the actual and predicted reductions `∆f` and `Δm`, where -`Δf = f_trial - f` is the actual reduction is an objective/merit/penalty function, -`Δm = m_trial - m` is the reduction predicted by the model `m` of `f`. -We assume that `m` is being minimized, and therefore that `Δm < 0`.
-""" -function aredpred(nlp :: AbstractNLPModel, f :: T, f_trial :: T, Δm :: T, - x_trial :: Vector{T}, step :: Vector{T}, slope :: T) where T <: AbstractFloat - absf = abs(f) - ϵ = eps(T) - pred = Δm - max(one(T), absf) * 10 * ϵ - - ared = f_trial - f + max(one(T), absf) * 10 * ϵ - if (abs(Δm) < 10_000 * ϵ) || (abs(ared) < 10_000 * ϵ * absf) - # correct for roundoff error - g_trial = grad(nlp, x_trial) - slope_trial = dot(g_trial, step) - ared = (slope_trial + slope) / 2 - end - return ared, pred -end - - -"""`acceptable(tr)` - -Return `true` if a step is acceptable """ -function acceptable(tr :: AbstractTrustRegion) - return tr.ratio >= tr.acceptance_threshold -end - -"""`reset!(tr)` - -Reset the trust-region radius to its initial value + tro = trust_region(ϕ, x, d, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region!(ϕ, x, d, xt, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region(nlp, x, d, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + tro = trust_region!(nlp, x, d, xt, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Update `xt` and `Δ` using a trust region strategy according to the `method` keyword. +The output is a `TrustRegionOutput`. + +The actual reduction of the model is given by +``` +ared = ϕx - ϕt, +``` +where `ϕx = obj(nlp, x; update=update_obj_at_x)` and `ϕt = obj(nlp, xt)`. The predicted reduction is +given by +``` +pred = ϕx - (ϕx + Δq) - ϕ.η P(ϕ.cx + Ad), +``` +where `P` is the penalty function used by `ϕ` and `Ad` is the Jacobian times `d`. + +The following keyword argument are available: +- `update_obj_at_x`: Whether to update `ϕ`'s objective at `x` (default: `false`); +- `penalty_update`: Strategy to update the parameter `ϕ.η`. (default: `:basic`); +- `max_radius`: Maximum value for `Δ` (default 1/√ϵ(T)); +- `ηacc`: threshold for successful step (default: 1.0e-4); +- `ηinc`: threshold for great step (default: 0.95), +- `σdec`: factor used in the radius decrease heuristics (default: 1/3); +- `σinc`: factor used in the radius increase heuristics (default: 3/2); + +Some methods have additional keyword arguments. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ += 0`). In such case a `UncMerit` is created using the keyword argument `fx`. """ -function reset!(tr :: AbstractTrustRegion) - tr.radius = tr.initial_radius - return tr -end +function trust_region end - -"""A basic getter for `AbstractTrustRegion` instances. -Should be overhauled when it's possible to overload `getfield()` -and `setfield!()`. See -https://github.com/JuliaLang/julia/issues/1974 -""" -function get_property(tr :: AbstractTrustRegion, prop :: Symbol) - # All fields are gettable. - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - getfield(tr, prop) +function trust_region(nlp_or_ϕ, x, d, args...; kwargs...) + xt = copy(x) + trust_region!(nlp_or_ϕ, x, d, xt, args...; kwargs...) end -"""A basic setter for `AbstractTrustRegion` instances. -""" -function set_property!(tr :: AbstractTrustRegion, prop :: Symbol, value :: Any) - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - setfield!(tr, prop, value) +function trust_region!(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector, xt :: AbstractVector, Δq :: Real, Δ :: Real; fx=obj(nlp, x), kwargs...) 
+ ϕ = UncMerit(nlp, fx=fx) + trust_region!(ϕ, x, d, xt, Δq, eltype(x)[], Δ; kwargs...) end -"""`update!(tr, step_norm)` - -Update the trust-region radius based on the ratio of actual vs. predicted reduction -and the step norm. -""" -function update!(tr :: AbstractTrustRegion, ratio :: AbstractFloat, step_norm :: AbstractFloat) - throw(NotImplementedError("`update!` not implemented for this TrustRegion type")) -end - -include("basic-trust-region.jl") -include("tron-trust-region.jl") +function trust_region!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:basic, kwargs...) + SolverTools.tr_dict[method](ϕ, x, args...; kwargs...) +end \ No newline at end of file diff --git a/test/test_trust_region.jl b/test/test_trust_region.jl index bd543b5c..2940dd3e 100644 --- a/test/test_trust_region.jl +++ b/test/test_trust_region.jl @@ -1,62 +1,67 @@ @testset "Trust Region" begin - @testset "aredpred" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - x = nlp.meta.x0 - d = -ones(2) - xt = x + d - f = obj(nlp, x) - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - tr = TRONTrustRegion(100.0) - ared, pred, qmin = aredpred(tr, nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - d = -1e-12 * ones(2) - xt = x + d - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - end + tr_methods = [:basic, :tron] + + @testset "Unconstrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) + x = nlp.meta.x0 + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success - @testset "BasicTrustRegion" begin - Δ₀ = 10.0 - tr = TrustRegion(Δ₀) - tr.ratio = 1.0 - @test acceptable(tr) == true + Δq = -tro.ared / 0.25 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = 10.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ - reset!(tr) - tr.ratio = -1.0 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) + Δq = -tro.ared / 1e-6 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end - @testset "TRONTrustRegion" begin - Δ₀ = 10.0 - tr = TRONTrustRegion(Δ₀) - tr.ratio = 1.0 - @test acceptable(tr) == true + @testset "Constrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0]) + x = nlp.meta.x0 + ϕ = L1Merit(nlp, 1.0) + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + Ad = jprod(nlp, x, d) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success + + Δq = -tro.ared / 0.25 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = 
tr.acceptance_threshold - 1 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.acceptance_threshold + tr.decrease_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.decrease_threshold + tr.increase_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = tr.increase_threshold + 1 - tr.quad_min = 2.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ + Δq = -tro.ared / 1e-6 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end end From e412920e4cf596287fc2273c4ff00b37b37c02d1 Mon Sep 17 00:00:00 2001 From: Abel Date: Wed, 19 Aug 2020 20:54:47 -0300 Subject: [PATCH 7/7] Use the new trust region on the dummy solver --- test/dummy_trust_region_solver.jl | 51 +++++++------------------------ 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/test/dummy_trust_region_solver.jl b/test/dummy_trust_region_solver.jl index 0acb5842..284ab000 100644 --- a/test/dummy_trust_region_solver.jl +++ b/test/dummy_trust_region_solver.jl @@ -7,7 +7,7 @@ function dummy_trust_region_solver( rtol :: Real = 1e-6, max_eval :: Int = 1000, max_time :: Float64 = 30.0, - # ls_method :: Symbol = :armijo, + tr_method :: Symbol = :basic, merit_constructor = L1Merit ) @@ -29,7 +29,7 @@ function dummy_trust_region_solver( iter = 0 Δ = 100.0 - ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + ϕ = merit_constructor(nlp, 1e-3, cx=copy(cx), gx=gx) ϵd = atol + rtol * norm(dual) ϵp = atol @@ -44,9 +44,6 @@ function dummy_trust_region_solver( while !(solved || tired) Hxy = ncon > 0 ? hess_op(nlp, x, y) : hess_op(nlp, x) local Δx - ϕx = obj(ϕ, x, update=false) - ϕxprimal = primalobj(ϕ) - ϕxdual = dualobj(ϕ) if ncon > 0 # Normal step v = cgls(Jx, -cx, radius=0.8Δ)[1] @@ -57,51 +54,25 @@ function dummy_trust_region_solver( Δx = cg(Hxy, -dual, radius=Δ)[1] end - # Workaround to be agnostic to the merit function - # Pretend fx is the quadratic approximation to obj(nlp, x + Δx) - # Pretend cx is que linear approximation to cons(nlp, x + Δx) - ϕ.fx += dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 + Δq = dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 Ad = Jx * Δx - cpAd = cx .+ Ad - ct = ϕ.cx - ϕ.cx = cpAd - ϕtprimal = primalobj(ϕ) - ϕtdual = dualobj(ϕ) - - xt = x + Δx - ϕt = obj(ϕ, xt, update=true) - ft = ϕ.fx - - @assert ϕxprimal ≥ ϕtprimal - while ϕxdual - ϕtdual < -0.1 * ϕ.η * (ϕxprimal - ϕtprimal) < 0 # For unconstrained problems, right side is 0. - ϕ.η *= 2 - end + tro = trust_region!(ϕ, x, Δx, xt, Δq, Ad, Δ, method=tr_method, update_obj_at_x=false) - Ared = ϕx - ϕt - Pred = ϕxdual - ϕtdual + ϕ.η * (ϕxprimal - ϕtprimal) + Δ = tro.Δ + x .= tro.xt - ρ = Ared / Pred - iter_kind = if ρ > 1e-2 # accept - x .= xt - fx = ft - cx = ϕ.cx - grad!(nlp, x, gx) # Updates ϕ.gx + if tro.success + fx = ϕ.fx + cx .= ϕ.cx + grad!(nlp, x, gx) if ncon > 0 Jx = jac(nlp, x) end y = cgls(Jx', -gx)[1] dual = gx + Jx' * y - if ρ > 0.75 && norm(Δx) > 0.9Δ - Δ *= 2 - :great - else - :good - end else ϕ.fx = fx ϕ.cx = cx - Δ /= 4 - :bad end elapsed_time = time() - start_time @@ -109,7 +80,7 @@ function dummy_trust_region_solver( tired = evals(nlp) > max_eval || elapsed_time > max_time iter += 1 - @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, iter_kind]) + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, tro.status]) end status = if solved