diff --git a/.gitignore b/.gitignore
index 2cd4ac3f..98474722 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 docs/build
 docs/Manifest.toml
+
+Manifest.toml
\ No newline at end of file
diff --git a/docs/src/api.md b/docs/src/api.md
index 1585a3af..a0e4d821 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -27,6 +27,17 @@ redirect!
 armijo_wolfe
 ```
 
+## Merit
+
+See also [`obj`](@ref).
+
+```@docs
+AbstractMeritModel
+derivative
+L1Merit
+AugLagMerit
+```
+
 ## Stats
 
 ```@docs
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 2dec9b13..b0d59bcf 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -1,4 +1,12 @@
 # Reference
 
-```@index
+## Contents
+
+```@contents
+Pages = ["reference.md"]
 ```
+
+## Index
+
+```@index
+```
\ No newline at end of file
diff --git a/src/SolverTools.jl b/src/SolverTools.jl
index ed6ecde2..5947635b 100644
--- a/src/SolverTools.jl
+++ b/src/SolverTools.jl
@@ -16,6 +16,7 @@ include("auxiliary/logger.jl")
 include("stats/stats.jl")
 
 # Algorithmic components.
+include("merit/merit.jl")
 include("linesearch/linesearch.jl")
 include("trust-region/trust-region.jl")
 
diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl
index 54f21426..08e4b78b 100644
--- a/src/linesearch/line_model.jl
+++ b/src/linesearch/line_model.jl
@@ -1,5 +1,3 @@
-import NLPModels: obj, grad, grad!, hess
-
 export LineModel
 export obj, grad, derivative, grad!, derivative!, hess, redirect!
 
@@ -38,7 +36,7 @@ end
 
     ϕ(t) := f(x + td).
 """
-function obj(f :: LineModel, t :: AbstractFloat)
+function NLPModels.obj(f :: LineModel, t :: AbstractFloat)
   NLPModels.increment!(f, :neval_obj)
   return obj(f.nlp, f.x + t * f.d)
 end
@@ -51,7 +49,7 @@ i.e.,
 
     ϕ'(t) = ∇f(x + td)ᵀd.
 """
-function grad(f :: LineModel, t :: AbstractFloat)
+function NLPModels.grad(f :: LineModel, t :: AbstractFloat)
   NLPModels.increment!(f, :neval_grad)
   return dot(grad(f.nlp, f.x + t * f.d), f.d)
 end
@@ -67,7 +65,7 @@ i.e.,
 
 The gradient ∇f(x + td) is stored in `g`.
 """
-function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector)
+function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector)
   NLPModels.increment!(f, :neval_grad)
   return dot(grad!(f.nlp, f.x + t * f.d, g), f.d)
 end
@@ -81,7 +79,7 @@ i.e.,
 
     ϕ"(t) = dᵀ∇²f(x + td)d.
 """
-function hess(f :: LineModel, t :: AbstractFloat)
+function NLPModels.hess(f :: LineModel, t :: AbstractFloat)
   NLPModels.increment!(f, :neval_hess)
   return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d))
 end
diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl
new file mode 100644
index 00000000..cbb05c12
--- /dev/null
+++ b/src/merit/auglagmerit.jl
@@ -0,0 +1,66 @@
+export AugLagMerit
+
+@doc raw"""
+    AugLagMerit(nlp, η; kwargs...)
+
+Creates an augmented Lagrangian merit function for the equality constrained problem
+```math
+\min f(x) \quad \text{s.to} \quad c(x) = 0
+```
+defined by
+```math
+\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + \tfrac{η}{2}\|c(x)\|²
+```
+
+In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also
+accepts the argument `y`.
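+
+For illustration, a usage sketch (assuming `ADNLPModel` is available, as in the tests;
+the model, point, and direction below are placeholders):
+
+    nlp = ADNLPModel(x -> dot(x, x), ones(2), x -> [sum(x) - 1], [0.0], [0.0])
+    ϕ = AugLagMerit(nlp, 1.0, y = ones(1))
+    ϕx = obj(ϕ, nlp.meta.x0)                    # evaluates f and c, then ϕ
+    Dϕx = derivative(ϕ, nlp.meta.x0, -ones(2))  # slope of ϕ along a direction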
+""" +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V +end + +function AugLagMerit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) +end + +function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +end + +function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl new file mode 100644 index 00000000..754a07d1 --- /dev/null +++ b/src/merit/l1merit.jl @@ -0,0 +1,62 @@ +export L1Merit + +@doc raw""" + L1Merit(nlp, η; kwargs...) + +Creates a ℓ₁ merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi_1(x; η) = f(x) + η\|c(x)\|₁ +``` +""" +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V +end + +function L1Merit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) +end + +function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end +end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl new file mode 100644 index 00000000..787cb21a --- /dev/null +++ b/src/merit/merit.jl @@ -0,0 +1,46 @@ +using NLPModels +export AbstractMeritModel, obj, derivative + +""" + AbstractMeritModel + +Model for merit functions. All models should store +- `nlp`: The NLP with the corresponding problem. +- `η`: The merit parameter. +- `fx`: The objective at some point. +- `cx`: The constraints vector at some point. +- `gx`: The constraints vector at some point. +- `Ad`: The Jacobian-direction product. 
+
+All models allow a constructor of the form
+
+    Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf])
+
+Additional arguments and constructors may be provided.
+"""
+abstract type AbstractMeritModel end
+
+"""
+    obj(merit, x; update=true)
+
+Computes the `merit` function value at `x`.
+This will call `obj` and `cons` on the internal model, unless `update` is `false`.
+The option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
+"""
+function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector)
+  # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using
+  # only `function obj end` overwrites the docstring
+  throw(MethodError(NLPModels.obj, (merit, x)))
+end
+
+"""
+    derivative(merit, x, d; update=true)
+
+Computes the derivative of `merit` at `x` in the direction `d`.
+This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct.
+The option exists to allow updating the `η` parameter without recomputing `gx` and `Ad`.
+"""
+function derivative end
+
+include("auglagmerit.jl")
+include("l1merit.jl")
diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl
new file mode 100644
index 00000000..19374908
--- /dev/null
+++ b/test/dummy_linesearch_solver.jl
@@ -0,0 +1,90 @@
+function dummy_linesearch_solver(
+  nlp :: AbstractNLPModel;
+  x :: AbstractVector = nlp.meta.x0,
+  atol :: Real = 1e-6,
+  rtol :: Real = 1e-6,
+  max_eval :: Int = 1000,
+  max_time :: Float64 = 30.0,
+  merit_constructor = L1Merit
+)
+
+  start_time = time()
+  elapsed_time = 0.0
+
+  nvar, ncon = nlp.meta.nvar, nlp.meta.ncon
+  T = eltype(x)
+
+  cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0)
+  gx = grad(nlp, x)
+  Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar)
+  y = -Jx' \ gx
+  Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)
+
+  dual = gx + Jx' * y
+
+  iter = 0
+
+  ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx)
+
+  ϵd = atol + rtol * norm(dual)
+  ϵp = atol
+
+  fx = obj(nlp, x)
+  @info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T])
+  @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η])
+  solved = norm(dual) < ϵd && norm(cx) < ϵp
+  tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time
+
+  while !(solved || tired)
+    Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)
+    W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L)
+    Δxy = -W \ [dual; cx]
+    Δx = Δxy[1:nvar]
+    Δy = Δxy[nvar+1:end]
+
+    ϕ.fx = fx
+    ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad)
+    Dϕx = derivative(ϕ, x, Δx, update=false)
+    while Dϕx ≥ 0
+      ϕ.η *= 2
+      Dϕx = derivative(ϕ, x, Δx, update=false)
+    end
+    ϕx = obj(ϕ, x, update=false)
+
+    t = 1.0
+    ϕt = obj(ϕ, x + Δx) # Updates cx
+    while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx)
+      t /= 2
+      ϕt = obj(ϕ, x + t * Δx) # Updates cx
+    end
+    x += t * Δx
+    y += t * Δy
+
+    grad!(nlp, x, gx) # Updates ϕ.gx
+    if ncon > 0
+      Jx = jac(nlp, x)
+    end
+    dual = gx + Jx' * y
+    elapsed_time = time() - start_time
+    solved = norm(dual) < ϵd && norm(cx) < ϵp
+    tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time
+
+    iter += 1
+    fx = obj(nlp, x)
+    @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η])
+  end
+
+  status = if solved
+    :first_order
+  elseif elapsed_time > max_time
+    :max_time
+  else
+    :max_eval
+  end
+
+  return GenericExecutionStats(:unknown, nlp,
+    objective=fx, dual_feas=norm(dual), primal_feas=norm(cx),
+    multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar),
+    elapsed_time=elapsed_time, solution=x, iter=iter
+  )
+end
diff --git a/test/merit.jl b/test/merit.jl
new file mode 100644
index 00000000..b8c44085
--- /dev/null
+++ b/test/merit.jl
@@ -0,0 +1,110 @@
+function test_merit(merit_constructor, ϕ)
+  @testset "Consistency" begin
+    nlp = ADNLPModel(x -> dot(x, x), zeros(5), x -> [sum(x.^2) - 5; sum(x) - 1; prod(x)], zeros(3), zeros(3))
+
+    for x in (zeros(5), ones(5), -ones(5), BigFloat.([π, -2π, 3π, -4π, 0.0])),
+        d in (-ones(5), ones(5))
+      η = one(eltype(x))
+      merit = merit_constructor(nlp, η)
+      ϕx = obj(merit, x)
+      @test ϕx ≈ ϕ(nlp, x, η)
+      @test obj(nlp, x) == merit.fx
+      @test cons(nlp, x) == merit.cx
+
+      Dϕx = derivative(merit, x, d)
+      ϵ = √eps(eltype(η))
+      Dϕxa = (ϕ(nlp, x + ϵ * d, η) - ϕ(nlp, x, η)) / ϵ
+      @test isapprox(Dϕx, Dϕxa, rtol=1e-6)
+      @test grad(nlp, x) == merit.gx
+      @test jac(nlp, x) * d == merit.Ad
+
+      ϕt = obj(merit, x + d)
+      @test ϕt ≈ ϕ(nlp, x + d, η)
+      @test obj(nlp, x + d) == merit.fx
+      @test cons(nlp, x + d) == merit.cx
+    end
+  end
+
+  @testset "Simple line search" begin
+    nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0])
+
+    sol = [0.5; 0.5]
+    x = [-1.0; 1.0]
+    d = 30 * (sol - x)
+    η = 1.0
+
+    merit = merit_constructor(nlp, η)
+
+    @assert obj(nlp, x + d) > obj(nlp, x)
+    @assert norm(cons(nlp, x + d)) > norm(cons(nlp, x))
+
+    reset!(nlp)
+    bk = 0
+    ϕx = obj(merit, x)
+    obj(merit, x, update=false)
+    Dϕx = derivative(merit, x, d)
+    derivative(merit, x, d, update=false)
+    @test Dϕx < 0 # If d is not a descent direction for your merit, change your parameters
+    t = 1.0
+    ϕt = obj(merit, x + d)
+    while ϕt > ϕx + 0.5 * t * Dϕx
+      t /= 2
+      ϕt = obj(merit, x + t * d)
+      if t < 1e-16
+        error("Failure")
+      end
+      bk += 1
+    end
+    @test t < 1
+    @test neval_obj(nlp) == 2 + bk
+    @test neval_cons(nlp) == 2 + bk
+    @test neval_grad(nlp) == 1
+    @test neval_jprod(nlp) == 1
+  end
+
+  @testset "Safe for unconstrained" begin
+    nlp = ADNLPModel(x -> dot(x, x), ones(2))
+    x = nlp.meta.x0
+    d = ones(2)
+
+    merit = merit_constructor(nlp, 1.0)
+    @test obj(merit, x) == obj(nlp, x)
+    @test derivative(merit, x, d) == dot(grad(nlp, x), d)
+  end
+
+  @testset "Use on solver" begin
+    nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0])
+    output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0)
+    @test isapprox(output.solution, ones(2), rtol=1e-6)
+    @test output.objective < 1e-6
+    @test output.primal_feas == 0
+    @test output.dual_feas < 1e-6
+
+    nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0])
+    output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0)
+    @test isapprox(output.solution, ones(2), rtol=1e-6)
+    @test output.objective < 1e-6
+    @test output.primal_feas < 1e-6
+    @test output.dual_feas < 1e-6
+  end
+end
+
+@testset "Merit functions" begin
+  merits = [ # Name, Constructor, ϕ(nlp, x, η)
+    (
+      "L1Merit",
+      L1Merit,
+      (nlp, x, η) -> obj(nlp, x) + η * norm(cons(nlp, x), 1)
+    ),
+    (
+      "AugLagMerit",
+      (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...),
+      (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ
+    )
+  ]
+  for (name, merit, ϕ) in merits
+    @testset "Merit $name" begin
+      test_merit(merit, ϕ)
+    end
+  end
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index e56c4737..f4b46944 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,9 +8,11 @@ using NLPModels
 using LinearAlgebra, Logging, Test
 
 include("dummy_solver.jl")
+include("dummy_linesearch_solver.jl")
 
 include("test_auxiliary.jl")
 include("test_linesearch.jl")
 include("test_logging.jl")
+include("merit.jl")
 include("test_stats.jl")
-include("test_trust_region.jl")
+include("test_trust_region.jl")
\ No newline at end of file
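
The tests above exercise a simple Armijo backtracking search on a merit model; for reference, here is that pattern as a standalone sketch (`nlp`, `x`, and `d` are placeholders, and the constants mirror `test/merit.jl`):

```julia
using LinearAlgebra, NLPModels

# Backtracking line search on an ℓ₁ merit model (sketch).
ϕ = L1Merit(nlp, 1.0)
ϕx = obj(ϕ, x)              # updates ϕ.fx and ϕ.cx
Dϕx = derivative(ϕ, x, d)   # updates ϕ.gx and ϕ.Ad; d must satisfy Dϕx < 0
t = 1.0
ϕt = obj(ϕ, x + t * d)
while ϕt > ϕx + 0.5 * t * Dϕx   # Armijo condition on the merit function
  t /= 2
  ϕt = obj(ϕ, x + t * d)
end
```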