2 changes: 2 additions & 0 deletions .gitignore
@@ -1,2 +1,4 @@
docs/build
docs/Manifest.toml

Manifest.toml
11 changes: 11 additions & 0 deletions docs/src/api.md
@@ -27,6 +27,17 @@ redirect!
armijo_wolfe
```

## Merit

See also [`obj`](@ref).

```@docs
AbstractMeritModel
derivative
L1Merit
AugLagMerit
```

## Stats

```@docs
10 changes: 9 additions & 1 deletion docs/src/reference.md
@@ -1,4 +1,12 @@
# Reference

```@index
## Contents

```@contents
Pages = ["reference.md"]
```

## Index

```@index
```
1 change: 1 addition & 0 deletions src/SolverTools.jl
@@ -16,6 +16,7 @@ include("auxiliary/logger.jl")
include("stats/stats.jl")

# Algorithmic components.
include("merit/merit.jl")
include("linesearch/linesearch.jl")
include("trust-region/trust-region.jl")

10 changes: 4 additions & 6 deletions src/linesearch/line_model.jl
@@ -1,5 +1,3 @@
import NLPModels: obj, grad, grad!, hess

export LineModel
export obj, grad, derivative, grad!, derivative!, hess, redirect!

@@ -38,7 +36,7 @@ end

ϕ(t) := f(x + td).
"""
function obj(f :: LineModel, t :: AbstractFloat)
function NLPModels.obj(f :: LineModel, t :: AbstractFloat)
NLPModels.increment!(f, :neval_obj)
return obj(f.nlp, f.x + t * f.d)
end
@@ -51,7 +49,7 @@ i.e.,

ϕ'(t) = ∇f(x + td)ᵀd.
"""
function grad(f :: LineModel, t :: AbstractFloat)
function NLPModels.grad(f :: LineModel, t :: AbstractFloat)
NLPModels.increment!(f, :neval_grad)
return dot(grad(f.nlp, f.x + t * f.d), f.d)
end
@@ -67,7 +65,7 @@ i.e.,

The gradient ∇f(x + td) is stored in `g`.
"""
function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector)
function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector)
NLPModels.increment!(f, :neval_grad)
return dot(grad!(f.nlp, f.x + t * f.d, g), f.d)
end
@@ -81,7 +79,7 @@ i.e.,

ϕ″(t) = dᵀ∇²f(x + td)d.
"""
function hess(f :: LineModel, t :: AbstractFloat)
function NLPModels.hess(f :: LineModel, t :: AbstractFloat)
NLPModels.increment!(f, :neval_hess)
return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d))
end
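For context (not part of the diff), a minimal sketch of how these `NLPModels` extensions are exercised; the toy problem and the `LineModel(nlp, x, d)` construction are assumptions based on the surrounding file, and `ADNLPModel` may live in NLPModels.jl or ADNLPModels.jl depending on the versions in use:

```julia
using NLPModels, SolverTools

# Toy unconstrained problem: a mild Rosenbrock variant.
nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0])
x = nlp.meta.x0
d = -grad(nlp, x)       # steepest-descent direction

ϕ = LineModel(nlp, x, d)
obj(ϕ, 0.0)             # ϕ(0)  = f(x)
grad(ϕ, 0.0)            # ϕ′(0) = ∇f(x)ᵀd, negative along a descent direction
hess(ϕ, 0.0)            # ϕ″(0) = dᵀ∇²f(x)d
```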
58 changes: 58 additions & 0 deletions src/merit/auglagmerit.jl
@@ -0,0 +1,58 @@
export AugLagMerit

@doc raw"""
AugLagMerit(nlp, η; kwargs...)

Creates an augmented Lagrangian merit function for the equality-constrained problem
```math
\min_x \ f(x) \quad \text{s.t.} \quad c(x) = 0
```
defined by
```math
\phi(x, y_k; \eta) = f(x) - y_k^T c(x) + \tfrac{\eta}{2} \|c(x)\|^2.
```

In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also
accepts the keyword argument `y`, the vector of Lagrange multiplier estimates.
"""
mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel
nlp :: M
η :: T
fx :: T
gx :: V
cx :: V
Ad :: V
y :: V
end

function AugLagMerit(
nlp :: M,
η :: T;
fx :: T = T(Inf),
gx :: V = fill(T(Inf), nlp.meta.nvar),
cx :: V = fill(T(Inf), nlp.meta.ncon),
Ad :: V = fill(T(Inf), nlp.meta.ncon),
y :: V = fill(T(Inf), nlp.meta.ncon)
) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}}
AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y)
end

function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true)
if update
merit.fx = obj(merit.nlp, x)
merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx)
end
return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2
end

function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true)
if update
grad!(merit.nlp, x, merit.gx)
merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad)
end
if merit.nlp.meta.ncon == 0
return dot(merit.gx, d)
else
return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad)
end
end
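A hedged usage sketch (not in the PR): the equality-constrained toy problem and the zero multiplier estimate below are illustrative assumptions.

```julia
using NLPModels, SolverTools, LinearAlgebra

nlp = ADNLPModel(x -> (x[1] - 1)^2 + (x[2] - 2)^2, [0.0; 0.0],
                 x -> [x[1] + x[2] - 1], [0.0], [0.0])
x = nlp.meta.x0
ϕ = AugLagMerit(nlp, 1.0, y=zeros(1))   # η = 1, zero multiplier estimate

ϕx = obj(ϕ, x)             # evaluates f(x) and c(x), stores them in ϕ.fx and ϕ.cx
d  = -grad(nlp, x)
Dϕ = derivative(ϕ, x, d)   # updates ϕ.gx and ϕ.Ad; reuses the stored ϕ.cx
```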
62 changes: 62 additions & 0 deletions src/merit/l1merit.jl
@@ -0,0 +1,62 @@
export L1Merit

@doc raw"""
L1Merit(nlp, η; kwargs...)

Creates an ℓ₁ merit function for the equality-constrained problem
```math
\min_x \ f(x) \quad \text{s.t.} \quad c(x) = 0
```
defined by
```math
\phi_1(x; \eta) = f(x) + \eta \|c(x)\|_1.
```
"""
mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel
nlp :: M
η :: T
fx :: T
gx :: V
cx :: V
Ad :: V
end

function L1Merit(
nlp :: M,
η :: T;
fx :: T = T(Inf),
gx :: V = fill(T(Inf), nlp.meta.nvar),
cx :: V = fill(T(Inf), nlp.meta.ncon),
Ad :: V = fill(T(Inf), nlp.meta.ncon)
) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}}
L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad)
end

function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true)
if update
merit.fx = obj(merit.nlp, x)
merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx)
end
return merit.fx + merit.η * norm(merit.cx, 1)
end

function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true)
if update
grad!(merit.nlp, x, merit.gx)
merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad)
end
if merit.nlp.meta.ncon == 0
return dot(merit.gx, d)
else
return dot(merit.gx, d) + merit.η * sum(
if ci > 0
Adi
elseif ci < 0
-Adi
else
abs(Adi)
end
for (ci, Adi) in zip(merit.cx, merit.Ad)
)
end
end
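A hedged sketch of the `update=false` pattern this type enables (the toy problem is assumed, as above): after changing `η`, the merit value can be reassembled from the cached `fx` and `cx` without further NLP evaluations.

```julia
using NLPModels, SolverTools

nlp = ADNLPModel(x -> (x[1] - 1)^2 + (x[2] - 2)^2, [0.0; 0.0],
                 x -> [x[1] + x[2] - 1], [0.0], [0.0])
ϕ = L1Merit(nlp, 1.0)
x = nlp.meta.x0

ϕx = obj(ϕ, x)                 # one obj and one cons! evaluation
ϕ.η *= 10                      # stiffen the penalty...
ϕx = obj(ϕ, x, update=false)   # ...and reassemble at no extra evaluation cost
```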
46 changes: 46 additions & 0 deletions src/merit/merit.jl
@@ -0,0 +1,46 @@
using NLPModels
export AbstractMeritModel, obj, derivative

"""
AbstractMeritModel

Model for merit functions. All models should store
- `nlp`: The NLP with the corresponding problem.
- `η`: The merit parameter.
- `fx`: The objective value at some point.
- `gx`: The objective gradient at some point.
- `cx`: The constraints vector at some point.
- `Ad`: The Jacobian-direction product.

All models provide a constructor of the form

Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf])

Additional arguments and constructors may be provided.
"""
abstract type AbstractMeritModel end

"""
obj(merit, x; update=true)

Computes the `merit` function value at `x`.
This will call `obj` and `cons!` on the internal model, unless `update=false` is passed.
This option exists to allow updating the `η` parameter without recomputing `fx` and `cx`.
"""
function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector)
# I'd prefer to define only `function NLPModels.obj end` here, but that doesn't work, and
# defining only `function obj end` overwrites the docstring, hence this explicit fallback.
throw(MethodError(NLPModels.obj, (merit, x)))
end

"""
derivative(merit, x, d; update=true)

Computes the derivative of `merit` at `x` in the direction `d`.
This will call `grad!` and `jprod!` to update the internal values of `gx` and `Ad`, but assumes that `cx` is up to date.
The option exists to allow updating the `η` parameter without recomputing `gx` and `Ad`.
"""
function derivative end

include("auglagmerit.jl")
include("l1merit.jl")
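To make the contract concrete, here is a hypothetical quadratic-penalty merit implementing the interface above (a sketch, not part of the PR; `QuadPenaltyMerit` is an invented name, and outside the package `derivative` would need to be qualified as `SolverTools.derivative`):

```julia
using NLPModels, LinearAlgebra

# ϕ(x; η) = f(x) + η ½‖c(x)‖² — hypothetical, for illustration only.
mutable struct QuadPenaltyMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel
  nlp :: M
  η :: T
  fx :: T
  gx :: V
  cx :: V
  Ad :: V
end

function NLPModels.obj(merit :: QuadPenaltyMerit, x :: AbstractVector; update :: Bool = true)
  if update
    merit.fx = obj(merit.nlp, x)
    merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx)
  end
  return merit.fx + merit.η * dot(merit.cx, merit.cx) / 2
end

function derivative(merit :: QuadPenaltyMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true)
  if update
    grad!(merit.nlp, x, merit.gx)
    merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad)
  end
  # dot over empty vectors is zero, so the unconstrained case needs no branch
  return dot(merit.gx, d) + merit.η * dot(merit.cx, merit.Ad)
end
```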
90 changes: 90 additions & 0 deletions test/dummy_linesearch_solver.jl
@@ -0,0 +1,90 @@
function dummy_linesearch_solver(
nlp :: AbstractNLPModel;
x :: AbstractVector = nlp.meta.x0,
atol :: Real = 1e-6,
rtol :: Real = 1e-6,
max_eval :: Int = 1000,
max_time :: Float64 = 30.0,
merit_constructor = L1Merit
)

start_time = time()
elapsed_time = 0.0

nvar, ncon = nlp.meta.nvar, nlp.meta.ncon
T = eltype(x)

cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0)
gx = grad(nlp, x)
Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar)
y = -Jx' \ gx
Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)

dual = gx + Jx' * y

iter = 0

ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx)

ϵd = atol + rtol * norm(dual)
ϵp = atol

fx = obj(nlp, x)
@info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T])
@info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η])
solved = norm(dual) < ϵd && norm(cx) < ϵp
tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time

while !(solved || tired)
Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x)
W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L)
Δxy = -W \ [dual; cx]
Δx = Δxy[1:nvar]
Δy = Δxy[nvar+1:end]

ϕ.fx = fx
ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad)
Dϕx = derivative(ϕ, x, Δx, update=false)
while Dϕx ≥ 0
ϕ.η *= 2
Dϕx = derivative(ϕ, x, Δx, update=false)
end
ϕx = obj(ϕ, x, update=false)

t = 1.0
ϕt = obj(ϕ, x + Δx) # Updates cx
while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx)
t /= 2
ϕt = obj(ϕ, x + t * Δx) # Updates cx
end
x += t * Δx
y += t * Δy

grad!(nlp, x, gx) # Updates ϕ.gx
if ncon > 0
Jx = jac(nlp, x)
end
dual = gx + Jx' * y
elapsed_time = time() - start_time
solved = norm(dual) < ϵd && norm(cx) < ϵp
tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time

iter += 1
fx = obj(nlp, x)
@info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η])
end

status = if solved
:first_order
elseif elapsed_time > max_time
:max_time
else
:max_eval
end

return GenericExecutionStats(status, nlp,
objective=fx, dual_feas=norm(dual), primal_feas=norm(cx),
multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar),
elapsed_time=elapsed_time, solution=x, iter=iter
)
end
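A hedged sketch of driving this test solver (the constrained toy problem is illustrative; any small equality-constrained `AbstractNLPModel` should do):

```julia
using NLPModels, SolverTools, LinearAlgebra, Logging

nlp = ADNLPModel(x -> (x[1] - 1)^2 + 100 * (x[2] - x[1]^2)^2, [-1.2; 1.0],
                 x -> [x[1]^2 + x[2]^2 - 2], [0.0], [0.0])

stats = dummy_linesearch_solver(nlp, merit_constructor=L1Merit)
print(stats)   # GenericExecutionStats: status, objective, feasibility, timings
```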