From c6a2913c5904bb46fd531b62777b91ada5b104c0 Mon Sep 17 00:00:00 2001 From: Abel Soares Siqueira Date: Fri, 7 Aug 2020 10:48:03 -0300 Subject: [PATCH 1/7] First draft of merit function --- .gitignore | 2 + src/SolverTools.jl | 1 + src/linesearch/line_model.jl | 10 ++- src/merit/auglagmerit.jl | 43 +++++++++++++ src/merit/l1merit.jl | 50 +++++++++++++++ src/merit/merit.jl | 35 ++++++++++ test/dummy_linesearch_solver.jl | 90 ++++++++++++++++++++++++++ test/merit.jl | 110 ++++++++++++++++++++++++++++++++ test/runtests.jl | 4 +- 9 files changed, 338 insertions(+), 7 deletions(-) create mode 100644 src/merit/auglagmerit.jl create mode 100644 src/merit/l1merit.jl create mode 100644 src/merit/merit.jl create mode 100644 test/dummy_linesearch_solver.jl create mode 100644 test/merit.jl diff --git a/.gitignore b/.gitignore index 2cd4ac3f..98474722 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ docs/build docs/Manifest.toml + +Manifest.toml \ No newline at end of file diff --git a/src/SolverTools.jl b/src/SolverTools.jl index ed6ecde2..5947635b 100644 --- a/src/SolverTools.jl +++ b/src/SolverTools.jl @@ -16,6 +16,7 @@ include("auxiliary/logger.jl") include("stats/stats.jl") # Algorithmic components. +include("merit/merit.jl") include("linesearch/linesearch.jl") include("trust-region/trust-region.jl") diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl index 54f21426..08e4b78b 100644 --- a/src/linesearch/line_model.jl +++ b/src/linesearch/line_model.jl @@ -1,5 +1,3 @@ -import NLPModels: obj, grad, grad!, hess - export LineModel export obj, grad, derivative, grad!, derivative!, hess, redirect! @@ -38,7 +36,7 @@ end ϕ(t) := f(x + td). """ -function obj(f :: LineModel, t :: AbstractFloat) +function NLPModels.obj(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_obj) return obj(f.nlp, f.x + t * f.d) end @@ -51,7 +49,7 @@ i.e., ϕ'(t) = ∇f(x + td)ᵀd. """ -function grad(f :: LineModel, t :: AbstractFloat) +function NLPModels.grad(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_grad) return dot(grad(f.nlp, f.x + t * f.d), f.d) end @@ -67,7 +65,7 @@ i.e., The gradient ∇f(x + td) is stored in `g`. """ -function grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) +function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) NLPModels.increment!(f, :neval_grad) return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) end @@ -81,7 +79,7 @@ i.e., ϕ"(t) = dᵀ∇²f(x + td)d. 
""" -function hess(f :: LineModel, t :: AbstractFloat) +function NLPModels.hess(f :: LineModel, t :: AbstractFloat) NLPModels.increment!(f, :neval_hess) return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) end diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl new file mode 100644 index 00000000..93aa2a12 --- /dev/null +++ b/src/merit/auglagmerit.jl @@ -0,0 +1,43 @@ +export AugLagMerit + +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V +end + +function AugLagMerit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) +end + +function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +end + +function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl new file mode 100644 index 00000000..853b4e74 --- /dev/null +++ b/src/merit/l1merit.jl @@ -0,0 +1,50 @@ +export L1Merit + +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V +end + +function L1Merit( + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) +end + +function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end +end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl new file mode 100644 index 00000000..68d6ff6e --- /dev/null +++ b/src/merit/merit.jl @@ -0,0 +1,35 @@ +import NLPModels.obj +export AbstractMeritModel, obj, derivative + +""" + AbstractMeritModel + +Model for merit functions. All models should store +- `nlp`: The NLP with the corresponding problem. +- `fx`: The objective at some point. +- `cx`: The constraints vector at some point. +- `gx`: The constraints vector at some point. +- `Ad`: The Jacobian-direction product. 
+""" +abstract type AbstractMeritModel end + +""" + obj(merit, x; update=true) + +Computes the `merit` function value at `x`. +This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function obj end + +""" + derivative(merit, x, d; update=true) + +Computes the derivative derivative of `merit` at `x` on direction `d`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. +The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +""" +function derivative! end + +include("auglagmerit.jl") +include("l1merit.jl") diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl new file mode 100644 index 00000000..19374908 --- /dev/null +++ b/test/dummy_linesearch_solver.jl @@ -0,0 +1,90 @@ +function dummy_linesearch_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = nlp.meta.x0, + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? jac(nlp, x) : zeros(T, 0, nvar) + y = -Jx' \ gx + Hxy = ncon > 0 ? hess(nlp, x, y) : hess(nlp, x) + + dual = gx + Jx' * y + + iter = 0 + + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + @info log_header([:iter, :f, :c, :dual, :time, :η], [Int, T, T, T, T, T]) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? 
hess(nlp, x, y) : hess(nlp, x) + W = Symmetric([Hxy zeros(T, nvar, ncon); Jx zeros(T, ncon, ncon)], :L) + Δxy = -W \ [dual; cx] + Δx = Δxy[1:nvar] + Δy = Δxy[nvar+1:end] + + ϕ.fx = fx + ncon > 0 && jprod!(nlp, x, Δx, ϕ.Ad) + Dϕx = derivative(ϕ, x, Δx, update=false) + while Dϕx ≥ 0 + ϕ.η *= 2 + Dϕx = derivative(ϕ, x, Δx, update=false) + end + ϕx = obj(ϕ, x, update=false) + + t = 1.0 + ϕt = obj(ϕ, x + Δx) # Updates cx + while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx) + t /= 2 + ϕt = obj(ϕ, x + t * Δx) # Updates cx + end + x += t * Δx + y += t * Δy + + grad!(nlp, x, gx) # Updates ϕ.gx + if ncon > 0 + Jx = jac(nlp, x) + end + dual = gx + Jx' * y + elapsed_time = time() - start_time + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = neval_obj(nlp) + neval_cons(nlp) > max_eval || elapsed_time > max_time + + iter += 1 + fx = obj(nlp, x) + @info log_row(Any[iter, fx, norm(cx), norm(dual), elapsed_time, ϕ.η]) + end + + status = if solved + :first_order + elseif elapsed_time > max_time + :max_time + else + :max_eval + end + + return GenericExecutionStats(:unknown, nlp, + objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), + multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), + elapsed_time=elapsed_time, solution=x, iter=iter + ) +end diff --git a/test/merit.jl b/test/merit.jl new file mode 100644 index 00000000..b8c44085 --- /dev/null +++ b/test/merit.jl @@ -0,0 +1,110 @@ +function test_merit(merit_constructor, ϕ) + @testset "Consistency" begin + nlp = ADNLPModel(x -> dot(x, x), zeros(5), x -> [sum(x.^2) - 5; sum(x) - 1; prod(x)], zeros(3), zeros(3)) + + for x in (zeros(5), ones(5), -ones(5), BigFloat.([π, -2π, 3π, -4π, 0.0])), + d in (-ones(5), ones(5)) + η = one(eltype(x)) + merit = merit_constructor(nlp, η) + ϕx = obj(merit, x) + @test ϕx ≈ ϕ(nlp, x, η) + @test obj(nlp, x) == merit.fx + @test cons(nlp, x) == merit.cx + + Dϕx = derivative(merit, x, d) + ϵ = √eps(eltype(η)) + Dϕxa = (ϕ(nlp, x + ϵ * d, η) - ϕ(nlp, x, η)) / ϵ + @test isapprox(Dϕx, Dϕxa, rtol=1e-6) + @test grad(nlp, x) == merit.gx + @test jac(nlp, x) * d == merit.Ad + + ϕt = obj(merit, x + d) + @test ϕt ≈ ϕ(nlp, x + d, η) + @test obj(nlp, x + d) == merit.fx + @test cons(nlp, x + d) == merit.cx + end + end + + @testset "Simple line search" begin + nlp = ADNLPModel(x -> dot(x, x), [-1.0; 1.0], x -> [sum(x) - 1], [0.0], [0.0]) + + sol = [0.5; 0.5] + x = [-1.0; 1.0] + d = 30 * (sol - x) + η = 1.0 + + merit = merit_constructor(nlp, η) + + @assert obj(nlp, x + d) > obj(nlp, x) + @assert norm(cons(nlp, x + d)) > norm(cons(nlp, x)) + + reset!(nlp) + bk = 0 + ϕx = obj(merit, x) + obj(merit, x, update=false) + Dϕx = derivative(merit, x, d) + derivative(merit, x, d, update=false) + @test Dϕx < 0 # If d is not a descent direction for your merit, change your parameters + t = 1.0 + ϕt = obj(merit, x + d) + while ϕt > ϕx + 0.5 * t * Dϕx + t /= 2 + ϕt = obj(merit, x + t * d) + if t < 1e-16 + error("Failure") + end + bk += 1 + end + @test t < 1 + @test neval_obj(nlp) == 2 + bk + @test neval_cons(nlp) == 2 + bk + @test neval_grad(nlp) == 1 + @test neval_jprod(nlp) == 1 + end + + @testset "Safe for unconstrained" begin + nlp = ADNLPModel(x -> dot(x, x), ones(2)) + x = nlp.meta.x0 + d = ones(2) + + merit = merit_constructor(nlp, 1.0) + @test obj(merit, x) == obj(nlp, x) + @test derivative(merit, x, d) == dot(grad(nlp, x), d) + end + + @testset "Use on solver" begin + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test 
isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_linesearch_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 + end +end + +@testset "Merit functions" begin + merits = [ # Name, Constructor, ϕ(nlp, x, η) + ( + "L1Merit", + L1Merit, + (nlp, x, η) -> obj(nlp, x) + η * norm(cons(nlp, x), 1) + ), + ( + "AugLagMerit", + (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...), + (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ + ) + ] + for (name, merit, ϕ) in merits + @testset "Merit $name" begin + test_merit(merit, ϕ) + end + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e56c4737..f4b46944 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,9 +8,11 @@ using NLPModels using LinearAlgebra, Logging, Test include("dummy_solver.jl") +include("dummy_linesearch_solver.jl") include("test_auxiliary.jl") include("test_linesearch.jl") include("test_logging.jl") +include("merit.jl") include("test_stats.jl") -include("test_trust_region.jl") +include("test_trust_region.jl") \ No newline at end of file From 80ad155898d60b80292aa2e54474d56cb32c073b Mon Sep 17 00:00:00 2001 From: Abel Date: Fri, 7 Aug 2020 14:43:47 -0300 Subject: [PATCH 2/7] Merit docs --- docs/src/api.md | 11 +++++++++++ docs/src/reference.md | 10 +++++++++- src/merit/auglagmerit.jl | 15 +++++++++++++++ src/merit/l1merit.jl | 12 ++++++++++++ src/merit/merit.jl | 17 ++++++++++++++--- 5 files changed, 61 insertions(+), 4 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 1585a3af..a0e4d821 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -27,6 +27,17 @@ redirect! armijo_wolfe ``` +## Merit + +See also [`obj`](@ref). + +```@docs +AbstractMeritModel +derivative +L1Merit +AugLagMerit +``` + ## Stats ```@docs diff --git a/docs/src/reference.md b/docs/src/reference.md index 2dec9b13..b0d59bcf 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -1,4 +1,12 @@ # Reference -```@index +## Contents + +```@contents +Pages = ["reference.md"] ``` + +## Index + +```@index +``` \ No newline at end of file diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index 93aa2a12..cbb05c12 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -1,5 +1,20 @@ export AugLagMerit +@doc raw""" + AugLagMerit(nlp, η; kwargs...) + +Creates an augmented Lagrangian merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|² +``` + +In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also +accepts the argument `y`. +""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel nlp :: M η :: T diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 853b4e74..754a07d1 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -1,5 +1,17 @@ export L1Merit +@doc raw""" + L1Merit(nlp, η; kwargs...) 
+ +Creates a ℓ₁ merit function for the equality constrained problem +```math +\min f(x) \quad \text{s.to} \quad c(x) = 0 +``` +defined by +```math +\phi_1(x; η) = f(x) + η\|c(x)\|₁ +``` +""" mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel nlp :: M η :: T diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 68d6ff6e..787cb21a 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -1,4 +1,4 @@ -import NLPModels.obj +using NLPModels export AbstractMeritModel, obj, derivative """ @@ -6,10 +6,17 @@ export AbstractMeritModel, obj, derivative Model for merit functions. All models should store - `nlp`: The NLP with the corresponding problem. +- `η`: The merit parameter. - `fx`: The objective at some point. - `cx`: The constraints vector at some point. - `gx`: The constraints vector at some point. - `Ad`: The Jacobian-direction product. + +All models allow a constructor of form + + Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf]) + +Additional arguments and constructors may be provided. """ abstract type AbstractMeritModel end @@ -20,7 +27,11 @@ Computes the `merit` function value at `x`. This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ -function obj end +function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector) + # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using + # only `function obj end` overwrites the docstring + throw(MethodError(NLPModels.obj, (merit, x))) +end """ derivative(merit, x, d; update=true) @@ -29,7 +40,7 @@ Computes the derivative derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ -function derivative! end +function derivative end include("auglagmerit.jl") include("l1merit.jl") From c689b3f8c733adefc24b5e70dfdb3ca65610f3fa Mon Sep 17 00:00:00 2001 From: Abel Date: Fri, 7 Aug 2020 17:18:16 -0300 Subject: [PATCH 3/7] Change merit to an NLPModel --- src/merit/auglagmerit.jl | 119 +++++++++++++++++++++++++++++---------- src/merit/l1merit.jl | 78 +++++++++++++------------ src/merit/merit.jl | 20 +++---- test/merit.jl | 4 +- 4 files changed, 139 insertions(+), 82 deletions(-) diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index cbb05c12..f2663dc6 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -1,7 +1,7 @@ export AugLagMerit @doc raw""" - AugLagMerit(nlp, η; kwargs...) + AugLagMerit(nlp, η; kwargs...) Creates an augmented Lagrangian merit function for the equality constrained problem ```math @@ -9,50 +9,107 @@ Creates an augmented Lagrangian merit function for the equality constrained prob ``` defined by ```math -\phi(x, yₖ; η) = f(x) - yₖᵀc(x) + η ½\|c(x)\|² +\phi(x, yₖ; η) = f(x) + yₖᵀc(x) + η ½\|c(x)\|² ``` In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also accepts the argument `y`. 
""" mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V - y :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V + y :: V + y⁺ :: V + Jᵀy⁺ :: V + Jv :: V + JᵀJv :: V end function AugLagMerit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon), - y :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon), + y :: V = fill(T(Inf), nlp.meta.ncon), + y⁺ :: V = fill(T(Inf), nlp.meta.ncon), # y + η * c(x) + Jᵀy⁺ :: V = fill(T(Inf), nlp.meta.nvar), + Jv :: V = fill(T(Inf), nlp.meta.ncon), + JᵀJv :: V = fill(T(Inf), nlp.meta.nvar) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - AugLagMerit{M,T,V}(nlp, η, fx, gx, cx, Ad, y) + meta = NLPModelMeta(nlp.meta.nvar) + AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) end function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx - dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 end function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) - dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + dot(merit.y, merit.Ad) + merit.η * dot(merit.cx, merit.Ad) + end +end + +function NLPModels.grad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_grad) + if update + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + g .= merit.gx .+ merit.Jᵀy⁺ + return g +end + +function NLPModels.objgrad!(merit :: AugLagMerit, x :: AbstractVector, g :: AbstractVector) + @lencheck merit.meta.nvar x g + NLPModels.increment!(merit, :neval_obj) + NLPModels.increment!(merit, :neval_grad) + if update + merit.fx = obj(merit.nlp, x) + grad!(nlp.model, x, merit.gx) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + merit.nlp.meta.ncon > 0 && jtprod!(merit.nlp, x, merit.y⁺, merit.Jᵀy⁺) + end + f = merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 + g .= merit.gx .+ merit.Jᵀy⁺ + return f, g +end + +function NLPModels.hprod!(merit :: AugLagMerit, x :: 
AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Float64 = 1.0) + @lencheck merit.meta.nvar x v Hv + NLPModels.increment!(merit, :neval_hprod) + if update + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + merit.y⁺ .= merit.y .+ merit.η .* merit.cx + end + jprod!(merit.model, x, v, merit.Jv) + jtprod!(merit.model, x, merit.Jv, merit.JᵀJv) + hprod!(merit.model, x, merit.y⁺, v, Hv, obj_weight = obj_weight) + Hv .+= merit.η * merit.JᵀJv + return Hv end \ No newline at end of file diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 754a07d1..6a09da2c 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -13,50 +13,56 @@ defined by ``` """ mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel - nlp :: M - η :: T - fx :: T - gx :: V - cx :: V - Ad :: V + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T + fx :: T + gx :: V + cx :: V + Ad :: V end function L1Merit( - nlp :: M, - η :: T; - fx :: T = T(Inf), - gx :: V = fill(T(Inf), nlp.meta.nvar), - cx :: V = fill(T(Inf), nlp.meta.ncon), - Ad :: V = fill(T(Inf), nlp.meta.ncon) + nlp :: M, + η :: T; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = fill(T(Inf), nlp.meta.ncon), + Ad :: V = fill(T(Inf), nlp.meta.ncon) ) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} - L1Merit{M,T,V}(nlp, η, fx, gx, cx, Ad) + meta = NLPModelMeta(nlp.meta.nvar) + L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + merit.η * norm(merit.cx, 1) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + return merit.fx + merit.η * norm(merit.cx, 1) end function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) - if update - grad!(merit.nlp, x, merit.gx) - merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) - end - if merit.nlp.meta.ncon == 0 - return dot(merit.gx, d) - else - return dot(merit.gx, d) + merit.η * sum( - if ci > 0 - Adi - elseif ci < 0 - -Adi - else - abs(Adi) - end - for (ci, Adi) in zip(merit.cx, merit.Ad) - ) - end + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + merit.nlp.meta.ncon > 0 && jprod!(merit.nlp, x, d, merit.Ad) + end + if merit.nlp.meta.ncon == 0 + return dot(merit.gx, d) + else + return dot(merit.gx, d) + merit.η * sum( + if ci > 0 + Adi + elseif ci < 0 + -Adi + else + abs(Adi) + end + for (ci, Adi) in zip(merit.cx, merit.Ad) + ) + end end \ No newline at end of file diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 787cb21a..c4869a9d 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -17,26 +17,20 @@ All models allow a constructor of form Merit(nlp, η; fx=Inf, cx=[Inf,…,Inf], gx=[Inf,…,Inf], Ad=[Inf,…,Inf]) Additional arguments and constructors may be provided. -""" -abstract type AbstractMeritModel end -""" - obj(merit, x; update=true) +An AbstractMeritModel is an AbstractNLPModel, but the API may not be completely implemented. For +instance, the `L1Merit` model doesn't provide any gradient function, but it provides a directional +derivative function. -Computes the `merit` function value at `x`. 
-This will call `obj` and `cons` for the internal model, unless `update` is called with `false`. -The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used +to determine whether the internal stored values should be updated or not. """ -function NLPModels.obj(merit::AbstractMeritModel, x::AbstractVector) - # I'd prefer to use only `function NLPModels.obj end` instead, but it doesn't work and using - # only `function obj end` overwrites the docstring - throw(MethodError(NLPModels.obj, (merit, x))) -end +abstract type AbstractMeritModel <: AbstractNLPModel end """ derivative(merit, x, d; update=true) -Computes the derivative derivative of `merit` at `x` on direction `d`. +Computes the directional derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. """ diff --git a/test/merit.jl b/test/merit.jl index b8c44085..8c11e6a6 100644 --- a/test/merit.jl +++ b/test/merit.jl @@ -98,8 +98,8 @@ end ), ( "AugLagMerit", - (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=ones(typeof(η), nlp.meta.ncon); kwargs...), - (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (1,…,1)ᵀ + (nlp, η; kwargs...) -> AugLagMerit(nlp, η, y=-ones(typeof(η), nlp.meta.ncon); kwargs...), + (nlp, x, η) -> obj(nlp, x) - sum(cons(nlp, x)) + η * norm(cons(nlp, x))^2 / 2 # y = (-1,…,-1)ᵀ ) ] for (name, merit, ϕ) in merits From 22672d17ffc4e7136911301982ba801dfebace7d Mon Sep 17 00:00:00 2001 From: Abel Date: Tue, 11 Aug 2020 17:07:53 -0300 Subject: [PATCH 4/7] Change linesearch to use merit models --- docs/src/api.md | 15 ++-- src/linesearch/armijo.jl | 57 +++++++++++++ src/linesearch/armijo_wolfe.jl | 97 +++++++++++---------- src/linesearch/line_model.jl | 85 ------------------- src/linesearch/linesearch.jl | 46 +++++++++- src/linesearch/linesearchoutput.jl | 35 ++++++++ src/merit/auglagmerit.jl | 2 +- src/merit/l1merit.jl | 2 +- src/merit/merit.jl | 17 +++- src/merit/uncmerit.jl | 48 +++++++++++ test/dummy_linesearch_solver.jl | 14 ++-- test/test_linesearch.jl | 130 ++++++++++++++++------------- 12 files changed, 338 insertions(+), 210 deletions(-) create mode 100644 src/linesearch/armijo.jl delete mode 100644 src/linesearch/line_model.jl create mode 100644 src/linesearch/linesearchoutput.jl create mode 100644 src/merit/uncmerit.jl diff --git a/docs/src/api.md b/docs/src/api.md index a0e4d821..8abb451e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -18,24 +18,21 @@ project_step! ## Line-Search ```@docs -LineModel -obj -grad -grad! -hess -redirect! -armijo_wolfe +linesearch +LineSearchOutput +armijo! +armijo_wolfe! ``` ## Merit -See also [`obj`](@ref). - ```@docs AbstractMeritModel +obj derivative L1Merit AugLagMerit +UncMerit ``` ## Stats diff --git a/src/linesearch/armijo.jl b/src/linesearch/armijo.jl new file mode 100644 index 00000000..c97e2224 --- /dev/null +++ b/src/linesearch/armijo.jl @@ -0,0 +1,57 @@ +export armijo! + +""" + lso = armijo!(ϕ, x, d, xt; kwargs...) + +Performs a line search from `x` along the direction `d` for `AbstractMeritModel` `ϕ`. +The steplength is chosen trying to satisfy the Armijo condition +```math +ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; η), +``` +using backtracking. + +The keyword aguments are: +- ... + +The output is a `LineSearchOutput`. 
The following specific information are also available in +`lso.specific`: +- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., + number of backtracks; + +The following keyword arguments can be provided: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`); +- `t`: starting steplength (default `1`); +- `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`); +- `σ`: backtracking decrease factor (default `0.4`); +- `bk_max`: maximum number of backtracks (default `10`). +""" +function armijo!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + σ :: T=T(0.4), + bk_max :: Int=10 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + + nbk = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) + + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + while !(ϕt ≤ ϕx + τ₀ * t * slope) && (nbk < bk_max) + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + + nbk += 1 + end + + return LineSearchOutput(t, xt, ϕt, specific=(nbk=nbk,)) +end diff --git a/src/linesearch/armijo_wolfe.jl b/src/linesearch/armijo_wolfe.jl index 14b3348e..aae84d77 100644 --- a/src/linesearch/armijo_wolfe.jl +++ b/src/linesearch/armijo_wolfe.jl @@ -1,84 +1,91 @@ -export armijo_wolfe +export armijo_wolfe! """ - t, good_grad, ht, nbk, nbW = armijo_wolfe(h, h₀, slope, g) + lso = armijo_wolfe!(ϕ, x, d, xt; kwargs...) -Performs a line search from `x` along the direction `d` as defined by the -`LineModel` ``h(t) = f(x + t d)``, where -`h₀ = h(0) = f(x)`, `slope = h'(0) = ∇f(x)ᵀd` and `g` is a vector that will be -overwritten with the gradient at various points. On exit, if `good_grad=true`, -`g` contains the gradient at the final step length. -The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. The Armijo condition is +Performs a line search from `x` along the direction `d` for `AbstractMeritModel` `ϕ`. +The steplength is chosen trying to satisfy the Armijo and Wolfe conditions. +The Armijo condition is ```math -h(t) ≤ h₀ + τ₀ t h'(0) +ϕ(x + t d; η) ≤ ϕ(x) + τ₀ t Dϕ(x; d, η) ``` and the Wolfe condition is ```math -h'(t) ≤ τ₁ h'(0). +Dϕ(x + t d; d, η) ≤ τ₁ Dϕ(x; d, η). ``` -Initially the step is increased trying to satisfy the Wolfe condition. -Afterwards, only backtracking is performed in order to try to satisfy the Armijo condition. -The final steplength may only satisfy Armijo's condition. +Initially the step is increased trying to satisfy the Wolfe condition. Afterwards, only backtracking +is performed in order to try to satisfy the Armijo condition. The final steplength may only satisfy +Armijo's condition. -The output is the following: -- t: the step length; -- good_grad: whether `g` is the gradient at `x + t * d`; -- ht: the model value at `t`, i.e., `f(x + t * d)`; -- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., number of backtracks; +The output is a `LineSearchOutput`. 
The following specific information are also available in +`lso.specific`: +- nbk: the number of times the steplength was decreased to satisfy the Armijo condition, i.e., + number of backtracks; - nbW: the number of times the steplength was increased to satisfy the Wolfe condition. The following keyword arguments can be provided: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `false`); - `t`: starting steplength (default `1`); - `τ₀`: slope factor in the Armijo condition (default `max(1e-4, √ϵₘ)`); - `τ₁`: slope factor in the Wolfe condition. It should satisfy `τ₁ > τ₀` (default `0.9999`); +- `σ`: backtracking decrease factor (default `0.4`); - `bk_max`: maximum number of backtracks (default `10`); -- `bW_max`: maximum number of increases (default `5`); -- `verbose`: whether to print information (default `false`). +- `bW_max`: maximum number of increases (default `5`). """ -function armijo_wolfe(h :: LineModel, - h₀ :: T, - slope :: T, - g :: Array{T,1}; - t :: T=one(T), - τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), - τ₁ :: T=T(0.9999), - bk_max :: Int=10, - bW_max :: Int=5, - verbose :: Bool=false) where T <: AbstractFloat +function armijo_wolfe!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V; + update_obj_at_x :: Bool = false, + update_derivative_at_x :: Bool = false, + t :: T=one(T), + τ₀ :: T=max(T(1.0e-4), sqrt(eps(T))), + τ₁ :: T=T(0.9999), + σ :: T=T(0.4), + bk_max :: Int=10, + bW_max :: Int=5 +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} # Perform improved Armijo linesearch. nbk = 0 nbW = 0 + ϕx = obj(ϕ, x; update=update_obj_at_x) + slope = derivative(ϕ, x, d; update=update_derivative_at_x) # First try to increase t to satisfy loose Wolfe condition - ht = obj(h, t) - slope_t = grad!(h, t, g) - while (slope_t < τ₁*slope) && (ht <= h₀ + τ₀ * t * slope) && (nbW < bW_max) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) + while (slope_t < τ₁*slope) && (ϕt <= ϕx + τ₀ * t * slope) && (nbW < bW_max) t *= 5 - ht = obj(h, t) - slope_t = grad!(h, t, g) + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + slope_t = derivative(ϕ, xt, d) nbW += 1 end - hgoal = h₀ + slope * t * τ₀; + ϕgoal = ϕx + slope * t * τ₀; fact = -T(0.8) ϵ = eps(T)^T(3/5) # Enrich Armijo's condition with Hager & Zhang numerical trick - Armijo = (ht <= hgoal) || ((ht <= h₀ + ϵ * abs(h₀)) && (slope_t <= fact * slope)) + Armijo = (ϕt <= ϕgoal) || ((ϕt <= ϕx + ϵ * abs(ϕx)) && (slope_t <= fact * slope)) good_grad = true while !Armijo && (nbk < bk_max) - t *= T(0.4) - ht = obj(h, t) - hgoal = h₀ + slope * t * τ₀; + t *= σ + xt .= x .+ t .* d + ϕt = obj(ϕ, xt) + ϕgoal = ϕx + slope * t * τ₀; # avoids unused grad! 
calls Armijo = false good_grad = false - if ht <= hgoal + if ϕt <= ϕgoal Armijo = true - elseif ht <= h₀ + ϵ * abs(h₀) - slope_t = grad!(h, t, g) + elseif ϕt <= ϕx + ϵ * abs(ϕx) + slope_t = derivative!(ϕ, xt, d) good_grad = true if slope_t <= fact * slope Armijo = true @@ -88,7 +95,5 @@ function armijo_wolfe(h :: LineModel, nbk += 1 end - verbose && @printf(" %4d %4d\n", nbk, nbW); - - return (t, good_grad, ht, nbk, nbW) + return LineSearchOutput(t, xt, ϕt, good_grad=good_grad, gt=ϕ.gx, specific=(nbk=nbk, nbW=nbW)) end diff --git a/src/linesearch/line_model.jl b/src/linesearch/line_model.jl deleted file mode 100644 index 08e4b78b..00000000 --- a/src/linesearch/line_model.jl +++ /dev/null @@ -1,85 +0,0 @@ -export LineModel -export obj, grad, derivative, grad!, derivative!, hess, redirect! - -"""A type to represent the restriction of a function to a direction. -Given f : R → Rⁿ, x ∈ Rⁿ and a nonzero direction d ∈ Rⁿ, - - ϕ = LineModel(nlp, x, d) - -represents the function ϕ : R → R defined by - - ϕ(t) := f(x + td). -""" -mutable struct LineModel <: AbstractNLPModel - meta :: NLPModelMeta - counters :: Counters - nlp :: AbstractNLPModel - x :: AbstractVector - d :: AbstractVector -end - -function LineModel(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector) - meta = NLPModelMeta(1, x0=zeros(eltype(x), 1), name="LineModel to $(nlp.meta.name))") - return LineModel(meta, Counters(), nlp, x, d) -end - -"""`redirect!(ϕ, x, d)` - -Change the values of x and d of the LineModel ϕ, but retains the counters. -""" -function redirect!(ϕ :: LineModel, x :: AbstractVector, d :: AbstractVector) - ϕ.x, ϕ.d = x, d - return ϕ -end - -"""`obj(f, t)` evaluates the objective of the `LineModel` - - ϕ(t) := f(x + td). -""" -function NLPModels.obj(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_obj) - return obj(f.nlp, f.x + t * f.d) -end - -"""`grad(f, t)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. -""" -function NLPModels.grad(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_grad) - return dot(grad(f.nlp, f.x + t * f.d), f.d) -end -derivative(f :: LineModel, t :: AbstractFloat) = grad(f, t) - -"""`grad!(f, t, g)` evaluates the first derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ'(t) = ∇f(x + td)ᵀd. - -The gradient ∇f(x + td) is stored in `g`. -""" -function NLPModels.grad!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) - NLPModels.increment!(f, :neval_grad) - return dot(grad!(f.nlp, f.x + t * f.d, g), f.d) -end -derivative!(f :: LineModel, t :: AbstractFloat, g :: AbstractVector) = grad!(f, t, g) - -"""Evaluate the second derivative of the `LineModel` - - ϕ(t) := f(x + td), - -i.e., - - ϕ"(t) = dᵀ∇²f(x + td)d. -""" -function NLPModels.hess(f :: LineModel, t :: AbstractFloat) - NLPModels.increment!(f, :neval_hess) - return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d)) -end diff --git a/src/linesearch/linesearch.jl b/src/linesearch/linesearch.jl index 69680d00..70332a16 100644 --- a/src/linesearch/linesearch.jl +++ b/src/linesearch/linesearch.jl @@ -1,2 +1,46 @@ -include("line_model.jl") +export linesearch, linesearch! + +include("linesearchoutput.jl") +include("armijo.jl") include("armijo_wolfe.jl") + +const ls_dict = Dict( + :armijo => armijo!, + :armijo_wolfe => armijo_wolfe! +) + +""" + lso = linesearch(ϕ, x, d; method=:armijo, kwargs...) + lso = linesearch!(ϕ, x, d, xt; method=:armijo, kwargs...) 
+ lso = linesearch(nlp, x, d; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + lso = linesearch!(nlp, x, d, xt; method=:armijo, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Perform a line search for unconstrained or bound-constrained problem `nlp` or merit model `ϕ` starting at `x` on direction `d`, storing the result in `xt`, for the in place version. +Returns a `LineSearchOutput`. +The `method` keyword argument can be any of the following: + +- `:armijo`: Calls the simple Armijo line search, performing backtracking. +- `:armijo_wolfe`: Calls the Armijo-Wolfe line search, that tries to satisfy the Wolfe conditions in addition to the Armijo conditions. + +When the input is `ϕ`, a merit model, the following keyword arguments are available for any method: +- `update_obj_at_x`: Whether to call `obj(ϕ, x; update=true)` to update `ϕ.fx` and `ϕ.cx` (default: `false`); +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad`. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ = 0`). +In such case a `UncMerit` is created using the keyword arguments `fx` and `gx`. **Be warned** that `gx` will be overwritten. +""" +function linesearch end + +function linesearch(nlp_or_ϕ, x, d; kwargs...) + xt = copy(x) + linesearch!(nlp_or_ϕ, x, d, xt; kwargs...) +end + +function linesearch!(nlp :: AbstractNLPModel, x :: AbstractVector, args...; fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + ϕ = UncMerit(nlp, fx=fx, gx=gx) + linesearch!(ϕ, x, args...; kwargs...) +end + +function linesearch!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:armijo, kwargs...) + SolverTools.ls_dict[method](ϕ, x, args...; kwargs...) +end \ No newline at end of file diff --git a/src/linesearch/linesearchoutput.jl b/src/linesearch/linesearchoutput.jl new file mode 100644 index 00000000..8eb28e98 --- /dev/null +++ b/src/linesearch/linesearchoutput.jl @@ -0,0 +1,35 @@ +export LineSearchOutput + +# TODO: Generalize AbstractExecutionStats? + +""" + LineSearchOutput + +Structure to store the output of a line search algorithm. +The following variables are stored: +- `t`: The final step length; +- `xt`: The final iterate ``xt = x + t d``; +- `ϕt`: The final merit function value, i.e., `ϕ(xt)`; +- `good_grad`: Whether the gradient was computed at `xt`; +- `gt`: If `good_grad` is `true`, the gradient at `xt`; +- `specific`: A NamedTuple object storing specific method information. Check the specifc method for this information. +""" +struct LineSearchOutput{T <: Real, V <: AbstractVector{<: T}} + t :: T + xt :: V + ϕt :: T + good_grad :: Bool + gt :: V + specific :: NamedTuple +end + +function LineSearchOutput( + t :: T, + xt :: V, + ϕt :: T; + good_grad :: Bool=false, + gt :: V=T[], + specific :: NamedTuple = NamedTuple() +) where {T <: Real, V <: AbstractVector{<: T}} + LineSearchOutput(t, xt, ϕt, good_grad, gt, specific) +end \ No newline at end of file diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index f2663dc6..76b58685 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -15,7 +15,7 @@ defined by In addition to the keyword arguments declared in [`AbstractMeritModel`](@ref), an `AugLagMerit` also accepts the argument `y`. 
""" -mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel +mutable struct AugLagMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} meta :: NLPModelMeta counters :: Counters nlp :: M diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index 6a09da2c..b1a148f0 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -12,7 +12,7 @@ defined by \phi_1(x; η) = f(x) + η\|c(x)\|₁ ``` """ -mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel +mutable struct L1Merit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} meta :: NLPModelMeta counters :: Counters nlp :: M diff --git a/src/merit/merit.jl b/src/merit/merit.jl index c4869a9d..878bfed4 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -25,16 +25,27 @@ derivative function. Furthermore, all implemented methods accept an `update` keyword that defaults to `true`. It is used to determine whether the internal stored values should be updated or not. """ -abstract type AbstractMeritModel <: AbstractNLPModel end +abstract type AbstractMeritModel{M,T,V} <: AbstractNLPModel end + +""" + obj(merit, x; update=true) + +Evaluates the `merit` model at `x`. +This will call `obj` and `cons!` to update the internal values of `fx` and `cx`, unless `update=false`. +""" +function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=false) + throw(MethorError(obj, (merit, x))) +end """ derivative(merit, x, d; update=true) Computes the directional derivative of `merit` at `x` on direction `d`. -This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, but will assume that `cx` is correct. -The option exist to allow updating the `η` parameter without recomputing `fx` and `cx`. +This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, unless `update=false`. +This function assumes that `cx` is correct, though. """ function derivative end include("auglagmerit.jl") include("l1merit.jl") +include("uncmerit.jl") \ No newline at end of file diff --git a/src/merit/uncmerit.jl b/src/merit/uncmerit.jl new file mode 100644 index 00000000..96d76752 --- /dev/null +++ b/src/merit/uncmerit.jl @@ -0,0 +1,48 @@ +export UncMerit + +@doc raw""" + UncMerit(nlp; kwargs...) + UncMerit(nlp, η; kwargs...) # η is ignored + +Creates a merit wrapper for unconstrained or bound-countrained problems. +Formally accepts the same constructor as other merit functions, but in practice it ignores any constrained-related value, even if `nlp` is constrained. 
+""" +mutable struct UncMerit{M <: AbstractNLPModel, T <: Real, V <: AbstractVector} <: AbstractMeritModel{M,T,V} + meta :: NLPModelMeta + counters :: Counters + nlp :: M + η :: T # ignored in practice + fx :: T + gx :: V + cx :: V # ignored in practice + Ad :: V # ignored in practice +end + +function UncMerit( + nlp :: M, + η :: T = 0.0; + fx :: T = T(Inf), + gx :: V = fill(T(Inf), nlp.meta.nvar), + cx :: V = T[], + Ad :: V = T[], +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + meta = NLPModelMeta(nlp.meta.nvar) + UncMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) +end + +function NLPModels.obj(merit :: UncMerit, x :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + end + return merit.fx +end + +function derivative(merit :: UncMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) + @lencheck merit.meta.nvar x d + if update + grad!(merit.nlp, x, merit.gx) + end + return dot(merit.gx, d) +end \ No newline at end of file diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl index 19374908..415878fd 100644 --- a/test/dummy_linesearch_solver.jl +++ b/test/dummy_linesearch_solver.jl @@ -5,6 +5,7 @@ function dummy_linesearch_solver( rtol :: Real = 1e-6, max_eval :: Int = 1000, max_time :: Float64 = 30.0, + ls_method :: Symbol = :armijo, merit_constructor = L1Merit ) @@ -13,6 +14,7 @@ function dummy_linesearch_solver( nvar, ncon = nlp.meta.nvar, nlp.meta.ncon T = eltype(x) + xt = copy(x) cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) gx = grad(nlp, x) @@ -51,14 +53,10 @@ function dummy_linesearch_solver( end ϕx = obj(ϕ, x, update=false) - t = 1.0 - ϕt = obj(ϕ, x + Δx) # Updates cx - while !(ϕt ≤ ϕx + 1e-2 * t * Dϕx) - t /= 2 - ϕt = obj(ϕ, x + t * Δx) # Updates cx - end - x += t * Δx - y += t * Δy + lso = linesearch!(ϕ, x, Δx, xt, method=ls_method) + x .= xt + t = lso.t + y .+= t * Δy grad!(nlp, x, gx) # Updates ϕ.gx if ncon > 0 diff --git a/test/test_linesearch.jl b/test/test_linesearch.jl index 4e519255..cebd4db9 100644 --- a/test/test_linesearch.jl +++ b/test/test_linesearch.jl @@ -1,63 +1,81 @@ @testset "Linesearch" begin - @testset "LineModel" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - x = nlp.meta.x0 - d = -ones(2) - lm = LineModel(nlp, x, d) - g = zeros(2) + ls_methods = [:armijo, :armijo_wolfe] + @testset "Unconstrained problems" begin + specific_tests = Dict( + :armijo => Dict(:nbk => [0, 0, 1, 0]), + :armijo_wolfe => Dict(:nbk => [0, 0, 1, 5], :nbW => [0, 0, 0, 1]), + ) + for ls_method in ls_methods + @testset "Method $ls_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) + x = nlp.meta.x0 + d = -ones(2) + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t == 1 + @test lso.ϕt == 0.0 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[1] + end - @test obj(lm, 0.0) == obj(nlp, x) - @test grad(lm, 0.0) == dot(grad(nlp, x), d) - @test grad!(lm, 0.0, g) == dot(grad(nlp, x), d) - @test g == grad(nlp, x) - @test derivative(lm, 0.0) == dot(grad(nlp, x), d) - @test derivative!(lm, 0.0, g) == dot(grad(nlp, x), d) - @test g == grad(nlp, x) - @test hess(lm, 0.0) == dot(d, Symmetric(hess(nlp, x), :L) * d) + d = -ones(2) / 2 + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t == 1 + @test lso.ϕt == 1.25 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[2] + end - @test obj(lm, 1.0) == 0.0 - @test grad(lm, 1.0) == 0.0 - @test hess(lm, 
1.0) == 2d[1]^2 + 8d[2]^2 + d = -2 * ones(2) + lso = linesearch(nlp, x, d, method=ls_method) + @test lso.t < 1 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[3] + end - redirect!(lm, zeros(2), ones(2)) - @test obj(lm, 0.0) == 0.0 - @test grad(lm, 0.0) == 0.0 - @test hess(lm, 0.0) == 10.0 + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2)) + x = nlp.meta.x0 + d = [1.7; 3.2] + lso = linesearch(nlp, x, d, method=ls_method) + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[4] + end + end # @testset + end # for + end # @testset - @test neval_obj(lm) == 3 - @test neval_grad(lm) == 6 - @test neval_hess(lm) == 3 - end + @testset "Constrainted problems" begin + specific_tests = Dict( + :armijo => Dict(:nbk => [0, 0, 1]), + :armijo_wolfe => Dict(:nbk => [0, 0, 1]), + ) + for ls_method in ls_methods + @testset "Method $ls_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0]) + x = nlp.meta.x0 + ϕ = L1Merit(nlp, 1.0) + d = -ones(2) + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t == 1 + @test lso.ϕt == 0.0 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[1] + end - @testset "Armijo-Wolfe" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - lm = LineModel(nlp, nlp.meta.x0, -ones(2)) - g = zeros(2) + d = -ones(2) / 2 + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t == 1 + @test lso.ϕt == 1.25 + 1.5 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[2] + end - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 0.0 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -ones(2) / 2) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t == 1 - @test ft == 1.25 - @test nbk == 0 - @test nbW == 0 - - redirect!(lm, nlp.meta.x0, -2 * ones(2)) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW == 0 - - nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, zeros(2)) - lm = LineModel(nlp, nlp.meta.x0, [1.7; 3.2]) - t, good_grad, ft, nbk, nbW = armijo_wolfe(lm, obj(lm, 0.0), grad(lm, 0.0), g) - @test t < 1 - @test nbk > 0 - @test nbW > 0 - end -end + d = -2 * ones(2) + lso = linesearch(ϕ, x, d, method=ls_method, update_obj_at_x=true, update_derivative_at_x=true) + @test lso.t < 1 + for (k,v) in specific_tests[ls_method] + @test lso.specific[k] == v[3] + end + end # @testset + end # for + end # @testset +end \ No newline at end of file From 5a9d2d285bb2916b0458703421137d77c8d85c30 Mon Sep 17 00:00:00 2001 From: Abel Date: Tue, 18 Aug 2020 23:32:08 -0300 Subject: [PATCH 5/7] Draft of trust region with merit --- Project.toml | 3 +- src/merit/auglagmerit.jl | 14 ++-- src/merit/l1merit.jl | 14 ++-- src/merit/merit.jl | 30 ++++++- test/dummy_linesearch_solver.jl | 4 +- test/dummy_solver.jl | 4 +- test/dummy_trust_region_solver.jl | 128 ++++++++++++++++++++++++++++++ test/merit.jl | 29 +++++++ test/runtests.jl | 1 + 9 files changed, 202 insertions(+), 25 deletions(-) create mode 100644 test/dummy_trust_region_solver.jl diff --git a/Project.toml b/Project.toml index 908e4d8d..4792db91 100644 --- a/Project.toml +++ b/Project.toml @@ -19,10 +19,11 @@ NLPModels = "0.13" julia = "^1.3.0" [extras] +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = 
"37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["NLPModels", "LinearAlgebra", "Test", "Logging"] +test = ["Krylov", "NLPModels", "LinearAlgebra", "Test", "Logging"] diff --git a/src/merit/auglagmerit.jl b/src/merit/auglagmerit.jl index 76b58685..ab8f16f7 100644 --- a/src/merit/auglagmerit.jl +++ b/src/merit/auglagmerit.jl @@ -48,14 +48,12 @@ function AugLagMerit( AugLagMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad, y, y⁺, Jᵀy⁺, Jv, JᵀJv) end -function NLPModels.obj(merit :: AugLagMerit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + dot(merit.y, merit.cx) + merit.η * dot(merit.cx, merit.cx) / 2 +function dualobj(merit :: AugLagMerit) + merit.fx + dot(merit.y, merit.cx) +end + +function primalobj(merit :: AugLagMerit) + dot(merit.cx, merit.cx) / 2 end function derivative(merit :: AugLagMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/merit/l1merit.jl b/src/merit/l1merit.jl index b1a148f0..52136f07 100644 --- a/src/merit/l1merit.jl +++ b/src/merit/l1merit.jl @@ -35,14 +35,12 @@ function L1Merit( L1Merit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end -function NLPModels.obj(merit :: L1Merit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) - end - return merit.fx + merit.η * norm(merit.cx, 1) +function dualobj(merit :: L1Merit) + merit.fx +end + +function primalobj(merit :: L1Merit) + norm(merit.cx, 1) end function derivative(merit :: L1Merit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/merit/merit.jl b/src/merit/merit.jl index 878bfed4..da8b9229 100644 --- a/src/merit/merit.jl +++ b/src/merit/merit.jl @@ -1,5 +1,5 @@ using NLPModels -export AbstractMeritModel, obj, derivative +export AbstractMeritModel, obj, derivative, dualobj, primalobj """ AbstractMeritModel @@ -27,14 +27,36 @@ to determine whether the internal stored values should be updated or not. """ abstract type AbstractMeritModel{M,T,V} <: AbstractNLPModel end +""" + dualobj(merit) + +Return the dual part of `merit`, i.e., the part without `η`, at its current values. +No updates or internal function calls are made. +""" +function dualobj end + +""" + primalobj(merit) + +Return the primal part of `merit`, i.e., the part with `η`, at its current values. +No updates or internal function calls are made. +""" +function primalobj end + """ obj(merit, x; update=true) Evaluates the `merit` model at `x`. This will call `obj` and `cons!` to update the internal values of `fx` and `cx`, unless `update=false`. 
""" -function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=false) - throw(MethorError(obj, (merit, x))) +function NLPModels.obj(merit :: AbstractMeritModel, x :: AbstractVector; update :: Bool=true) + @lencheck merit.meta.nvar x + NLPModels.increment!(merit, :neval_obj) + if update + merit.fx = obj(merit.nlp, x) + merit.nlp.meta.ncon > 0 && cons!(merit.nlp, x, merit.cx) + end + dualobj(merit) + merit.η * primalobj(merit) end """ @@ -42,7 +64,7 @@ end Computes the directional derivative of `merit` at `x` on direction `d`. This will call `grad!` and `jprod` to update the internal values of `gx` and `Ad`, unless `update=false`. -This function assumes that `cx` is correct, though. +This function assumes that `cx` is already computed, though. """ function derivative end diff --git a/test/dummy_linesearch_solver.jl b/test/dummy_linesearch_solver.jl index 415878fd..87ed14e5 100644 --- a/test/dummy_linesearch_solver.jl +++ b/test/dummy_linesearch_solver.jl @@ -1,6 +1,6 @@ function dummy_linesearch_solver( nlp :: AbstractNLPModel; - x :: AbstractVector = nlp.meta.x0, + x :: AbstractVector = copy(nlp.meta.x0), atol :: Real = 1e-6, rtol :: Real = 1e-6, max_eval :: Int = 1000, @@ -80,7 +80,7 @@ function dummy_linesearch_solver( :max_eval end - return GenericExecutionStats(:unknown, nlp, + return GenericExecutionStats(status, nlp, objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), elapsed_time=elapsed_time, solution=x, iter=iter diff --git a/test/dummy_solver.jl b/test/dummy_solver.jl index 8e29e260..466bdb2e 100644 --- a/test/dummy_solver.jl +++ b/test/dummy_solver.jl @@ -1,5 +1,5 @@ function dummy_solver(nlp :: AbstractNLPModel; - x :: AbstractVector = nlp.meta.x0, + x :: AbstractVector = copy(nlp.meta.x0), atol :: Real = sqrt(eps(eltype(x))), rtol :: Real = sqrt(eps(eltype(x))), max_eval :: Int = 1000, @@ -61,7 +61,7 @@ function dummy_solver(nlp :: AbstractNLPModel; :max_eval end - return GenericExecutionStats(:unknown, nlp, + return GenericExecutionStats(status, nlp, objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), elapsed_time=elapsed_time, solution=x, iter=iter diff --git a/test/dummy_trust_region_solver.jl b/test/dummy_trust_region_solver.jl new file mode 100644 index 00000000..0acb5842 --- /dev/null +++ b/test/dummy_trust_region_solver.jl @@ -0,0 +1,128 @@ +using Krylov + +function dummy_trust_region_solver( + nlp :: AbstractNLPModel; + x :: AbstractVector = copy(nlp.meta.x0), + atol :: Real = 1e-6, + rtol :: Real = 1e-6, + max_eval :: Int = 1000, + max_time :: Float64 = 30.0, + # ls_method :: Symbol = :armijo, + merit_constructor = L1Merit +) + + start_time = time() + elapsed_time = 0.0 + evals(nlp) = neval_obj(nlp) + neval_cons(nlp) + + nvar, ncon = nlp.meta.nvar, nlp.meta.ncon + T = eltype(x) + xt = copy(x) + + cx = ncon > 0 ? cons(nlp, x) : zeros(T, 0) + gx = grad(nlp, x) + Jx = ncon > 0 ? 
jac(nlp, x) : zeros(T, 0, nvar) + y = cgls(Jx', -gx)[1] + + dual = gx + Jx' * y + + iter = 0 + + Δ = 100.0 + ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + + ϵd = atol + rtol * norm(dual) + ϵp = atol + + fx = obj(nlp, x) + ϕ.fx = fx + @info log_header([:iter, :nfc, :f, :c, :dual, :time, :Δ, :η, :iterkind], [Int, Int, T, T, T, T, T, T, String]) + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, "Initial"]) + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = evals(nlp) > max_eval || elapsed_time > max_time + + while !(solved || tired) + Hxy = ncon > 0 ? hess_op(nlp, x, y) : hess_op(nlp, x) + local Δx + ϕx = obj(ϕ, x, update=false) + ϕxprimal = primalobj(ϕ) + ϕxdual = dualobj(ϕ) + if ncon > 0 + # Normal step + v = cgls(Jx, -cx, radius=0.8Δ)[1] + # Tangent step + Z = nullspace(Jx) + Δx = v + Z * cg(Z' * Hxy * Z, -Z' * (dual + Hxy * v), radius=sqrt(Δ^2 - dot(v, v)))[1] + else + Δx = cg(Hxy, -dual, radius=Δ)[1] + end + + # Workaround to be agnostic to the merit function + # Pretend fx is the quadratic approximation to obj(nlp, x + Δx) + # Pretend cx is que linear approximation to cons(nlp, x + Δx) + ϕ.fx += dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 + Ad = Jx * Δx + cpAd = cx .+ Ad + ct = ϕ.cx + ϕ.cx = cpAd + ϕtprimal = primalobj(ϕ) + ϕtdual = dualobj(ϕ) + + xt = x + Δx + ϕt = obj(ϕ, xt, update=true) + ft = ϕ.fx + + @assert ϕxprimal ≥ ϕtprimal + while ϕxdual - ϕtdual < -0.1 * ϕ.η * (ϕxprimal - ϕtprimal) < 0 # For unconstrained problems, right side is 0. + ϕ.η *= 2 + end + + Ared = ϕx - ϕt + Pred = ϕxdual - ϕtdual + ϕ.η * (ϕxprimal - ϕtprimal) + + ρ = Ared / Pred + iter_kind = if ρ > 1e-2 # accept + x .= xt + fx = ft + cx = ϕ.cx + grad!(nlp, x, gx) # Updates ϕ.gx + if ncon > 0 + Jx = jac(nlp, x) + end + y = cgls(Jx', -gx)[1] + dual = gx + Jx' * y + if ρ > 0.75 && norm(Δx) > 0.9Δ + Δ *= 2 + :great + else + :good + end + else + ϕ.fx = fx + ϕ.cx = cx + Δ /= 4 + :bad + end + + elapsed_time = time() - start_time + solved = norm(dual) < ϵd && norm(cx) < ϵp + tired = evals(nlp) > max_eval || elapsed_time > max_time + + iter += 1 + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, iter_kind]) + end + + status = if solved + :first_order + elseif elapsed_time > max_time + :max_time + else + :max_eval + end + + return GenericExecutionStats(status, nlp, + objective=fx, dual_feas=norm(dual), primal_feas=norm(cx), + multipliers=y, multipliers_L=zeros(T, nvar), multipliers_U=zeros(T, nvar), + elapsed_time=elapsed_time, solution=x, iter=iter + ) +end diff --git a/test/merit.jl b/test/merit.jl index 8c11e6a6..43ab2deb 100644 --- a/test/merit.jl +++ b/test/merit.jl @@ -62,6 +62,21 @@ function test_merit(merit_constructor, ϕ) @test neval_jprod(nlp) == 1 end + @testset "Separate parts" begin + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [x[1]^2 + x[2]^2 - 1], [0.0], [0.0]) + ϕ = merit_constructor(nlp, 0.0) + for i = 1:2 + x = randn(nlp.meta.nvar) + d = randn(nlp.meta.nvar) + ϕ.η = 0.0 + manual_dualobj = obj(ϕ, x) + ϕ.η = 1.0 + manual_primalobj = obj(ϕ, x) - manual_dualobj + @test dualobj(ϕ) ≈ manual_dualobj + @test primalobj(ϕ) ≈ manual_primalobj + end + end + @testset "Safe for unconstrained" begin nlp = ADNLPModel(x -> dot(x, x), ones(2)) x = nlp.meta.x0 @@ -86,6 +101,20 @@ function test_merit(merit_constructor, ϕ) @test output.objective < 1e-6 @test output.primal_feas < 1e-6 @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0]) + output = 
dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas == 0 + @test output.dual_feas < 1e-6 + + nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2; 1.0], x -> [exp(x[1] - 1) - 1], [0.0], [0.0]) + output = dummy_trust_region_solver(nlp, merit_constructor=merit_constructor, rtol=0) + @test isapprox(output.solution, ones(2), rtol=1e-6) + @test output.objective < 1e-6 + @test output.primal_feas < 1e-6 + @test output.dual_feas < 1e-6 end end diff --git a/test/runtests.jl b/test/runtests.jl index f4b46944..e4d59e1b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ using LinearAlgebra, Logging, Test include("dummy_solver.jl") include("dummy_linesearch_solver.jl") +include("dummy_trust_region_solver.jl") include("test_auxiliary.jl") include("test_linesearch.jl") From dca394983084d525c35eb8e4365ee40de6c944ef Mon Sep 17 00:00:00 2001 From: Abel Date: Wed, 19 Aug 2020 20:30:28 -0300 Subject: [PATCH 6/7] Complete change of trust region --- docs/src/api.md | 14 +-- src/merit/uncmerit.jl | 13 +-- src/trust-region/basic-trust-region.jl | 108 +++++++++++------- src/trust-region/tron-trust-region.jl | 140 ++++++++++++++---------- src/trust-region/trust-region-output.jl | 25 +++++ src/trust-region/trust-region.jl | 139 ++++++++++------------- test/test_trust_region.jl | 113 ++++++++++--------- 7 files changed, 304 insertions(+), 248 deletions(-) create mode 100644 src/trust-region/trust-region-output.jl diff --git a/docs/src/api.md b/docs/src/api.md index 8abb451e..1a75842e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -29,6 +29,8 @@ armijo_wolfe! ```@docs AbstractMeritModel obj +dualobj +primalobj derivative L1Merit AugLagMerit @@ -46,13 +48,7 @@ show_statuses ```@docs TrustRegionException -SolverTools.AbstractTrustRegion -aredpred -acceptable -reset! -get_property -set_property! -update! -TrustRegion -TRONTrustRegion +trust_region +basic_trust_region! +tron_trust_region! 
``` diff --git a/src/merit/uncmerit.jl index 96d76752..d3e7a869 100644 --- a/src/merit/uncmerit.jl +++ b/src/merit/uncmerit.jl @@ -30,13 +30,12 @@ function UncMerit( UncMerit{M,T,V}(meta, Counters(), nlp, η, fx, gx, cx, Ad) end -function NLPModels.obj(merit :: UncMerit, x :: AbstractVector; update :: Bool = true) - @lencheck merit.meta.nvar x - NLPModels.increment!(merit, :neval_obj) - if update - merit.fx = obj(merit.nlp, x) - end - return merit.fx +function dualobj(merit :: UncMerit) + merit.fx +end + +function primalobj(merit :: UncMerit) + zero(eltype(merit.η)) end function derivative(merit :: UncMerit, x :: AbstractVector, d :: AbstractVector; update :: Bool = true) diff --git a/src/trust-region/basic-trust-region.jl index 5523437e..28f8bd53 100644 --- a/src/trust-region/basic-trust-region.jl +++ b/src/trust-region/basic-trust-region.jl @@ -1,40 +1,72 @@ -export TrustRegion - -"""Basic trust region type.""" -mutable struct TrustRegion <: AbstractTrustRegion - initial_radius :: AbstractFloat - radius :: AbstractFloat - max_radius :: AbstractFloat - acceptance_threshold :: AbstractFloat - increase_threshold :: AbstractFloat - decrease_factor :: AbstractFloat - increase_factor :: AbstractFloat - ratio :: AbstractFloat - - function TrustRegion(initial_radius :: T; - max_radius :: T=one(T)/sqrt(eps(T)), - acceptance_threshold :: T=T(1.0e-4), - increase_threshold :: T=T(0.95), - decrease_factor :: T=one(T)/3, - increase_factor :: T=3 * one(T) / 2) where T <: Number - - initial_radius > 0 || (initial_radius = one(T)) - max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius")) - (0 < acceptance_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds")) - (0 < decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors")) - - return new(initial_radius, initial_radius, max_radius, - acceptance_threshold, increase_threshold, - decrease_factor, increase_factor, 0) +export basic_trust_region! + +""" + tro = basic_trust_region!(ϕ, x, Δx, xt, Δq, Δc, Δ; kwargs...) + +Update `xt` and `Δ` using a trust region strategy. See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy. + +This strategy corresponds to the keyword `method=:basic` on `trust_region`. + +Given `ρ = ared / pred`, +- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = σdec‖d‖`; +- if `ηacc ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = Δₖ`; +- otherwise, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, σinc‖d‖))`.
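+
+For example, a small sketch mirroring the new tests in `test/test_trust_region.jl`, called through the `trust_region` front-end (assumes `LinearAlgebra` for `dot` and `norm`, and `ADNLPModel` as in the tests):
+
+```julia
+nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2))
+x = nlp.meta.x0
+d = -ones(2)
+Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2  # change in the quadratic model along d
+tro = trust_region(nlp, x, d, Δq, norm(d), method=:basic, update_obj_at_x=true)
+tro.status, tro.Δ, tro.success  # :great/:good/:bad, updated radius, acceptance flag
+```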
+""" +function basic_trust_region!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V, + Δq :: T, + Ad :: V, + Δ :: T; + update_obj_at_x :: Bool=false, + penalty_update :: Symbol=:basic, + max_radius :: T=one(T)/sqrt(eps(T)), + ηacc :: Real=T(1.0e-4), + ηinc :: Real=T(0.95), + σdec :: Real=one(T) / 3, + σinc :: Real=3 * one(T) / 2, +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds")) + (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors")) + ϕx = obj(ϕ, x, update=update_obj_at_x) + ϕxf = dualobj(ϕ) + ϕxc = primalobj(ϕ) + ϕ.fx += Δq + ϕ.cx .+= Ad + mf = dualobj(ϕ) + mc = primalobj(ϕ) + + if penalty_update == :basic + while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0 + ϕ.η *= 2 + end + else + throw(TrustRegionException("Unidentified penalty_update $penalty_update")) end -end - -function update!(tr :: TrustRegion, step_norm :: AbstractFloat) - if tr.ratio < tr.acceptance_threshold - tr.radius = tr.decrease_factor * step_norm - elseif tr.ratio >= tr.increase_threshold - tr.radius = min(tr.max_radius, - max(tr.radius, tr.increase_factor * step_norm)) + + @. xt = x + d + ϕt = obj(ϕ, xt) + ϕtf = dualobj(ϕ) + ϕtc = primalobj(ϕ) + + m = mf + ϕ.η * mc + ared = ϕx - ϕt + pred = ϕx - m + ρ = ared / pred + + normd = norm(d) + status = if ρ > ηinc + Δ = min(max_radius, max(Δ, σinc * normd)) + :great + elseif ρ < ηacc + Δ = σdec * normd + xt .= x + :bad + else + :good end - return tr -end + + return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt) +end \ No newline at end of file diff --git a/src/trust-region/tron-trust-region.jl b/src/trust-region/tron-trust-region.jl index 57ff961b..b25fb375 100644 --- a/src/trust-region/tron-trust-region.jl +++ b/src/trust-region/tron-trust-region.jl @@ -1,59 +1,87 @@ -export TRONTrustRegion - -"""Trust region used by TRON""" -mutable struct TRONTrustRegion <: AbstractTrustRegion - initial_radius :: AbstractFloat - radius :: AbstractFloat - max_radius :: AbstractFloat - acceptance_threshold :: AbstractFloat - decrease_threshold :: AbstractFloat - increase_threshold :: AbstractFloat - large_decrease_factor :: AbstractFloat - small_decrease_factor :: AbstractFloat - increase_factor :: AbstractFloat - ratio :: AbstractFloat - quad_min :: AbstractFloat - - function TRONTrustRegion(initial_radius :: T; - max_radius :: T=one(T)/sqrt(eps(T)), - acceptance_threshold :: T=T(1.0e-4), - decrease_threshold :: T=T(0.25), - increase_threshold :: T=T(0.75), - large_decrease_factor :: T=T(0.25), - small_decrease_factor :: T=T(0.5), - increase_factor :: T=T(4)) where T <: AbstractFloat - - initial_radius > 0 || (initial_radius = one(T)) - max_radius > initial_radius || throw(TrustRegionException("Invalid initial radius")) - (0 < acceptance_threshold < decrease_threshold < increase_threshold < 1) || throw(TrustRegionException("Invalid thresholds")) - (0 < large_decrease_factor < small_decrease_factor < 1 < increase_factor) || throw(TrustRegionException("Invalid decrease/increase factors")) - - return new(initial_radius, initial_radius, max_radius, - acceptance_threshold, decrease_threshold, increase_threshold, - large_decrease_factor, small_decrease_factor, increase_factor, - zero(T), zero(T)) +export tron_trust_region! + +""" + tro = tron_trust_region!(ϕ, x, Δx, xt, Δq, Δc, Δ; kwargs...) + + +Update `xt` and `Δ` using a trust region strategy. 
See [`trust_region`](@ref) for the basic usage, and below for specific information about this strategy. + +This strategy corresponds to the keyword `method=:tron` on `trust_region`. +In addition to the default keywords, the following keywords are also available: +- `update_derivative_at_x`: Whether to call `derivative(ϕ, x, d; update=true)` to update `ϕ.gx` and `ϕ.Ad` (default: `true`); +- `ηdec`: threshold for decreasing Δ (default: 0.25) +- `σlarge_dec`: factor used in the decrease heuristics (default: 0.25) + +Given `ρ = ared / pred` and the directional derivative `slope = Dϕ(x, d)`, +- Compute `γ = ϕt - ϕx - slope` +- Compute `α = γ ≤ 0 ? σinc : max(σlarge_dec, -slope / 2γ)` +- if `ρ < ηacc`, then `xₖ₊₁ = xₖ` and `Δₖ₊₁ = min(max(α, σlarge_dec)‖d‖, σdecΔₖ)`; +- if `ηacc ≤ ρ < ηdec`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_decΔₖ, min(α‖d‖, σdecΔₖ))`; +- if `ηdec ≤ ρ < ηinc`, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = max(σlarge_decΔₖ, min(α‖d‖, σincΔₖ))`; +- otherwise, then `xₖ₊₁ = xₖ + d` and `Δₖ₊₁ = min(Δₘₐₓ, max(Δₖ, min(α‖d‖, σincΔₖ)))`. +""" +function tron_trust_region!( + ϕ :: AbstractMeritModel{M,T,V}, + x :: V, + d :: V, + xt :: V, + Δq :: T, + Ad :: V, + Δ :: T; + update_obj_at_x :: Bool=false, + update_derivative_at_x :: Bool=true, + penalty_update :: Symbol=:basic, + max_radius :: T=one(T)/sqrt(eps(T)), + ηacc :: Real=T(1.0e-4), + ηdec :: Real=T(0.25), + ηinc :: Real=T(0.95), + σlarge_dec :: Real=T(0.25), + σdec :: Real=T(0.5), + σinc :: Real=3 * one(T) / 2, +) where {M <: AbstractNLPModel, T <: Real, V <: AbstractVector{<: T}} + (0 < ηacc < ηinc < 1) || throw(TrustRegionException("Invalid thresholds")) + (0 < σdec < 1 < σinc) || throw(TrustRegionException("Invalid decrease/increase factors")) + ϕx = obj(ϕ, x, update=update_obj_at_x) + slope = derivative(ϕ, x, d, update=update_derivative_at_x) + ϕxf = dualobj(ϕ) + ϕxc = primalobj(ϕ) + ϕ.fx += Δq + ϕ.cx .+= Ad + mf = dualobj(ϕ) + mc = primalobj(ϕ) + + if penalty_update == :basic + while ϕxf - mf < -0.1ϕ.η * (ϕxc - mc) < 0 + ϕ.η *= 2 + end + else + throw(TrustRegionException("Unidentified penalty_update $penalty_update")) end + + @. xt = x + d + ϕt = obj(ϕ, xt) + ϕtf = dualobj(ϕ) + ϕtc = primalobj(ϕ) + + m = mf + ϕ.η * mc + ared = ϕx - ϕt + pred = ϕx - m + ρ = ared / pred + + γ = ϕt - ϕx - slope + α = γ ≤ 0 ?
σinc : max(σlarge_dec, -slope / 2γ) + + normd = norm(d) + Δ, status = if ρ < ηacc + xt .= x + min(max(α, σlarge_dec) * normd, σdec * Δ), :bad + elseif ρ < ηdec + max(σlarge_dec * Δ, min(α * normd, σdec * Δ)), :good + elseif ρ < ηinc + max(σlarge_dec * Δ, min(α * normd, σinc * Δ)), :good else - min(tr.max_radius, max(tr.radius, min(α * step_norm, σ₃ * tr.radius))) + min(max_radius, max(Δ, min(α * normd, σinc * Δ))), :great end - return tr -end + + return TrustRegionOutput(status, ared, pred, ρ, status != :bad, Δ, xt) +end \ No newline at end of file diff --git a/src/trust-region/trust-region-output.jl new file mode 100644 index 00000000..dbddbe17 --- /dev/null +++ b/src/trust-region/trust-region-output.jl @@ -0,0 +1,25 @@ +export TrustRegionOutput + +struct TrustRegionOutput{T <: Real, V <: AbstractVector} + status :: Symbol + ared :: T + pred :: T + ρ :: T + success :: Bool + Δ :: T + xt :: V + specific :: NamedTuple +end + +function TrustRegionOutput( + status :: Symbol, + ared :: T, + pred :: T, + ρ :: T, + success :: Bool, + Δ :: T, + xt :: V; + specific :: NamedTuple = NamedTuple() +) where {T <: Real, V <: AbstractVector{<: T}} + TrustRegionOutput(status, ared, pred, ρ, success, Δ, xt, specific) +end \ No newline at end of file diff --git a/src/trust-region/trust-region.jl index a1ec4a79..20f47ffb 100644 --- a/src/trust-region/trust-region.jl +++ b/src/trust-region/trust-region.jl @@ -1,98 +1,69 @@ # A trust-region type and basic utility functions. -import NLPModels: reset! -export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! +export TrustRegionException +export trust_region, trust_region! +# import NLPModels: reset! +# export TrustRegionException, acceptable, aredpred, get_property, ratio, ratio!, reset!, set_property!, update! + +include("trust-region-output.jl") +include("basic-trust-region.jl") +include("tron-trust-region.jl") + +const tr_dict = Dict( + :basic => basic_trust_region!, + :tron => tron_trust_region!, +) "Exception type raised in case of error." mutable struct TrustRegionException <: Exception msg :: String end -"""`AbstractTrustRegion` - -An abstract trust region type so that specific trust regions define update -rules differently. Child types must have at least the following fields: - -- `acceptance_threshold :: AbstractFloat` -- `initial_radius :: AbstractFloat` -- `radius :: AbstractFloat` -- `ratio :: AbstractFloat` - -and the following function: - -- `update!(tr, step_norm)` -""" -abstract type AbstractTrustRegion end - -"""`ared, pred = aredpred(nlp, f, f_trial, Δm, x_trial, step, slope)` - -Compute the actual and predicted reductions `∆f` and `Δm`, where -`Δf = f_trial - f` is the actual reduction is an objective/merit/penalty function, -`Δm = m_trial - m` is the reduction predicted by the model `m` of `f`. -We assume that `m` is being minimized, and therefore that `Δm < 0`.
-""" -function aredpred(nlp :: AbstractNLPModel, f :: T, f_trial :: T, Δm :: T, - x_trial :: Vector{T}, step :: Vector{T}, slope :: T) where T <: AbstractFloat - absf = abs(f) - ϵ = eps(T) - pred = Δm - max(one(T), absf) * 10 * ϵ - - ared = f_trial - f + max(one(T), absf) * 10 * ϵ - if (abs(Δm) < 10_000 * ϵ) || (abs(ared) < 10_000 * ϵ * absf) - # correct for roundoff error - g_trial = grad(nlp, x_trial) - slope_trial = dot(g_trial, step) - ared = (slope_trial + slope) / 2 - end - return ared, pred -end - - -"""`acceptable(tr)` - -Return `true` if a step is acceptable """ -function acceptable(tr :: AbstractTrustRegion) - return tr.ratio >= tr.acceptance_threshold -end - -"""`reset!(tr)` - -Reset the trust-region radius to its initial value + tro = trust_region(ϕ, x, d, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region!(ϕ, x, d, xt, Δq, Ad, Δ; method=:basic, penalty_update=:basic, kwargs...) + tro = trust_region(nlp, x, d, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + tro = trust_region!(nlp, x, d, xt, Δq, Δ; method=:basic, fx=obj(nlp, x), gx=grad(nlp, x), kwargs...) + +Update `xt` and `Δ` using a trust region strategy according to the `method` keyword. +The output is a `TrustRegionOutput`. + +The actual reduction of the model is given by +``` +ared = ϕx - ϕt, +``` +where `ϕx = obj(nlp, x; update=update_obj_at_x)` and `ϕt = obj(nlp, xt)`. The predicted reduction is +given by +``` +pred = ϕx - (ϕx + Δq) - ϕ.η P(ϕ.cx + Ad), +``` +where `P` is the penalty function used by `ϕ` and `Ad` is the Jacobian times `d`. + +The following keyword argument are available: +- `update_obj_at_x`: Whether to update `ϕ`'s objective at `x` (default: `false`); +- `penalty_update`: Strategy to update the parameter `ϕ.η`. (default: `:basic`); +- `max_radius`: Maximum value for `Δ` (default 1/√ϵ(T)); +- `ηacc`: threshold for successful step (default: 1.0e-4); +- `ηinc`: threshold for great step (default: 0.95), +- `σdec`: factor used in the radius decrease heuristics (default: 1/3); +- `σinc`: factor used in the radius increase heuristics (default: 3/2); + +Some methods have additional keyword arguments. + +For the `nlp` version, the problem must be unconstrained or bound-constrained (i.e., `nlp.meta.nequ += 0`). In such case a `UncMerit` is created using the keyword argument `fx`. """ -function reset!(tr :: AbstractTrustRegion) - tr.radius = tr.initial_radius - return tr -end +function trust_region end - -"""A basic getter for `AbstractTrustRegion` instances. -Should be overhauled when it's possible to overload `getfield()` -and `setfield!()`. See -https://github.com/JuliaLang/julia/issues/1974 -""" -function get_property(tr :: AbstractTrustRegion, prop :: Symbol) - # All fields are gettable. - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - getfield(tr, prop) +function trust_region(nlp_or_ϕ, x, d, args...; kwargs...) + xt = copy(x) + trust_region!(nlp_or_ϕ, x, d, xt, args...; kwargs...) end -"""A basic setter for `AbstractTrustRegion` instances. -""" -function set_property!(tr :: AbstractTrustRegion, prop :: Symbol, value :: Any) - gettable = fieldnames(typeof(tr)) - prop in gettable || throw(TrustRegionException("Unknown property: $prop")) - setfield!(tr, prop, value) +function trust_region!(nlp :: AbstractNLPModel, x :: AbstractVector, d :: AbstractVector, xt :: AbstractVector, Δq :: Real, Δ :: Real; fx=obj(nlp, x), kwargs...) 
+ ϕ = UncMerit(nlp, fx=fx) + trust_region!(ϕ, x, d, xt, Δq, eltype(x)[], Δ; kwargs...) end -"""`update!(tr, step_norm)` - -Update the trust-region radius based on the ratio of actual vs. predicted reduction -and the step norm. -""" -function update!(tr :: AbstractTrustRegion, ratio :: AbstractFloat, step_norm :: AbstractFloat) - throw(NotImplementedError("`update!` not implemented for this TrustRegion type")) -end - -include("basic-trust-region.jl") -include("tron-trust-region.jl") +function trust_region!(ϕ :: AbstractMeritModel, x :: AbstractVector, args...; method :: Symbol=:basic, kwargs...) + SolverTools.tr_dict[method](ϕ, x, args...; kwargs...) +end \ No newline at end of file diff --git a/test/test_trust_region.jl b/test/test_trust_region.jl index bd543b5c..2940dd3e 100644 --- a/test/test_trust_region.jl +++ b/test/test_trust_region.jl @@ -1,62 +1,67 @@ @testset "Trust Region" begin - @testset "aredpred" begin - nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) - x = nlp.meta.x0 - d = -ones(2) - xt = x + d - f = obj(nlp, x) - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - tr = TRONTrustRegion(100.0) - ared, pred, qmin = aredpred(tr, nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - d = -1e-12 * ones(2) - xt = x + d - ft = obj(nlp, xt) - Δm = ft - f - ared, pred = aredpred(nlp, f, ft, Δm, xt, d, dot(grad(nlp, x), d)) - @test abs(ared - pred) < 1e-12 - end + tr_methods = [:basic, :tron] + + @testset "Unconstrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2)) + x = nlp.meta.x0 + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success - @testset "BasicTrustRegion" begin - Δ₀ = 10.0 - tr = TrustRegion(Δ₀) - tr.ratio = 1.0 - @test acceptable(tr) == true + Δq = -tro.ared / 0.25 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = 10.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ - reset!(tr) - tr.ratio = -1.0 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) + Δq = -tro.ared / 1e-6 + tro = trust_region(nlp, x, d, Δq, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end - @testset "TRONTrustRegion" begin - Δ₀ = 10.0 - tr = TRONTrustRegion(Δ₀) - tr.ratio = 1.0 - @test acceptable(tr) == true + @testset "Constrained problems" begin + for tr_method in tr_methods + @testset "Method $tr_method" begin + nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2), x -> [x[1] + 2x[2]], [0.0], [0.0]) + x = nlp.meta.x0 + ϕ = L1Merit(nlp, 1.0) + d = -ones(2) + Δ = norm(d) + Δq = dot(grad(nlp, x), d) + dot(d, hprod(nlp, x, d)) / 2 + Ad = jprod(nlp, x, d) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.status == :great + @test tro.Δ ≥ Δ + @test tro.xt == x + d + @test tro.success + + Δq = -tro.ared / 0.25 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 0.25 + @test tro.status == :good + @test tro.xt == x + d + @test tro.success - tr.ratio = 
tr.acceptance_threshold - 1 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.acceptance_threshold + tr.decrease_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = (tr.decrease_threshold + tr.increase_threshold) / 2 - update!(tr, Δ₀) - @test tr.radius < Δ₀ - reset!(tr) - tr.ratio = tr.increase_threshold + 1 - tr.quad_min = 2.0 - update!(tr, Δ₀) - @test tr.radius > Δ₀ + Δq = -tro.ared / 1e-6 - ϕ.η * (norm(ϕ.cx, 1) - norm(ϕ.cx + Ad, 1)) + tro = trust_region(ϕ, x, d, Δq, Ad, Δ, method=tr_method, update_obj_at_x=true) + @test tro.ρ == 1e-6 + @test tro.status == :bad + @test tro.xt == x + @test !tro.success + end + end end end From e412920e4cf596287fc2273c4ff00b37b37c02d1 Mon Sep 17 00:00:00 2001 From: Abel Date: Wed, 19 Aug 2020 20:54:47 -0300 Subject: [PATCH 7/7] Use the new trust region on the dummy solver --- test/dummy_trust_region_solver.jl | 51 +++++++------------------------ 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/test/dummy_trust_region_solver.jl b/test/dummy_trust_region_solver.jl index 0acb5842..284ab000 100644 --- a/test/dummy_trust_region_solver.jl +++ b/test/dummy_trust_region_solver.jl @@ -7,7 +7,7 @@ function dummy_trust_region_solver( rtol :: Real = 1e-6, max_eval :: Int = 1000, max_time :: Float64 = 30.0, - # ls_method :: Symbol = :armijo, + tr_method :: Symbol = :basic, merit_constructor = L1Merit ) @@ -29,7 +29,7 @@ function dummy_trust_region_solver( iter = 0 Δ = 100.0 - ϕ = merit_constructor(nlp, 1e-3, cx=cx, gx=gx) + ϕ = merit_constructor(nlp, 1e-3, cx=copy(cx), gx=gx) ϵd = atol + rtol * norm(dual) ϵp = atol @@ -44,9 +44,6 @@ function dummy_trust_region_solver( while !(solved || tired) Hxy = ncon > 0 ? hess_op(nlp, x, y) : hess_op(nlp, x) local Δx - ϕx = obj(ϕ, x, update=false) - ϕxprimal = primalobj(ϕ) - ϕxdual = dualobj(ϕ) if ncon > 0 # Normal step v = cgls(Jx, -cx, radius=0.8Δ)[1] @@ -57,51 +54,25 @@ function dummy_trust_region_solver( Δx = cg(Hxy, -dual, radius=Δ)[1] end - # Workaround to be agnostic to the merit function - # Pretend fx is the quadratic approximation to obj(nlp, x + Δx) - # Pretend cx is que linear approximation to cons(nlp, x + Δx) - ϕ.fx += dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 + Δq = dot(Δx, gx) + dot(Δx, Hxy * Δx) / 2 Ad = Jx * Δx - cpAd = cx .+ Ad - ct = ϕ.cx - ϕ.cx = cpAd - ϕtprimal = primalobj(ϕ) - ϕtdual = dualobj(ϕ) - - xt = x + Δx - ϕt = obj(ϕ, xt, update=true) - ft = ϕ.fx - - @assert ϕxprimal ≥ ϕtprimal - while ϕxdual - ϕtdual < -0.1 * ϕ.η * (ϕxprimal - ϕtprimal) < 0 # For unconstrained problems, right side is 0. - ϕ.η *= 2 - end + tro = trust_region!(ϕ, x, Δx, xt, Δq, Ad, Δ, method=tr_method, update_obj_at_x=false) - Ared = ϕx - ϕt - Pred = ϕxdual - ϕtdual + ϕ.η * (ϕxprimal - ϕtprimal) + Δ = tro.Δ + x .= tro.xt - ρ = Ared / Pred - iter_kind = if ρ > 1e-2 # accept - x .= xt - fx = ft - cx = ϕ.cx - grad!(nlp, x, gx) # Updates ϕ.gx + if tro.success + fx = ϕ.fx + cx .= ϕ.cx + grad!(nlp, x, gx) if ncon > 0 Jx = jac(nlp, x) end y = cgls(Jx', -gx)[1] dual = gx + Jx' * y - if ρ > 0.75 && norm(Δx) > 0.9Δ - Δ *= 2 - :great - else - :good - end else ϕ.fx = fx ϕ.cx = cx - Δ /= 4 - :bad end elapsed_time = time() - start_time @@ -109,7 +80,7 @@ function dummy_trust_region_solver( tired = evals(nlp) > max_eval || elapsed_time > max_time iter += 1 - @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, iter_kind]) + @info log_row(Any[iter, evals(nlp), fx, norm(cx), norm(dual), elapsed_time, Δ, ϕ.η, tro.status]) end status = if solved