Commit 8c2015c

Improve allocation of LineModel
1 parent 9f316a3 commit 8c2015c

5 files changed: +152 −22 lines changed

Project.toml

Lines changed: 1 addition & 2 deletions

@@ -16,8 +16,7 @@ julia = "^1.3.0"
 [extras]
 ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["ADNLPModels", "Logging", "NLPModels", "Test"]
+test = ["ADNLPModels", "Logging", "Test"]

src/linesearch/line_model.jl

Lines changed: 31 additions & 11 deletions

@@ -1,7 +1,7 @@
 import NLPModels: obj, grad, grad!, objgrad!, objgrad, hess

 export LineModel
-export obj, grad, derivative, grad!, objgrad!, objgrad, derivative!, hess, redirect!
+export obj, grad, derivative, grad!, objgrad!, objgrad, derivative!, hess, hess!, redirect!

 """A type to represent the restriction of a function to a direction.
 Given f : Rⁿ → R, x ∈ Rⁿ and a nonzero direction d ∈ Rⁿ,
@@ -12,17 +12,18 @@ represents the function ϕ : R → R defined by

     ϕ(t) := f(x + td).
 """
-mutable struct LineModel{T, S} <: AbstractNLPModel{T, S}
+mutable struct LineModel{T, S, M <: AbstractNLPModel{T, S}} <: AbstractNLPModel{T, S}
   meta::NLPModelMeta{T, S}
   counters::Counters
-  nlp::AbstractNLPModel{T, S}
+  nlp::M
   x::S
   d::S
+  xt::S
 end

-function LineModel(nlp::AbstractNLPModel{T, S}, x::S, d::S) where {T, S}
+function LineModel(nlp::AbstractNLPModel{T, S}, x::S, d::S; xt::S = similar(x)) where {T, S}
   meta = NLPModelMeta{T, S}(1, x0 = zeros(T, 1), name = "LineModel to $(nlp.meta.name)")
-  return LineModel(meta, Counters(), nlp, x, d)
+  return LineModel(meta, Counters(), nlp, x, d, xt)
 end

 """`redirect!(ϕ, x, d)`
@@ -40,7 +41,8 @@ end
 """
 function obj(f::LineModel, t::AbstractFloat)
   NLPModels.increment!(f, :neval_obj)
-  return obj(f.nlp, f.x + t * f.d)
+  @. f.xt = f.x + t * f.d
+  return obj(f.nlp, f.xt)
 end

 """`grad(f, t)` evaluates the first derivative of the `LineModel`
@@ -53,7 +55,8 @@ i.e.,
 """
 function grad(f::LineModel, t::AbstractFloat)
   NLPModels.increment!(f, :neval_grad)
-  return dot(grad(f.nlp, f.x + t * f.d), f.d)
+  @. f.xt = f.x + t * f.d
+  return dot(grad(f.nlp, f.xt), f.d)
 end
 derivative(f::LineModel, t::AbstractFloat) = grad(f, t)

@@ -69,7 +72,8 @@ The gradient ∇f(x + td) is stored in `g`.
 """
 function grad!(f::LineModel, t::AbstractFloat, g::AbstractVector)
   NLPModels.increment!(f, :neval_grad)
-  return dot(grad!(f.nlp, f.x + t * f.d, g), f.d)
+  @. f.xt = f.x + t * f.d
+  return dot(grad!(f.nlp, f.xt, g), f.d)
 end
 derivative!(f::LineModel, t::AbstractFloat, g::AbstractVector) = grad!(f, t, g)

@@ -86,8 +90,9 @@ The gradient ∇f(x + td) is stored in `g`.
 function objgrad!(f::LineModel, t::AbstractFloat, g::AbstractVector)
   NLPModels.increment!(f, :neval_obj)
   NLPModels.increment!(f, :neval_grad)
-  fx, gx = objgrad!(f.nlp, f.x + t * f.d, g)
-  return fx, dot(gx, f.d)
+  @. f.xt = f.x + t * f.d
+  fx, _ = objgrad!(f.nlp, f.xt, g)
+  return fx, dot(g, f.d)
 end

 """`objgrad(f, t)` evaluates the objective and first derivative of the `LineModel`
@@ -112,5 +117,20 @@ i.e.,
 """
 function hess(f::LineModel, t::AbstractFloat)
   NLPModels.increment!(f, :neval_hess)
-  return dot(f.d, hprod(f.nlp, f.x + t * f.d, f.d))
+  @. f.xt = f.x + t * f.d
+  return dot(f.d, hprod(f.nlp, f.xt, f.d))
+end
+
+"""Evaluate the second derivative of the `LineModel`
+
+    ϕ(t) := f(x + td),
+
+i.e.,
+
+    ϕ"(t) = dᵀ∇²f(x + td)d.
+"""
+function hess!(f::LineModel, t::AbstractFloat, Hv::AbstractVector)
+  NLPModels.increment!(f, :neval_hess)
+  @. f.xt = f.x + t * f.d
+  return dot(f.d, hprod!(f.nlp, f.xt, f.d, Hv))
 end
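Every evaluation method above follows the same pattern: the out-of-place `f.x + t * f.d`, which allocated a fresh vector on each call, becomes a fused in-place broadcast into the preallocated buffer `f.xt`. The new type parameter `M <: AbstractNLPModel{T, S}` helps for the same reason: storing `nlp` with a concrete type lets calls on it dispatch statically instead of dynamically. A minimal sketch of the broadcast difference, using hypothetical standalone functions rather than the package's API:

```julia
using LinearAlgebra

# Hypothetical stand-ins for the LineModel fields x, d and the new buffer xt.
ϕ_alloc(x, d, t) = norm(x + t * d)   # out-of-place: builds a temporary vector

function ϕ_inplace(xt, x, d, t)
  @. xt = x + t * d                  # fused broadcast into the existing buffer
  return norm(xt)
end

function measure(xt, x, d, t)
  # Measuring inside a function avoids counting global-variable boxing.
  a1 = @allocated ϕ_alloc(x, d, t)
  a2 = @allocated ϕ_inplace(xt, x, d, t)
  return a1, a2
end

x, d, xt = ones(200), -ones(200), zeros(200)
measure(xt, x, d, 0.5)   # first call: counts include JIT compilation
measure(xt, x, d, 0.5)   # e.g. (1664, 0): only the out-of-place form allocates
```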

test/runtests.jl

Lines changed: 1 addition & 0 deletions

@@ -8,6 +8,7 @@ using ADNLPModels, NLPModels
 using LinearAlgebra, Logging, Test

 include("dummy_solver.jl")
+include("simple_model.jl")

 include("test_auxiliary.jl")
 include("test_linesearch.jl")

test/simple_model.jl

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+mutable struct SimpleModel{T, S} <: AbstractNLPModel{T, S}
+  meta :: NLPModelMeta{T, S}
+  counters :: Counters
+end
+
+SimpleModel(n :: Int) = SimpleModel(NLPModelMeta(n, x0 = ones(n)), Counters())
+
+function NLPModels.obj(nlp::SimpleModel, x::AbstractVector)
+  increment!(nlp, :neval_obj)
+  sum(xi ^ 4 for xi in x) / 12
+end
+
+function NLPModels.grad!(nlp::SimpleModel, x::AbstractVector, g::AbstractVector)
+  increment!(nlp, :neval_grad)
+  @. g = x ^ 3 / 3
+  g
+end
+
+function NLPModels.objgrad!(nlp::SimpleModel, x::AbstractVector, g::AbstractVector)
+  increment!(nlp, :neval_obj)
+  increment!(nlp, :neval_grad)
+  @. g = x ^ 3 / 3
+  return sum(xi ^ 4 for xi in x) / 12, g
+end
+
+function NLPModels.hprod!(nlp::SimpleModel, x::AbstractVector{T}, v::AbstractVector, Hv::AbstractVector; obj_weight::T = one(T)) where T
+  increment!(nlp, :neval_hprod)
+  @. Hv = obj_weight * x ^ 2 * v
+  Hv
+end
+
+function NLPModels.hess(nlp::SimpleModel, x::AbstractVector{T}; obj_weight::T = one(T)) where T
+  increment!(nlp, :neval_hess)
+  return obj_weight .* diagm(0 => x .^ 2)
+end
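For reference, `SimpleModel` implements f(x) = Σᵢ xᵢ⁴ / 12, so ∇f(x) has components xᵢ³ / 3 and ∇²f(x) = diag(x.^2). With x0 = ones(n) and d = -ones(n), the restricted function has the closed-form values ϕ(0) = n/12, ϕ′(0) = -n/3 and ϕ″(0) = n that the testset below asserts. A minimal usage sketch, assuming the two files above and the package defining `LineModel` are loaded:

```julia
using NLPModels, LinearAlgebra   # plus the package that defines LineModel

n = 4
nlp = SimpleModel(n)                        # x0 = ones(n)
lm = LineModel(nlp, nlp.meta.x0, -ones(n))

obj(lm, 0.0)    # ϕ(0)  = f(x0)        = n / 12 ≈ 0.3333
grad(lm, 0.0)   # ϕ′(0) = ∇f(x0)ᵀ d    = -n / 3 ≈ -1.3333
hess(lm, 0.0)   # ϕ″(0) = dᵀ ∇²f(x0) d = n      = 4.0
obj(lm, 1.0)    # x0 + d = 0, so ϕ(1)  = 0.0
```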

test/test_linesearch.jl

Lines changed: 84 additions & 9 deletions

@@ -1,10 +1,46 @@
+"""
+    @wrappedallocs(expr)
+
+Given an expression, this macro wraps that expression inside a new function
+which will evaluate that expression and measure the amount of memory allocated
+by the expression. Wrapping the expression in a new function allows for more
+accurate memory allocation detection when using global variables (e.g. when
+at the REPL).
+
+For example, `@wrappedallocs(x + y)` produces:
+
+```julia
+function g(x1, x2)
+  @allocated x1 + x2
+end
+g(x, y)
+```
+
+You can use this macro in a unit test to verify that a function does not
+allocate:
+
+```
+@test @wrappedallocs(x + y) == 0
+```
+"""
+macro wrappedallocs(expr)
+  argnames = [gensym() for a in expr.args]
+  quote
+    function g($(argnames...))
+      @allocated $(Expr(expr.head, argnames...))
+    end
+    $(Expr(:call, :g, [esc(a) for a in expr.args]...))
+  end
+end
+
 @testset "Linesearch" begin
   @testset "LineModel" begin
-    nlp = ADNLPModel(x -> x[1]^2 + 4 * x[2]^2, ones(2))
+    n = 200
+    nlp = SimpleModel(n)
     x = nlp.meta.x0
-    d = -ones(2)
+    d = -ones(n)
     lm = LineModel(nlp, x, d)
-    g = zeros(2)
+    g = zeros(n)

     @test obj(lm, 0.0) == obj(nlp, x)
     @test grad(lm, 0.0) == dot(grad(nlp, x), d)
@@ -17,19 +53,19 @@
     @test g == grad(nlp, x + d)
     @test objgrad(lm, 0.0) == (obj(nlp, x), dot(grad(nlp, x), d))
     @test hess(lm, 0.0) == dot(d, Symmetric(hess(nlp, x), :L) * d)
+    @test hess!(lm, 0.0, g) == dot(d, hprod!(nlp, x, d, g))

     @test obj(lm, 1.0) == 0.0
     @test grad(lm, 1.0) == 0.0
-    @test hess(lm, 1.0) == 2d[1]^2 + 8d[2]^2
+    @test hess(lm, 1.0) == 0.0

-    redirect!(lm, zeros(2), ones(2))
-    @test obj(lm, 0.0) == 0.0
-    @test grad(lm, 0.0) == 0.0
-    @test hess(lm, 0.0) == 10.0
+    @test obj(lm, 0.0) ≈ n / 12
+    @test grad(lm, 0.0) ≈ -n / 3
+    @test hess(lm, 0.0) == n

     @test neval_obj(lm) == 5
     @test neval_grad(lm) == 8
-    @test neval_hess(lm) == 3
+    @test neval_hess(lm) == 4
   end

   @testset "Armijo-Wolfe" begin
@@ -63,4 +99,43 @@
     @test nbk > 0
     @test nbW > 0
   end
+
+  if VERSION ≥ v"1.6"
+    @testset "Don't allocate" begin
+      n = 200
+      nlp = SimpleModel(n)
+      x = nlp.meta.x0
+      g = zeros(n)
+      d = -40 * ones(n)
+      lm = LineModel(nlp, x, d)
+
+      al = @wrappedallocs obj(lm, 1.0)
+      @test al == 0
+
+      al = @wrappedallocs grad!(lm, 1.0, g)
+      @test al == 0
+
+      al = @wrappedallocs objgrad!(lm, 1.0, g)
+      @test al == 0
+
+      al = @wrappedallocs hess!(lm, 1.0, g)
+      @test al == 0
+
+      h₀ = obj(lm, 0.0)
+      slope = grad(lm, 0.0)
+      (t, gg, ht, nbk, nbW) = armijo_wolfe(lm, h₀, slope, g)
+      al = @wrappedallocs armijo_wolfe(lm, h₀, slope, g)
+      @test al == 0
+
+      function armijo_wolfe_alloc(lm, h₀, slope, g, bk_max)
+        @allocated armijo_wolfe(lm, h₀, slope, g, bk_max = bk_max)
+      end
+
+      for bk_max = 0:8
+        (t, gg, ht, nbk, nbW) = armijo_wolfe(lm, h₀, slope, g, bk_max = bk_max)
+        al = armijo_wolfe_alloc(lm, h₀, slope, g, bk_max)
+        @test al == 0
+      end
+    end
+  end
 end
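One detail worth noting in the new allocation testset: `armijo_wolfe` is called once before `@wrappedallocs` measures it, and the `bk_max` loop likewise calls it before `armijo_wolfe_alloc`. A first call with fresh argument types triggers JIT compilation, which itself allocates, so an unwarmed measurement would report spurious bytes. A tiny illustration of the warm-up pattern, with a hypothetical function:

```julia
square!(y, x) = (y .= x .^ 2; y)   # in-place, allocation-free at steady state

measured(y, x) = @allocated square!(y, x)

y, x = zeros(8), ones(8)
measured(y, x)   # first call: nonzero, dominated by JIT compilation
measured(y, x)   # subsequent calls: 0
```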
