Calculate derivatives using Jacobian-vector products

devmotion · devmotion · commit 49e807d8625a · 2025-11-25T23:31:49.000+01:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,15 +12,15 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        julia-version: ['min', 'lts', '1']
+        # julia-version: ['min', 'lts', '1']
+        julia-version: ['1']
         os: [ubuntu-latest, windows-latest, macOS-latest]
     steps:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - uses: julia-actions/cache@v2
-      - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         with:
           annotate: true
diff --git a/Project.toml b/Project.toml
@@ -14,9 +14,8 @@ DoubleFloats = "1"
 ExplicitImports = "1.14"
 JET = "0.9, 0.10"
 LinearAlgebra = "<0.0.1, 1"
-NLSolversBase = "7"
+NLSolversBase = "8"
 NaNMath = "1"
-Optim = "1"
 OptimTestProblems = "2"
 Printf = "<0.0.1, 1"
 Test = "<0.0.1, 1"
@@ -27,9 +26,11 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 OptimTestProblems = "cec144fc-5a64-5bc6-99fb-dde8f63e154c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "ExplicitImports", "JET", "Test", "OptimTestProblems", "Optim", "DoubleFloats"]
+test = ["Aqua", "ExplicitImports", "JET", "Test", "OptimTestProblems", "DoubleFloats"]
+
+[sources]
+NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" }
diff --git a/src/LineSearches.jl b/src/LineSearches.jl
@@ -1,7 +1,7 @@
 module LineSearches
 
 using Printf: @sprintf
-using LinearAlgebra: dot, norm
+using LinearAlgebra: norm
 using NaNMath: NaNMath
 using NLSolversBase: NLSolversBase, AbstractObjective
 
@@ -16,74 +16,40 @@ export InitialHagerZhang, InitialStatic, InitialPrevious,
 function make_ϕ(df, x_new, x, s)
     function ϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
+        x_new .= muladd.(α, s, x)
 
         # Evaluate f(x+α*s)
-        NLSolversBase.value!(df, x_new)
+        return NLSolversBase.value!(df, x_new)
     end
     ϕ
 end
 function make_ϕdϕ(df, x_new, x, s)
     function ϕdϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        NLSolversBase.value_gradient!(df, x_new)
+        x_new .= muladd.(α, s, x)
 
         # Calculate ϕ(a_i), ϕ'(a_i)
-        NLSolversBase.value(df), real(dot(NLSolversBase.gradient(df), s))
+        ϕ, dϕ = NLSolversBase.value_jvp!(df, x_new, s)
+
+        return ϕ, real(dϕ)
     end
     ϕdϕ
 end
 function make_ϕ_dϕ(df, x_new, x, s)
     function dϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        NLSolversBase.gradient!(df, x_new)
+        x_new .= muladd.(α, s, x)
 
         # Calculate ϕ'(a_i)
-        real(dot(NLSolversBase.gradient(df), s))
+        return real(NLSolversBase.jvp!(df, x_new, s))
     end
     make_ϕ(df, x_new, x, s), dϕ
 end
 function make_ϕ_dϕ_ϕdϕ(df, x_new, x, s)
-    function dϕ(α)
-        # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate f(x+α*s) and ∇f(x+α*s)
-        NLSolversBase.gradient!(df, x_new)
-
-        # Calculate ϕ'(a_i)
-        real(dot(NLSolversBase.gradient(df), s))
-    end
-    function ϕdϕ(α)
-        # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        NLSolversBase.value_gradient!(df, x_new)
-
-        # Calculate ϕ'(a_i)
-        NLSolversBase.value(df), real(dot(NLSolversBase.gradient(df), s))
-    end
-    make_ϕ(df, x_new, x, s), dϕ, ϕdϕ
+    make_ϕ_dϕ(df, x_new, x, s)..., make_ϕdϕ(df, x_new, x, s)
 end
 function make_ϕ_ϕdϕ(df, x_new, x, s)
-    function ϕdϕ(α)
-        # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        NLSolversBase.value_gradient!(df, x_new)
-
-        # Calculate ϕ'(a_i)
-        NLSolversBase.value(df), real(dot(NLSolversBase.gradient(df), s))
-    end
-    make_ϕ(df, x_new, x, s), ϕdϕ
+    make_ϕ(df, x_new, x, s), make_ϕdϕ(df, x_new, x, s)
 end
 
 include("types.jl")
diff --git a/src/initialguess.jl b/src/initialguess.jl
@@ -74,7 +74,8 @@ function (is::InitialQuadratic{T})(ls, state, phi_0, dphi_0, df) where T
         # If we're at the first iteration
         αguess = is.α0
     else
-        αguess = 2 * (NLSolversBase.value(df) - state.f_x_previous) / dphi_0
+        f_x_ls = NLSolversBase.value!(df, state.x_ls)
+        αguess = 2 * (f_x_ls - state.f_x_previous) / dphi_0
         αguess = NaNMath.max(is.αmin, state.alpha*is.ρ, αguess)
         αguess = NaNMath.min(is.αmax, αguess)
         # if αguess ≈ 1, then make it 1 (Newton-type behaviour)
@@ -180,8 +181,8 @@ function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
         # and the user has not provided an initial step size (is.α0 is NaN),
         # then we
         # pick the initial step size according to HZ #I0
-        state.alpha = _hzI0(state.x, NLSolversBase.gradient(df),
-                            NLSolversBase.value(df),
+        fx, gx = NLSolversBase.value_gradient!(df, state.x)
+        state.alpha = _hzI0(state.x, gx, fx,
                             is.αmax,
                             convert(eltype(state.x), is.ψ0)) # Hack to deal with type instability between is{T} and state.x
         if Tls <: HagerZhang
diff --git a/src/types.jl b/src/types.jl
@@ -1,4 +1,4 @@
-mutable struct LineSearchException{T<:Real} <: Exception
+struct LineSearchException{T<:Real} <: Exception
     message::AbstractString
     alpha::T
 end
diff --git a/test/arbitrary_precision.jl b/test/arbitrary_precision.jl
@@ -98,6 +98,7 @@ doublefloatstypes = [Double64, Double32, Double16]
     ls = HagerZhang{T}()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(T, 2)
     is = InitialQuadratic{T}(snap2one=(convert(T, 0.9),convert(T, Inf)))
     is(ls, state, phi_0, dphi_0, df)
     @test !isnan(state.alpha)
@@ -107,6 +108,7 @@ doublefloatstypes = [Double64, Double32, Double16]
     ls = HagerZhang{T}()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(T, 2)
     is = InitialQuadratic{T}(snap2one=(convert(T, 0.75),convert(T, Inf)))
     is(ls, state, phi_0, dphi_0, df)
     @test !isnan(state.alpha)
diff --git a/test/examples.jl b/test/examples.jl
diff --git a/test/initial.jl b/test/initial.jl
@@ -88,6 +88,7 @@
     ls = HagerZhang()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(2)
     is = InitialQuadratic(snap2one=(0.9,Inf))
     is(ls, state, phi_0, dphi_0, df)
     @test state.alpha == 1.0
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -14,7 +14,6 @@ my_tests = [
     "initial.jl",
     "alphacalc.jl",
     "arbitrary_precision.jl",
-    "examples.jl",
     "captured.jl",
     "issues.jl",
     "qa.jl",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-mutable struct LineSearchException{T<:Real} <: Exception` |
|  | `1` | `+struct LineSearchException{T<:Real} <: Exception` |
| `2` | `2` | `message::AbstractString` |
| `3` | `3` | `alpha::T` |
| `4` | `4` | `end` |