Calculate derivatives using Jacobian-vector products

devmotion · devmotion · commit 99ef858518f3 · 2025-12-02T09:26:34.000+01:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,15 +12,15 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        julia-version: ['min', 'lts', '1']
+        # julia-version: ['min', 'lts', '1']
+        julia-version: ['1']
         os: [ubuntu-latest, windows-latest, macOS-latest]
     steps:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - uses: julia-actions/cache@v2
-      - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         with:
           annotate: true
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "LineSearches"
 uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
-version = "7.5.1"
+version = "7.6.0"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -14,9 +14,8 @@ DoubleFloats = "1"
 ExplicitImports = "1.14"
 JET = "0.9, 0.10"
 LinearAlgebra = "<0.0.1, 1"
-NLSolversBase = "7"
+NLSolversBase = "8"
 NaNMath = "1"
-Optim = "1"
 OptimTestProblems = "2"
 Printf = "<0.0.1, 1"
 Test = "<0.0.1, 1"
@@ -27,9 +26,11 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
-Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 OptimTestProblems = "cec144fc-5a64-5bc6-99fb-dde8f63e154c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "ExplicitImports", "JET", "Test", "OptimTestProblems", "Optim", "DoubleFloats"]
+test = ["Aqua", "ExplicitImports", "JET", "Test", "OptimTestProblems", "DoubleFloats"]
+
+[sources]
+NLSolversBase = { url = "https://github.com/devmotion/NLSolversBase.jl.git", rev = "dmw/jvp" }
diff --git a/src/LineSearches.jl b/src/LineSearches.jl
@@ -1,7 +1,7 @@
 module LineSearches
 
 using Printf: @sprintf
-using LinearAlgebra: dot, norm
+using LinearAlgebra: norm
 using NaNMath: NaNMath
 using NLSolversBase: NLSolversBase, AbstractObjective
 
@@ -16,36 +16,32 @@ export InitialHagerZhang, InitialStatic, InitialPrevious,
 function make_ϕ(df, x_new, x, s)
     function ϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
+        x_new .= muladd.(α, s, x)
 
         # Evaluate f(x+α*s)
-        NLSolversBase.value!(df, x_new)
+        return NLSolversBase.value!(df, x_new)
     end
     ϕ
 end
 function make_ϕdϕ(df, x_new, x, s)
     function ϕdϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        f_x_new, g_x_new = NLSolversBase.value_gradient!(df, x_new)
+        x_new .= muladd.(α, s, x)
 
         # Calculate ϕ(a_i), ϕ'(a_i)
-        return f_x_new, real(dot(g_x_new, s))
+        ϕ, dϕ = NLSolversBase.value_jvp!(df, x_new, s)
+
+        return ϕ, real(dϕ)
     end
     ϕdϕ
 end
 function make_ϕ_dϕ(df, x_new, x, s)
     function dϕ(α)
         # Move a distance of alpha in the direction of s
-        x_new .= x .+ α.*s
-
-        # Evaluate ∇f(x+α*s)
-        g_x_new = NLSolversBase.gradient!(df, x_new)
+        x_new .= muladd.(α, s, x)
 
         # Calculate ϕ'(a_i)
-        return real(dot(g_x_new, s))
+        return real(NLSolversBase.jvp!(df, x_new, s))
     end
     make_ϕ(df, x_new, x, s), dϕ
 end
diff --git a/src/initialguess.jl b/src/initialguess.jl
@@ -13,6 +13,8 @@ is scaled with the `l_2` norm of the step direction.
 end
 
 function (is::InitialStatic{T})(ls, state, phi_0, dphi_0, df) where T
+    # phi_0 is (or should be) equal to NLSolversBase.value(df, state.x) and `state.f_x`
+    @assert phi_0 == state.f_x
     PT = promote_type(T, real(eltype(state.s)))
     if is.scaled == true && (ns = real(norm(state.s))) > convert(PT, 0)
         # TODO: Type instability if there's a type mismatch between is.alpha and ns?
@@ -70,11 +72,13 @@ If αmax ≠ 1.0, then you should consider to ensure that snap2one[2] < αmax.
 end
 
 function (is::InitialQuadratic{T})(ls, state, phi_0, dphi_0, df) where T
+    # phi_0 is (or should be) equal to NLSolversBase.value(df, state.x) and `state.f_x`
+    @assert phi_0 == state.f_x
     if !isfinite(state.f_x_previous) || isapprox(dphi_0, convert(T, 0), atol=eps(T)) # Need to add a tolerance
         # If we're at the first iteration
         αguess = is.α0
     else
-        αguess = 2 * (phi_0 - state.f_x_previous) / dphi_0
+        αguess = 2 * (state.f_x - state.f_x_previous) / dphi_0
         αguess = NaNMath.max(is.αmin, state.alpha*is.ρ, αguess)
         αguess = NaNMath.min(is.αmax, αguess)
         # if αguess ≈ 1, then make it 1 (Newton-type behaviour)
@@ -135,6 +139,8 @@ function InitialConstantChange(; αmin = 1e-12,
 end
 
 function (is::InitialConstantChange{T})(ls, state, phi_0, dphi_0, df) where T
+    # phi_0 is (or should be) equal to NLSolversBase.value(df, state.x) and `state.f_x`
+    @assert phi_0 == state.f_x
     if !isfinite(is.dϕ_0_previous[]) || !isfinite(state.alpha) ||
         isapprox(dphi_0, convert(T, 0), atol=eps(T))
         # If we're at the first iteration
@@ -175,15 +181,19 @@ otherwise, we select according to procedure I1-2, with starting value α0.
 end
 
 function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
+    # phi_0 is (or should be) equal to NLSolversBase.value(df, state.x) and `state.f_x`
+    @assert phi_0 == state.f_x
     if isnan(state.f_x_previous) && isnan(is.α0)
         # If we're at the first iteration (f_x_previous is NaN)
         # and the user has not provided an initial step size (is.α0 is NaN),
         # then we
         # pick the initial step size according to HZ #I0
-        # phi_0 is (or should be) equal to NLSolversBase.value(df, state.x) 
-        # TODO: Make the gradient available as part of the state?
-        g_x = NLSolversBase.gradient!(df, state.x)
-        state.alpha = _hzI0(state.x, g_x, phi_0,
+        g_x = if hasproperty(state, :g_x)
+            state.g_x
+        else
+            NLSolversBase.gradient!(df, state.x)
+        end
+        state.alpha = _hzI0(state.x, g_x, state.f_x,
                             is.αmax,
                             convert(eltype(state.x), is.ψ0)) # Hack to deal with type instability between is{T} and state.x
         if Tls <: HagerZhang
diff --git a/src/types.jl b/src/types.jl
@@ -1,4 +1,4 @@
-mutable struct LineSearchException{T<:Real} <: Exception
+struct LineSearchException{T<:Real} <: Exception
     message::AbstractString
     alpha::T
 end
diff --git a/test/arbitrary_precision.jl b/test/arbitrary_precision.jl
@@ -22,7 +22,7 @@ doublefloatstypes = [Double64, Double32, Double16]
     @test dphi_0 isa T
 
     function getstate()
-        state = StateDummy(convert(T, 1),  x, similar(x), convert(T, NaN), p)
+        state = StateDummy(convert(T, 1),  x,  phi_0, grtmp, similar(x), convert(T, NaN), p)
     end
     # Test HagerZhang I0
     ls = HagerZhang{T}()
@@ -98,6 +98,7 @@ doublefloatstypes = [Double64, Double32, Double16]
     ls = HagerZhang{T}()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(T, 2)
     is = InitialQuadratic{T}(snap2one=(convert(T, 0.9),convert(T, Inf)))
     is(ls, state, phi_0, dphi_0, df)
     @test !isnan(state.alpha)
@@ -107,6 +108,7 @@ doublefloatstypes = [Double64, Double32, Double16]
     ls = HagerZhang{T}()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(T, 2)
     is = InitialQuadratic{T}(snap2one=(convert(T, 0.75),convert(T, Inf)))
     is(ls, state, phi_0, dphi_0, df)
     @test !isnan(state.alpha)
diff --git a/test/examples.jl b/test/examples.jl
diff --git a/test/initial.jl b/test/initial.jl
@@ -12,7 +12,7 @@
     dphi_0 = dot(p, grtmp)
 
     function getstate()
-        state = StateDummy(1.0,  x, similar(x), NaN, p)
+        state = StateDummy(1.0,  x, phi_0, grtmp, similar(x), NaN, p)
     end
     # Test HagerZhang I0
     ls = HagerZhang()
@@ -88,6 +88,7 @@
     ls = HagerZhang()
     state = getstate()
     state.f_x_previous = 2*phi_0
+    state.x_ls = zeros(2)
     is = InitialQuadratic(snap2one=(0.9,Inf))
     is(ls, state, phi_0, dphi_0, df)
     @test state.alpha == 0.5
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -14,7 +14,6 @@ my_tests = [
     "initial.jl",
     "alphacalc.jl",
     "arbitrary_precision.jl",
-    "examples.jl",
     "captured.jl",
     "issues.jl",
     "qa.jl",
@@ -23,6 +22,8 @@ my_tests = [
 mutable struct StateDummy
     alpha
     x
+    f_x
+    g_x
     x_ls
     f_x_previous
     s

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-mutable struct LineSearchException{T<:Real} <: Exception` |
|  | `1` | `+struct LineSearchException{T<:Real} <: Exception` |
| `2` | `2` | `message::AbstractString` |
| `3` | `3` | `alpha::T` |
| `4` | `4` | `end` |