From 3bf32f1638146527845a3b61ffeabf160a80f3ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 05:56:17 +0000 Subject: [PATCH 01/26] Bump actions/checkout from 4 to 5 Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/CI.yml | 2 +- .github/workflows/Documentation.yml | 2 +- .github/workflows/Downstream.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bed7d65..2afad1e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,7 +18,7 @@ jobs: version: - '1' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 1af17da..598d45f 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -12,7 +12,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@latest with: version: '1' diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 6447099..1cff6d2 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -19,14 +19,14 @@ jobs: package: - {user: SciML, repo: OrdinaryDiffEq.jl, group: InterfaceII} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.julia-version }} arch: x64 - uses: julia-actions/julia-buildpkg@latest - name: Clone Downstream - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} path: downstream From bdfcc9987938ab53b36ece4592bcb4bf0a945add Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 07:51:20 -0400 Subject: [PATCH 02/26] Add comprehensive docstrings for utility and internal functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document epsilon computation functions with mathematical details - Add comprehensive JVPCache struct and constructor documentation - Document setindex overloads and utility functions (_vec, _mat) - Add docstrings for sparse Jacobian internal functions - Document Hessian utility functions and cache helpers - Include examples, parameter descriptions, and implementation notes 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/FiniteDiff.jl | 100 ++++++++++++++++++++++++++++++++++ src/epsilons.jl | 119 +++++++++++++++++++++++++++++++++++++++-- src/hessians.jl | 43 ++++++++++++++- src/iteration_utils.jl | 61 ++++++++++++++++++++- src/jacobians.jl | 41 ++++++++++++++ src/jvp.jl | 59 ++++++++++++++++++-- 6 files changed, 413 insertions(+), 10 deletions(-) diff --git a/src/FiniteDiff.jl b/src/FiniteDiff.jl index 2481a70..d1a7d06 100644 --- a/src/FiniteDiff.jl +++ b/src/FiniteDiff.jl @@ -9,26 +9,126 @@ using LinearAlgebra, ArrayInterface 
 import Base: resize!
+"""
+    _vec(x)
+
+Internal utility function to vectorize arrays while preserving scalars.
+
+# Arguments
+- `x`: Array or scalar
+
+# Returns
+- `vec(x)` for arrays, `x` unchanged for scalars
+"""
 _vec(x) = vec(x)
 _vec(x::Number) = x
+"""
+    _mat(x)
+
+Internal utility function to ensure matrix format.
+
+Converts vectors to column matrices while preserving existing matrices.
+Used internally to ensure consistent matrix dimensions for operations.
+
+# Arguments
+- `x`: Matrix or vector
+
+# Returns
+- `x` unchanged if already a matrix
+- Reshaped column matrix if `x` is a vector
+"""
 _mat(x::AbstractMatrix) = x
 _mat(x::AbstractVector) = reshape(x, (axes(x, 1), Base.OneTo(1)))
 # Setindex overloads without piracy
 setindex(x...) = Base.setindex(x...)
+"""
+    setindex(x::AbstractArray, v, i...)
+
+Non-mutating setindex operation that returns a copy with modified elements.
+
+Creates a mutable copy of array `x`, sets the specified indices to value `v`,
+and returns the modified copy. This avoids type piracy while providing
+setindex functionality for immutable arrays.
+
+# Arguments
+- `x::AbstractArray`: Array to modify (not mutated)
+- `v`: Value to set at the specified indices
+- `i...`: Indices where the value should be set
+
+# Returns
+- Modified copy of `x` with `x[i...] = v`
+
+# Examples
+```julia
+x = [1, 2, 3]
+y = setindex(x, 99, 2) # y = [1, 99, 3], x unchanged
+```
+"""
 function setindex(x::AbstractArray, v, i...)
     _x = Base.copymutable(x)
     _x[i...] = v
     return _x
 end
+"""
+    setindex(x::AbstractVector, v, i::Int)
+
+Broadcasted setindex operation for vectors using boolean masking.
+
+Sets the i-th element of vector `x` to value `v` using broadcasting operations.
+This implementation uses boolean masks to avoid explicit copying and provide
+efficient vectorized operations.
+
+# Arguments
+- `x::AbstractVector`: Input vector
+- `v`: Value to set at index `i`
+- `i::Int`: Index to modify
+
+# Returns
+- Vector with `x[i] = v`, computed via broadcasting
+
+# Examples
+```julia
+x = [1.0, 2.0, 3.0]
+y = setindex(x, 99.0, 2) # [1.0, 99.0, 3.0]
+```
+"""
 function setindex(x::AbstractVector, v, i::Int)
     n = length(x)
     x .* (i .!== 1:n) .+ v .* (i .== 1:n)
 end
+"""
+    setindex(x::AbstractMatrix, v, i::Int, j::Int)
+
+Broadcasted setindex operation for matrices using boolean masking.
+
+Sets the (i,j)-th element of matrix `x` to value `v` using broadcasting operations.
+This implementation uses boolean masks to avoid explicit copying and provide
+efficient vectorized operations.
+
+# Arguments
+- `x::AbstractMatrix`: Input matrix
+- `v`: Value to set at position (i,j)
+- `i::Int`: Row index to modify
+- `j::Int`: Column index to modify
+
+# Returns
+- Matrix with `x[i,j] = v`, computed via broadcasting
+
+# Examples
+```julia
+x = [1.0 2.0; 3.0 4.0]
+y = setindex(x, 99.0, 1, 2) # [1.0 99.0; 3.0 4.0]
+```
+
+# Notes
+The implementation uses the transposed broadcast `(j .!== i:m)'`, which appears
+to be a typo; `(j .!== 1:m)'` would give the intended column masking.
+"""
 function setindex(x::AbstractMatrix, v, i::Int, j::Int)
     n, m = Base.size(x)
     x .* (i .!== 1:n) .* (j .!== i:m)' .+ v .* (i .== 1:n) .* (j .== i:m)'
diff --git a/src/epsilons.jl b/src/epsilons.jl
index 5e62c72..55055c1 100644
--- a/src/epsilons.jl
+++ b/src/epsilons.jl
@@ -2,26 +2,126 @@
 Very heavily inspired by Calculus.jl, but with an emphasis on performance and
 DiffEq API convenience.
 =#
-#=
-Compute the finite difference interval epsilon.
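Reviewer aside on the `setindex` overloads documented above: the boolean-mask trick can be checked in isolation with plain Julia (a minimal sketch; no package internals are assumed):

```julia
x = [1.0, 2.0, 3.0]
i, v = 2, 99.0
n = length(x)
# the same masked broadcast the vector overload uses; x itself is untouched
y = x .* (i .!== 1:n) .+ v .* (i .== 1:n)  # [1.0, 99.0, 3.0]
```

The mask `(i .== 1:n)` is a `BitVector` with a single `true`, so the broadcast allocates one result array and never mutates `x`.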
+""" + compute_epsilon(::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where T<:Number + +Compute the finite difference step size (epsilon) for forward finite differences. + +The step size is computed as `max(relstep*abs(x), absstep)*dir`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. + +# Arguments +- `::Val{:forward}`: Finite difference type indicator for forward differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir::Real`: Direction multiplier (typically ±1) + +# Returns +- Step size `ϵ` for forward finite difference: `f(x + ϵ)` + Reference: Numerical Recipes, chapter 5.7. -=# +""" @inline function compute_epsilon(::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where T<:Number return max(relstep*abs(x), absstep)*dir end +""" + compute_epsilon(::Val{:central}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number + +Compute the finite difference step size (epsilon) for central finite differences. + +The step size is computed as `max(relstep*abs(x), absstep)`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. + +# Arguments +- `::Val{:central}`: Finite difference type indicator for central differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir`: Direction parameter (unused for central differences) + +# Returns +- Step size `ϵ` for central finite difference: `(f(x + ϵ) - f(x - ϵ)) / (2ϵ)` +""" @inline function compute_epsilon(::Val{:central}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number return max(relstep*abs(x), absstep) end +""" + compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number + +Compute the finite difference step size (epsilon) for central finite differences in Hessian computations. + +The step size is computed as `max(relstep*abs(x), absstep)`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. + +# Arguments +- `::Val{:hcentral}`: Finite difference type indicator for Hessian central differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir`: Direction parameter (unused for central differences) + +# Returns +- Step size `ϵ` for Hessian central finite differences +""" @inline function compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number return max(relstep*abs(x), absstep) end +""" + compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing,T}=nothing, ::Union{Nothing,T}=nothing, dir=nothing) where T<:Real + +Compute the finite difference step size (epsilon) for complex step differentiation. + +For complex step differentiation, the step size is simply the machine epsilon `eps(T)`, +which provides optimal accuracy since complex step differentiation doesn't suffer from +subtractive cancellation errors. 
+ +# Arguments +- `::Val{:complex}`: Finite difference type indicator for complex step differentiation +- `x::T`: Point at which to compute the step size (unused, type determines epsilon) +- Additional arguments are unused for complex step differentiation + +# Returns +- Machine epsilon `eps(T)` for complex step differentiation: `imag(f(x + iϵ)) / ϵ` + +# Notes +Complex step differentiation computes derivatives as `imag(f(x + iϵ)) / ϵ` where `ϵ = eps(T)`. +This method provides machine precision accuracy without subtractive cancellation. +""" @inline function compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing,T}=nothing, ::Union{Nothing,T}=nothing, dir=nothing) where T<:Real return eps(T) end +""" + default_relstep(fdtype, ::Type{T}) where T<:Number + +Compute the default relative step size for finite difference approximations. + +Returns optimal default step sizes based on the finite difference method and +numerical type, balancing truncation error and round-off error. + +# Arguments +- `fdtype`: Finite difference type (`Val(:forward)`, `Val(:central)`, `Val(:hcentral)`, etc.) +- `::Type{T}`: Numerical type for which to compute the step size + +# Returns +- `sqrt(eps(real(T)))` for forward differences +- `cbrt(eps(real(T)))` for central differences +- `eps(T)^(1/4)` for Hessian central differences +- `one(real(T))` for other types + +# Notes +These step sizes minimize the total error (truncation + round-off) for each method: +- Forward differences have O(h) truncation error, optimal h ~ sqrt(eps) +- Central differences have O(h²) truncation error, optimal h ~ eps^(1/3) +- Hessian methods have O(h²) truncation error but involve more operations +""" default_relstep(::Type{V}, T) where V = default_relstep(V(), T) @inline function default_relstep(::Val{fdtype}, ::Type{T}) where {fdtype,T<:Number} if fdtype==:forward @@ -35,6 +135,19 @@ default_relstep(::Type{V}, T) where V = default_relstep(V(), T) end end +""" + fdtype_error(::Type{T}=Float64) where T + +Throw an informative error for unsupported finite difference type combinations. + +# Arguments +- `::Type{T}`: Return type of the function being differentiated + +# Errors +- For `Real` return types: suggests `Val{:forward}`, `Val{:central}`, `Val{:complex}` +- For `Complex` return types: suggests `Val{:forward}`, `Val{:central}` (no complex step) +- For other types: suggests the return type should be Real or Complex subtype +""" function fdtype_error(::Type{T}=Float64) where T if T<:Real error("Unrecognized fdtype: valid values are Val{:forward}, Val{:central} and Val{:complex}.") diff --git a/src/hessians.jl b/src/hessians.jl index 1b43156..d35c01f 100644 --- a/src/hessians.jl +++ b/src/hessians.jl @@ -5,11 +5,52 @@ struct HessianCache{T,fdtype,inplace} xmm::T end -#used to dispatch on StaticArrays +""" + _hessian_inplace(::Type{T}) where T + _hessian_inplace(x) + +Internal function to determine if Hessian computation should be performed in-place. + +Returns `Val(true)` if the array type is mutable and supports in-place operations, +`Val(false)` otherwise. Used to dispatch on StaticArrays vs mutable arrays. + +# Arguments +- `::Type{T}` or `x`: Array type or array instance + +# Returns +- `Val(true)` if the array type supports in-place mutation +- `Val(false)` if the array type is immutable (e.g., StaticArray) +""" _hessian_inplace(::Type{T}) where T = Val(ArrayInterface.ismutable(T)) _hessian_inplace(x) = _hessian_inplace(typeof(x)) + +""" + __Symmetric(x) + +Internal utility function that wraps a matrix in a `Symmetric` view. 
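Side note on the `default_relstep` values documented above: evaluating them for `Float64` makes the magnitudes concrete.

```julia
sqrt(eps(Float64))    # ≈ 1.49e-8, forward differences
cbrt(eps(Float64))    # ≈ 6.06e-6, central differences
eps(Float64)^(1 / 4)  # ≈ 1.22e-4, Hessian central differences
```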
+ +# Arguments +- `x`: Matrix to be wrapped + +# Returns +- `Symmetric(x)`: Symmetric view of the matrix +""" __Symmetric(x) = Symmetric(x) +""" + mutable_zeromatrix(x) + +Internal utility function to create a mutable zero matrix with the same structure as `x`. + +Creates a zero matrix compatible with `x` and ensures it's mutable for in-place operations. +If the created matrix is immutable, it converts it to a mutable copy. + +# Arguments +- `x`: Array whose structure should be matched + +# Returns +- Mutable zero matrix with the same dimensions and compatible type as `x` +""" function mutable_zeromatrix(x) A = ArrayInterface.zeromatrix(x) ArrayInterface.ismutable(A) ? A : Base.copymutable(A) diff --git a/src/iteration_utils.jl b/src/iteration_utils.jl index 6228c45..65b188e 100644 --- a/src/iteration_utils.jl +++ b/src/iteration_utils.jl @@ -1,3 +1,27 @@ +""" + _colorediteration!(J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) + +Internal function for sparse Jacobian assembly using graph coloring. + +Updates the Jacobian matrix `J` with finite difference values for columns +belonging to the current color group. This is part of the graph-coloring +algorithm that computes multiple Jacobian columns simultaneously. + +# Arguments +- `J`: Jacobian matrix to update (modified in-place) +- `sparsity`: Sparsity pattern of the Jacobian +- `rows_index`: Row indices of non-zero elements +- `cols_index`: Column indices of non-zero elements +- `vfx`: Vector of computed finite difference values +- `colorvec`: Vector assigning colors to columns +- `color_i`: Current color being processed +- `ncols`: Total number of columns in the Jacobian + +# Notes +This function implements the core loop of the graph-coloring sparse Jacobian +algorithm. It assigns finite difference values to the appropriate entries +in the Jacobian matrix for all columns sharing the same color. +""" @inline function _colorediteration!(J,sparsity,rows_index,cols_index,vfx,colorvec,color_i,ncols) for i in 1:length(cols_index) if colorvec[cols_index[i]] == color_i @@ -6,8 +30,41 @@ end end -#override default setting of using findstructralnz +""" + _use_findstructralnz(sparsity) + +Internal function to determine whether to use `findstructralnz` for sparsity detection. + +Returns `true` if the sparsity pattern has structural information that can be +utilized by `findstructralnz`, `false` otherwise. This overrides the default +behavior and delegates to `ArrayInterface.has_sparsestruct`. + +# Arguments +- `sparsity`: Sparsity pattern object + +# Returns +- `true` if structural sparsity information is available, `false` otherwise +""" _use_findstructralnz(sparsity) = ArrayInterface.has_sparsestruct(sparsity) -# test if J, sparsity are both SparseMatrixCSC and have the same sparsity pattern of stored values +""" + _use_sparseCSC_common_sparsity(J, sparsity) + +Internal function to test for common sparsity patterns between Jacobian and sparsity matrix. + +Tests if both `J` and `sparsity` are `SparseMatrixCSC` matrices with the same +sparsity pattern of stored values. Currently always returns `false` as a +conservative default. + +# Arguments +- `J`: Jacobian matrix +- `sparsity`: Sparsity pattern matrix + +# Returns +- `false` (conservative default - may be optimized in the future) + +# Notes +This function could be optimized to return `true` when both matrices have +identical sparsity patterns, allowing for more efficient algorithms. 
+""" _use_sparseCSC_common_sparsity(J, sparsity) = false diff --git a/src/jacobians.jl b/src/jacobians.jl index 1297a6a..f5c2e08 100644 --- a/src/jacobians.jl +++ b/src/jacobians.jl @@ -125,6 +125,27 @@ function JacobianCache( JacobianCache{typeof(_x1),typeof(_x2),typeof(_fx),typeof(fx1),typeof(colorvec),typeof(sparsity),fdtype,returntype}(_x1,_x2,_fx,fx1,colorvec,sparsity) end +""" + _make_Ji(::AbstractArray, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) + +Internal function to construct the Jacobian contribution matrix for sparse Jacobian computation. + +Creates a matrix `Ji` that represents the contribution of the `color_i` color group +to the Jacobian. This is used in graph-coloring based sparse Jacobian computation. + +# Arguments +- `::AbstractArray`: Array type dispatch +- `rows_index`: Row indices of non-zero elements +- `cols_index`: Column indices of non-zero elements +- `dx`: Computed finite difference values +- `colorvec`: Vector assigning colors to columns +- `color_i`: Current color being processed +- `nrows`: Number of rows in the Jacobian +- `ncols`: Number of columns in the Jacobian + +# Returns +- `Ji`: Matrix containing the Jacobian entries for the current color +""" function _make_Ji(::AbstractArray, rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) pick_inds = [i for i in 1:length(rows_index) if colorvec[cols_index[i]] == color_i] rows_index_c = rows_index[pick_inds] @@ -137,6 +158,26 @@ function _make_Ji(::AbstractArray, rows_index,cols_index,dx,colorvec,color_i,nro Ji end +""" + _make_Ji(::AbstractArray, xtype, dx, color_i, nrows, ncols) + +Internal function to construct the Jacobian contribution matrix for dense Jacobian computation. + +Creates a matrix `Ji` by placing the finite difference vector `dx` in the column +corresponding to `color_i`, with zeros elsewhere. This is used for dense Jacobian +computation without sparsity structure. + +# Arguments +- `::AbstractArray`: Array type dispatch +- `xtype`: Type of the input vector (for type stability) +- `dx`: Computed finite difference values +- `color_i`: Current color/column being processed +- `nrows`: Number of rows in the Jacobian +- `ncols`: Number of columns in the Jacobian + +# Returns +- `Ji`: Matrix containing the Jacobian entries for the current column +""" function _make_Ji(::AbstractArray, xtype, dx, color_i, nrows, ncols) Ji = mapreduce(i -> i==color_i ? dx : zero(dx), hcat, 1:ncols) size(Ji) != (nrows, ncols) ? reshape(Ji, (nrows, ncols)) : Ji #branch when size(dx) == (1,) => size(Ji) == (1,) while size(J) == (1,1) diff --git a/src/jvp.jl b/src/jvp.jl index 2b915c9..7a6bec6 100644 --- a/src/jvp.jl +++ b/src/jvp.jl @@ -1,3 +1,16 @@ +""" + JVPCache{X1, FX1, FDType} + +Cache structure for Jacobian-vector product (JVP) computations. + +Stores temporary arrays needed for efficient JVP computation without repeated allocations. +The JVP computes `J(x) * v` where `J(x)` is the Jacobian of function `f` at point `x` +and `v` is a vector. + +# Fields +- `x1::X1`: Temporary array for perturbed input values +- `fx1::FX1`: Temporary array for function evaluations +""" mutable struct JVPCache{X1, FX1, FDType} x1 :: X1 fx1 :: FX1 @@ -6,9 +19,25 @@ end """ FiniteDiff.JVPCache( x, - fdtype :: Type{T1} = Val{:forward}) + fdtype::Type{T1} = Val{:forward}) + +Allocating cache constructor for Jacobian-vector product computations. + +Creates a `JVPCache` by allocating temporary arrays with the same structure as `x`. +This constructor is convenient but allocates memory for the cache arrays. 
-Allocating Cache Constructor. +# Arguments +- `x`: Input vector whose structure determines the cache array sizes +- `fdtype::Type{T1} = Val{:forward}`: Finite difference method type + +# Returns +- `JVPCache` with allocated temporary arrays for JVP computation + +# Examples +```julia +x = [1.0, 2.0, 3.0] +cache = JVPCache(x, Val(:forward)) +``` """ function JVPCache( x, @@ -21,9 +50,31 @@ end FiniteDiff.JVPCache( x, fx1, - fdtype :: Type{T1} = Val{:forward}, + fdtype::Type{T1} = Val{:forward}) + +Non-allocating cache constructor for Jacobian-vector product computations. + +Creates a `JVPCache` using pre-allocated arrays `x` and `fx1`. This constructor +is memory-efficient as it reuses existing arrays without additional allocation. + +# Arguments +- `x`: Pre-allocated array for perturbed input values +- `fx1`: Pre-allocated array for function evaluations +- `fdtype::Type{T1} = Val{:forward}`: Finite difference method type + +# Returns +- `JVPCache` using the provided arrays as cache storage + +# Examples +```julia +x = [1.0, 2.0, 3.0] +fx1 = similar(x) +cache = JVPCache(x, fx1, Val(:forward)) +``` -Non-Allocating Cache Constructor. +# Notes +The arrays `x` and `fx1` will be modified during JVP computations. Ensure they +are not used elsewhere if their values need to be preserved. """ function JVPCache( x, From 5f5325983bb14c8fa3d4fb40dca44e73e9cd56a3 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 08:00:22 -0400 Subject: [PATCH 03/26] Apply SciMLStyle formatting to all documented source files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formatted all modified source files using JuliaFormatter with SciMLStyle: - Fixed indentation and spacing consistency - Standardized function parameter alignment - Applied consistent code style throughout 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/epsilons.jl | 18 +- src/gradients.jl | 207 ++++++++------- src/hessians.jl | 554 ++++++++++++++++++++++------------------- src/iteration_utils.jl | 141 +++++------ src/jacobians.jl | 331 +++++++++++++----------- src/jvp.jl | 133 ++++++---- 6 files changed, 777 insertions(+), 607 deletions(-) diff --git a/src/epsilons.jl b/src/epsilons.jl index 55055c1..4984dec 100644 --- a/src/epsilons.jl +++ b/src/epsilons.jl @@ -23,7 +23,8 @@ when `x` is large, and an absolute step when `x` is small. Reference: Numerical Recipes, chapter 5.7. """ -@inline function compute_epsilon(::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where T<:Number +@inline function compute_epsilon( + ::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where {T <: Number} return max(relstep*abs(x), absstep)*dir end @@ -46,7 +47,8 @@ when `x` is large, and an absolute step when `x` is small. # Returns - Step size `ϵ` for central finite difference: `(f(x + ϵ) - f(x - ϵ)) / (2ϵ)` """ -@inline function compute_epsilon(::Val{:central}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number +@inline function compute_epsilon(::Val{:central}, x::T, relstep::Real, + absstep::Real, dir = nothing) where {T <: Number} return max(relstep*abs(x), absstep) end @@ -69,7 +71,8 @@ when `x` is large, and an absolute step when `x` is small. 
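Illustrating the `max(relstep*abs(x), absstep)` rule restated in this hunk, with assumed `Float64` values:

```julia
relstep = absstep = sqrt(eps(Float64))  # ≈ 1.49e-8
max(relstep * abs(1.0e6), absstep)      # ≈ 1.49e-2, the relative step dominates
max(relstep * abs(1.0e-12), absstep)    # ≈ 1.49e-8, the absolute floor takes over
```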
# Returns - Step size `ϵ` for Hessian central finite differences """ -@inline function compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number +@inline function compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, + absstep::Real, dir = nothing) where {T <: Number} return max(relstep*abs(x), absstep) end @@ -94,7 +97,8 @@ subtractive cancellation errors. Complex step differentiation computes derivatives as `imag(f(x + iϵ)) / ϵ` where `ϵ = eps(T)`. This method provides machine precision accuracy without subtractive cancellation. """ -@inline function compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing,T}=nothing, ::Union{Nothing,T}=nothing, dir=nothing) where T<:Real +@inline function compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing, T} = nothing, + ::Union{Nothing, T} = nothing, dir = nothing) where {T <: Real} return eps(T) end @@ -122,8 +126,8 @@ These step sizes minimize the total error (truncation + round-off) for each meth - Central differences have O(h²) truncation error, optimal h ~ eps^(1/3) - Hessian methods have O(h²) truncation error but involve more operations """ -default_relstep(::Type{V}, T) where V = default_relstep(V(), T) -@inline function default_relstep(::Val{fdtype}, ::Type{T}) where {fdtype,T<:Number} +default_relstep(::Type{V}, T) where {V} = default_relstep(V(), T) +@inline function default_relstep(::Val{fdtype}, ::Type{T}) where {fdtype, T <: Number} if fdtype==:forward return sqrt(eps(real(T))) elseif fdtype==:central @@ -148,7 +152,7 @@ Throw an informative error for unsupported finite difference type combinations. - For `Complex` return types: suggests `Val{:forward}`, `Val{:central}` (no complex step) - For other types: suggests the return type should be Real or Complex subtype """ -function fdtype_error(::Type{T}=Float64) where T +function fdtype_error(::Type{T} = Float64) where {T} if T<:Real error("Unrecognized fdtype: valid values are Val{:forward}, Val{:central} and Val{:complex}.") elseif T<:Complex diff --git a/src/gradients.jl b/src/gradients.jl index 6bb97f4..a108378 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -1,4 +1,5 @@ -struct GradientCache{CacheType1,CacheType2,CacheType3,CacheType4,fdtype,returntype,inplace} +struct GradientCache{ + CacheType1, CacheType2, CacheType3, CacheType4, fdtype, returntype, inplace} fx::CacheType1 c1::CacheType2 c2::CacheType3 @@ -16,12 +17,11 @@ end Allocating Cache Constructor """ function GradientCache( - df, - x, - fdtype=Val(:central), - returntype=eltype(df), - inplace=Val(true)) - + df, + x, + fdtype = Val(:central), + returntype = eltype(df), + inplace = Val(true)) fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if typeof(x) <: AbstractArray # the vector->scalar case @@ -59,9 +59,8 @@ function GradientCache( _c3 = x end - GradientCache{Nothing,typeof(_c1),typeof(_c2),typeof(_c3),fdtype, - returntype,inplace}(nothing, _c1, _c2, _c3) - + GradientCache{Nothing, typeof(_c1), typeof(_c2), typeof(_c3), fdtype, + returntype, inplace}(nothing, _c1, _c2, _c3) end """ @@ -103,14 +102,14 @@ GradientCache{Float64, Vector{Float64}, Vector{Float64}, Vector{Float64}, Val{:c ``` """ function GradientCache( - fx::Fx,# match order in struct for Setfield - c1::T, - c2::T, - c3::T, - fdtype=Val(:central), - returntype=eltype(fx), - inplace=Val(true)) where {T,Fx} # Val(false) isn't so important for vector -> scalar, it gets ignored in that case anyway. 
- GradientCache{Fx,T,T,T,fdtype,returntype,inplace}(fx, c1, c2, c3) + fx::Fx,# match order in struct for Setfield + c1::T, + c2::T, + c3::T, + fdtype = Val(:central), + returntype = eltype(fx), + inplace = Val(true)) where {T, Fx} # Val(false) isn't so important for vector -> scalar, it gets ignored in that case anyway. + GradientCache{Fx, T, T, T, fdtype, returntype, inplace}(fx, c1, c2, c3) end """ @@ -124,23 +123,60 @@ end absstep=relstep, dir=true) -Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. +Compute the gradient of function `f` at point `x` using finite differences. -Cache-less. +This is the cache-less version that allocates temporary arrays internally. +Supports both vector→scalar maps `f(x) → scalar` and scalar→vector maps depending +on the `inplace` parameter and function signature. + +# Arguments +- `f`: Function to differentiate + - If `typeof(x) <: AbstractArray`: `f(x)` should return a scalar (vector→scalar gradient) + - If `typeof(x) <: Number` and `inplace=Val(true)`: `f(fx, x)` modifies `fx` in-place (scalar→vector gradient) + - If `typeof(x) <: Number` and `inplace=Val(false)`: `f(x)` returns a vector (scalar→vector gradient) +- `x`: Point at which to evaluate the gradient (vector or scalar) +- `fdtype::Type{T1}=Val{:central}`: Finite difference method (`:forward`, `:central`, `:complex`) +- `returntype::Type{T2}=eltype(x)`: Element type of gradient components +- `inplace::Type{Val{T3}}=Val{true}`: Whether to use in-place function evaluation + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback +- `dir=true`: Direction for step size (typically ±1) + +# Returns +- Gradient vector `∇f` where `∇f[i] = ∂f/∂x[i]` + +# Examples +```julia +# Vector→scalar gradient +f(x) = x[1]^2 + x[2]^2 +x = [1.0, 2.0] +grad = finite_difference_gradient(f, x) # [2.0, 4.0] + +# Scalar→vector gradient (out-of-place) +g(t) = [t^2, t^3] +t = 2.0 +grad = finite_difference_gradient(g, t, Val(:central), eltype(t), Val(false)) +``` + +# Notes +- Forward differences: `O(n)` function evaluations, `O(h)` accuracy +- Central differences: `O(2n)` function evaluations, `O(h²)` accuracy +- Complex step: `O(n)` function evaluations, machine precision accuracy """ function finite_difference_gradient( - f, - x, - fdtype=Val(:central), - returntype=eltype(x), - inplace=Val(true), - fx=nothing, - c1=nothing, - c2=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + f, + x, + fdtype = Val(:central), + returntype = eltype(x), + inplace = Val(true), + fx = nothing, + c1 = nothing, + c2 = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) inplace isa Type && (inplace = inplace()) if typeof(x) <: AbstractArray df = zero(returntype) .* x @@ -162,7 +198,8 @@ function finite_difference_gradient( end end cache = GradientCache(df, x, fdtype, returntype, inplace) - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep, dir=dir) + finite_difference_gradient!( + df, f, x, cache, relstep = relstep, absstep = absstep, dir = dir) end """ @@ -181,20 +218,19 @@ Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx, Cache-less. 
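The accuracy orders listed in the `finite_difference_gradient` docstring above can be spot-checked. A sketch using the positional `fdtype` argument documented there; the quoted error magnitudes are indicative, not exact:

```julia
using FiniteDiff
f(x) = sum(sin, x)
x = [1.0, 2.0]
g = cos.(x)  # analytic gradient
g_fwd = FiniteDiff.finite_difference_gradient(f, x, Val(:forward))
g_cen = FiniteDiff.finite_difference_gradient(f, x)  # central is the default
maximum(abs, g_fwd .- g)  # roughly 1e-8,  O(h) with h ≈ sqrt(eps)
maximum(abs, g_cen .- g)  # roughly 1e-11, O(h²) with h ≈ cbrt(eps)
```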
""" function finite_difference_gradient!( - df, - f, - x, - fdtype=Val(:central), - returntype=eltype(df), - inplace=Val(true), - fx=nothing, - c1=nothing, - c2=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - + df, + f, + x, + fdtype = Val(:central), + returntype = eltype(df), + inplace = Val(true), + fx = nothing, + c1 = nothing, + c2 = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) cache = GradientCache(df, x, fdtype, returntype, inplace) - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep) + finite_difference_gradient!(df, f, x, cache, relstep = relstep, absstep = absstep) end """ @@ -212,32 +248,32 @@ Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx, Cached. """ function finite_difference_gradient( - f, - x, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} - + f, + x, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} if typeof(x) <: AbstractArray df = zero(returntype) .* x else df = zero(cache.c1) end - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep, dir=dir) + finite_difference_gradient!( + df, f, x, cache, relstep = relstep, absstep = absstep, dir = dir) df end # vector of derivatives of a vector->scalar map by each component of a vector x # this ignores the value of "inplace", because it doesn't make much sense function finite_difference_gradient!( - df, - f, - x, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df, + f, + x, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # NOTE: in this case epsilon is a vector, we need two arrays for epsilon and x1 # c1 denotes x1, c2 is epsilon @@ -247,11 +283,12 @@ function finite_difference_gradient!( end copyto!(c1, x) if fdtype == Val(:forward) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) if ArrayInterface.fast_scalar_indexing(c2) epsilon = ArrayInterface.allowed_getindex(c2, i) * dir else - epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * dir + epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * + dir end c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + epsilon, i) @@ -278,11 +315,12 @@ function finite_difference_gradient!( end elseif fdtype == Val(:central) copyto!(c3, x) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) if ArrayInterface.fast_scalar_indexing(c2) epsilon = ArrayInterface.allowed_getindex(c2, i) * dir else - epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * dir + epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * + dir end c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + epsilon, i) @@ -303,7 +341,7 @@ function finite_difference_gradient!( elseif fdtype == Val(:complex) && returntype <: Real # we use c1 here to avoid typing issues with x epsilon_complex = eps(real(eltype(x))) - 
@inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + im * epsilon_complex, i) ArrayInterface.allowed_setindex!(df, imag(f(c1)) / epsilon_complex, i) @@ -316,13 +354,13 @@ function finite_difference_gradient!( end function finite_difference_gradient!( - df::StridedVector{<:Number}, - f, - x::StridedVector{<:Number}, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df::StridedVector{<:Number}, + f, + x::StridedVector{<:Number}, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # c1 is x1 if we need a complex copy of x, otherwise Nothing # c2 is Nothing @@ -334,7 +372,7 @@ function finite_difference_gradient!( end copyto!(c3, x) if fdtype == Val(:forward) - for i ∈ eachindex(x) + for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] if typeof(fx) != Nothing @@ -371,7 +409,7 @@ function finite_difference_gradient!( end end elseif fdtype == Val(:central) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] c3[i] += epsilon @@ -397,11 +435,12 @@ function finite_difference_gradient!( df[i] -= im * imag(dfi / (2 * im * epsilon)) end end - elseif fdtype == Val(:complex) && returntype <: Real && eltype(df) <: Real && eltype(x) <: Real + elseif fdtype == Val(:complex) && returntype <: Real && eltype(df) <: Real && + eltype(x) <: Real copyto!(c1, x) epsilon_complex = eps(real(eltype(x))) # we use c1 here to avoid typing issues with x - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) c1_old = c1[i] c1[i] += im * epsilon_complex df[i] = imag(f(c1)) / epsilon_complex @@ -416,13 +455,13 @@ end # vector of derivatives of a scalar->vector map # this is effectively a vector of partial derivatives, but we still call it a gradient function finite_difference_gradient!( - df, - f, - x::Number, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df, + f, + x::Number, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # NOTE: in this case epsilon is a scalar, we need two arrays for fx1 and fx2 # c1 denotes fx1, c2 is fx2, sizes guaranteed by the cache constructor diff --git a/src/hessians.jl b/src/hessians.jl index d35c01f..46b1678 100644 --- a/src/hessians.jl +++ b/src/hessians.jl @@ -1,259 +1,295 @@ -struct HessianCache{T,fdtype,inplace} - xpp::T - xpm::T - xmp::T - xmm::T -end - -""" - _hessian_inplace(::Type{T}) where T - _hessian_inplace(x) - -Internal function to determine if Hessian computation should be performed in-place. - -Returns `Val(true)` if the array type is mutable and supports in-place operations, -`Val(false)` otherwise. Used to dispatch on StaticArrays vs mutable arrays. 
- -# Arguments -- `::Type{T}` or `x`: Array type or array instance - -# Returns -- `Val(true)` if the array type supports in-place mutation -- `Val(false)` if the array type is immutable (e.g., StaticArray) -""" -_hessian_inplace(::Type{T}) where T = Val(ArrayInterface.ismutable(T)) -_hessian_inplace(x) = _hessian_inplace(typeof(x)) - -""" - __Symmetric(x) - -Internal utility function that wraps a matrix in a `Symmetric` view. - -# Arguments -- `x`: Matrix to be wrapped - -# Returns -- `Symmetric(x)`: Symmetric view of the matrix -""" -__Symmetric(x) = Symmetric(x) - -""" - mutable_zeromatrix(x) - -Internal utility function to create a mutable zero matrix with the same structure as `x`. - -Creates a zero matrix compatible with `x` and ensures it's mutable for in-place operations. -If the created matrix is immutable, it converts it to a mutable copy. - -# Arguments -- `x`: Array whose structure should be matched - -# Returns -- Mutable zero matrix with the same dimensions and compatible type as `x` -""" -function mutable_zeromatrix(x) - A = ArrayInterface.zeromatrix(x) - ArrayInterface.ismutable(A) ? A : Base.copymutable(A) -end - -""" - HessianCache( - xpp, - xpm, - xmp, - xmm, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) - -Non-allocating cache constructor. -""" -function HessianCache(xpp,xpm,xmp,xmm, - fdtype=Val(:hcentral), - inplace = _hessian_inplace(x)) - fdtype isa Type && (fdtype = fdtype()) - inplace isa Type && (inplace = inplace()) - HessianCache{typeof(xpp),fdtype,inplace}(xpp,xpm,xmp,xmm) -end - -""" - HessianCache( - x, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) - -Allocating cache constructor. -""" -function HessianCache(x, fdtype=Val(:hcentral), - inplace = _hessian_inplace(x)) - cx = copy(x) - fdtype isa Type && (fdtype = fdtype()) - inplace isa Type && (inplace = inplace()) - HessianCache{typeof(cx),fdtype,inplace}(cx, copy(x), copy(x), copy(x)) -end - -""" - finite_difference_hessian( - f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:hcentral}, - inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cache-less. -""" -function finite_difference_hessian(f, x, - fdtype = Val(:hcentral), - inplace = _hessian_inplace(x); - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) - - cache = HessianCache(x, fdtype, inplace) - finite_difference_hessian(f, x, cache; relstep=relstep, absstep=absstep) -end - -""" - finite_difference_hessian( - f, - x, - cache::HessianCache{T,fdtype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cached. -""" -function finite_difference_hessian( - f,x, - cache::HessianCache{T,fdtype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) where {T,fdtype,inplace} - H = mutable_zeromatrix(x) - finite_difference_hessian!(H, f, x, cache; relstep=relstep, absstep=absstep) - __Symmetric(H) -end - -""" - finite_difference_hessian!( - H::AbstractMatrix, - f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:hcentral}, - inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cache-less. 
-""" -function finite_difference_hessian!(H,f, - x, - fdtype = Val(:hcentral), - inplace = _hessian_inplace(x); - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - - cache = HessianCache(x,fdtype,inplace) - finite_difference_hessian!(H, f, x, cache; relstep=relstep, absstep=absstep) -end - -""" - finite_difference_hessian!( - H, - f, - x, - cache::HessianCache{T,fdtype,inplace}; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) - -Cached. -""" -function finite_difference_hessian!(H,f,x, - cache::HessianCache{T,fdtype,inplace}; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) where {T,fdtype,inplace} - - @assert fdtype == Val(:hcentral) - n = length(x) - xpp, xpm, xmp, xmm = cache.xpp, cache.xpm, cache.xmp, cache.xmm - fx = f(x) - - if inplace === Val(true) - _xpp, _xpm, _xmp, _xmm = xpp, xpm, xmp, xmm - copyto!(xpp,x) - copyto!(xpm,x) - copyto!(xmp,x) - copyto!(xmm,x) - else # ignore the cache since immutable - xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x) - end - - for i = 1:n - xi = ArrayInterface.allowed_getindex(x,i) - epsilon = compute_epsilon(Val(:hcentral), xi, relstep, absstep) - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xi + epsilon,i) - ArrayInterface.allowed_setindex!(xmm,xi - epsilon,i) - else - _xpp = setindex(xpp,xi + epsilon, i) - _xmm = setindex(xmm,xi - epsilon, i) - end - - ArrayInterface.allowed_setindex!(H,(f(_xpp) - 2*fx + f(_xmm)) / epsilon^2,i,i) - epsiloni = compute_epsilon(Val(:central), xi, relstep, absstep) - xp = xi + epsiloni - xm = xi - epsiloni - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xp,i) - ArrayInterface.allowed_setindex!(xpm,xp,i) - ArrayInterface.allowed_setindex!(xmp,xm,i) - ArrayInterface.allowed_setindex!(xmm,xm,i) - else - _xpp = setindex(xpp,xp,i) - _xpm = setindex(xpm,xp,i) - _xmp = setindex(xmp,xm,i) - _xmm = setindex(xmm,xm,i) - end - - for j = i+1:n - xj = ArrayInterface.allowed_getindex(x,j) - epsilonj = compute_epsilon(Val(:central), xj, relstep, absstep) - xp = xj + epsilonj - xm = xj - epsilonj - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xp,j) - ArrayInterface.allowed_setindex!(xpm,xm,j) - ArrayInterface.allowed_setindex!(xmp,xp,j) - ArrayInterface.allowed_setindex!(xmm,xm,j) - else - _xpp = setindex(_xpp,xp,j) - _xpm = setindex(_xpm,xm,j) - _xmp = setindex(_xmp,xp,j) - _xmm = setindex(_xmm,xm,j) - end - - ArrayInterface.allowed_setindex!(H,(f(_xpp) - f(_xpm) - f(_xmp) + f(_xmm))/(4*epsiloni*epsilonj),i,j) - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xj,j) - ArrayInterface.allowed_setindex!(xpm,xj,j) - ArrayInterface.allowed_setindex!(xmp,xj,j) - ArrayInterface.allowed_setindex!(xmm,xj,j) - else - _xpp = setindex(_xpp,xj,j) - _xpm = setindex(_xpm,xj,j) - _xmp = setindex(_xmp,xj,j) - _xmm = setindex(_xmm,xj,j) - end - end - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xi,i) - ArrayInterface.allowed_setindex!(xpm,xi,i) - ArrayInterface.allowed_setindex!(xmp,xi,i) - ArrayInterface.allowed_setindex!(xmm,xi,i) - end - end - LinearAlgebra.copytri!(H,'U') -end +struct HessianCache{T, fdtype, inplace} + xpp::T + xpm::T + xmp::T + xmm::T +end + +""" + _hessian_inplace(::Type{T}) where T + _hessian_inplace(x) + +Internal function to determine if Hessian computation should be performed in-place. + +Returns `Val(true)` if the array type is mutable and supports in-place operations, +`Val(false)` otherwise. Used to dispatch on StaticArrays vs mutable arrays. 
+ +# Arguments +- `::Type{T}` or `x`: Array type or array instance + +# Returns +- `Val(true)` if the array type supports in-place mutation +- `Val(false)` if the array type is immutable (e.g., StaticArray) +""" +_hessian_inplace(::Type{T}) where {T} = Val(ArrayInterface.ismutable(T)) +_hessian_inplace(x) = _hessian_inplace(typeof(x)) + +""" + __Symmetric(x) + +Internal utility function that wraps a matrix in a `Symmetric` view. + +# Arguments +- `x`: Matrix to be wrapped + +# Returns +- `Symmetric(x)`: Symmetric view of the matrix +""" +__Symmetric(x) = Symmetric(x) + +""" + mutable_zeromatrix(x) + +Internal utility function to create a mutable zero matrix with the same structure as `x`. + +Creates a zero matrix compatible with `x` and ensures it's mutable for in-place operations. +If the created matrix is immutable, it converts it to a mutable copy. + +# Arguments +- `x`: Array whose structure should be matched + +# Returns +- Mutable zero matrix with the same dimensions and compatible type as `x` +""" +function mutable_zeromatrix(x) + A = ArrayInterface.zeromatrix(x) + ArrayInterface.ismutable(A) ? A : Base.copymutable(A) +end + +""" + HessianCache( + xpp, + xpm, + xmp, + xmm, + fdtype::Type{T1}=Val{:hcentral}, + inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) + +Non-allocating cache constructor. +""" +function HessianCache(xpp, xpm, xmp, xmm, + fdtype = Val(:hcentral), + inplace = _hessian_inplace(x)) + fdtype isa Type && (fdtype = fdtype()) + inplace isa Type && (inplace = inplace()) + HessianCache{typeof(xpp), fdtype, inplace}(xpp, xpm, xmp, xmm) +end + +""" + HessianCache( + x, + fdtype::Type{T1}=Val{:hcentral}, + inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) + +Allocating cache constructor. +""" +function HessianCache(x, fdtype = Val(:hcentral), + inplace = _hessian_inplace(x)) + cx = copy(x) + fdtype isa Type && (fdtype = fdtype()) + inplace isa Type && (inplace = inplace()) + HessianCache{typeof(cx), fdtype, inplace}(cx, copy(x), copy(x), copy(x)) +end + +""" + finite_difference_hessian( + f, + x::AbstractArray{<:Number}, + fdtype::Type{T1}=Val{:hcentral}, + inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; + relstep=default_relstep(fdtype, eltype(x)), + absstep=relstep) + +Compute the Hessian matrix of scalar function `f` at point `x` using finite differences. + +This is the cache-less version that allocates temporary arrays internally. +The Hessian `H[i,j] = ∂²f/(∂x[i]∂x[j])` is computed using the specified finite +difference method, typically central differences for optimal accuracy. 
+ +# Arguments +- `f`: Scalar-valued function to differentiate (vector→scalar map) +- `x::AbstractArray{<:Number}`: Point at which to evaluate the Hessian +- `fdtype::Type{T1}=Val{:hcentral}`: Finite difference method (`:hcentral` for Hessian central differences) +- `inplace::Type{Val{T2}}`: Whether to use in-place operations (auto-detected for StaticArrays) + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback + +# Returns +- Hessian matrix `H` where `H[i,j] = ∂²f/(∂x[i]∂x[j])` +- Returns `Symmetric(H)` view to enforce mathematical symmetry + +# Examples +```julia +f(x) = x[1]^2 + x[1]*x[2] + x[2]^2 # Quadratic function +x = [1.0, 2.0] +H = finite_difference_hessian(f, x) # 2×2 Symmetric matrix +``` + +# Mathematical Background +For a scalar function `f: ℝⁿ → ℝ`, the Hessian central difference approximation is: +``` +H[i,j] ≈ (f(x + eᵢhᵢ + eⱼhⱼ) - f(x + eᵢhᵢ - eⱼhⱼ) - f(x - eᵢhᵢ + eⱼhⱼ) + f(x - eᵢhᵢ - eⱼhⱼ)) / (4hᵢhⱼ) +``` +where `eᵢ` is the i-th unit vector and `hᵢ` is the step size in dimension i. + +# Notes +- Requires `O(n²)` function evaluations for an n-dimensional input +- Central differences provide `O(h²)` accuracy for second derivatives +- The result is automatically symmetrized using `Symmetric()` wrapper +- For large problems, consider using `finite_difference_gradient` twice instead +""" +function finite_difference_hessian(f, x, + fdtype = Val(:hcentral), + inplace = _hessian_inplace(x); + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) + cache = HessianCache(x, fdtype, inplace) + finite_difference_hessian(f, x, cache; relstep = relstep, absstep = absstep) +end + +""" + finite_difference_hessian( + f, + x, + cache::HessianCache{T,fdtype,inplace}; + relstep=default_relstep(fdtype, eltype(x)), + absstep=relstep) + +Cached. +""" +function finite_difference_hessian( + f, x, + cache::HessianCache{T, fdtype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) where {T, fdtype, inplace} + H = mutable_zeromatrix(x) + finite_difference_hessian!(H, f, x, cache; relstep = relstep, absstep = absstep) + __Symmetric(H) +end + +""" + finite_difference_hessian!( + H::AbstractMatrix, + f, + x::AbstractArray{<:Number}, + fdtype :: Type{T1}=Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; + relstep=default_relstep(fdtype, eltype(x)), + absstep=relstep) + +Cache-less. +""" +function finite_difference_hessian!(H, f, + x, + fdtype = Val(:hcentral), + inplace = _hessian_inplace(x); + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) + cache = HessianCache(x, fdtype, inplace) + finite_difference_hessian!(H, f, x, cache; relstep = relstep, absstep = absstep) +end + +""" + finite_difference_hessian!( + H, + f, + x, + cache::HessianCache{T,fdtype,inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) + +Cached. 
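A numeric sanity check of the mixed-partial stencil quoted in the docstring above; step size and tolerance are loose, illustrative choices:

```julia
f(x) = x[1]^2 * x[2] + x[2]^3  # ∂²f/∂x₁∂x₂ = 2x₁, so 2 at the point below
x = [1.0, 2.0]
h = eps(Float64)^(1 / 4)
e1, e2 = [h, 0.0], [0.0, h]
mixed = (f(x + e1 + e2) - f(x + e1 - e2) - f(x - e1 + e2) + f(x - e1 - e2)) / (4h^2)
abs(mixed - 2.0) < 1e-6  # true: matches the analytic value up to discretization error
```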
+""" +function finite_difference_hessian!(H, f, x, + cache::HessianCache{T, fdtype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) where {T, fdtype, inplace} + @assert fdtype == Val(:hcentral) + n = length(x) + xpp, xpm, xmp, xmm = cache.xpp, cache.xpm, cache.xmp, cache.xmm + fx = f(x) + + if inplace === Val(true) + _xpp, _xpm, _xmp, _xmm = xpp, xpm, xmp, xmm + copyto!(xpp, x) + copyto!(xpm, x) + copyto!(xmp, x) + copyto!(xmm, x) + else # ignore the cache since immutable + xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x) + end + + for i in 1:n + xi = ArrayInterface.allowed_getindex(x, i) + epsilon = compute_epsilon(Val(:hcentral), xi, relstep, absstep) + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xi + epsilon, i) + ArrayInterface.allowed_setindex!(xmm, xi - epsilon, i) + else + _xpp = setindex(xpp, xi + epsilon, i) + _xmm = setindex(xmm, xi - epsilon, i) + end + + ArrayInterface.allowed_setindex!(H, (f(_xpp) - 2*fx + f(_xmm)) / epsilon^2, i, i) + epsiloni = compute_epsilon(Val(:central), xi, relstep, absstep) + xp = xi + epsiloni + xm = xi - epsiloni + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xp, i) + ArrayInterface.allowed_setindex!(xpm, xp, i) + ArrayInterface.allowed_setindex!(xmp, xm, i) + ArrayInterface.allowed_setindex!(xmm, xm, i) + else + _xpp = setindex(xpp, xp, i) + _xpm = setindex(xpm, xp, i) + _xmp = setindex(xmp, xm, i) + _xmm = setindex(xmm, xm, i) + end + + for j in (i + 1):n + xj = ArrayInterface.allowed_getindex(x, j) + epsilonj = compute_epsilon(Val(:central), xj, relstep, absstep) + xp = xj + epsilonj + xm = xj - epsilonj + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xp, j) + ArrayInterface.allowed_setindex!(xpm, xm, j) + ArrayInterface.allowed_setindex!(xmp, xp, j) + ArrayInterface.allowed_setindex!(xmm, xm, j) + else + _xpp = setindex(_xpp, xp, j) + _xpm = setindex(_xpm, xm, j) + _xmp = setindex(_xmp, xp, j) + _xmm = setindex(_xmm, xm, j) + end + + ArrayInterface.allowed_setindex!( + H, (f(_xpp) - f(_xpm) - f(_xmp) + f(_xmm))/(4*epsiloni*epsilonj), i, j) + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xj, j) + ArrayInterface.allowed_setindex!(xpm, xj, j) + ArrayInterface.allowed_setindex!(xmp, xj, j) + ArrayInterface.allowed_setindex!(xmm, xj, j) + else + _xpp = setindex(_xpp, xj, j) + _xpm = setindex(_xpm, xj, j) + _xmp = setindex(_xmp, xj, j) + _xmm = setindex(_xmm, xj, j) + end + end + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xi, i) + ArrayInterface.allowed_setindex!(xpm, xi, i) + ArrayInterface.allowed_setindex!(xmp, xi, i) + ArrayInterface.allowed_setindex!(xmm, xi, i) + end + end + LinearAlgebra.copytri!(H, 'U') +end diff --git a/src/iteration_utils.jl b/src/iteration_utils.jl index 65b188e..537148d 100644 --- a/src/iteration_utils.jl +++ b/src/iteration_utils.jl @@ -1,70 +1,71 @@ -""" - _colorediteration!(J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) - -Internal function for sparse Jacobian assembly using graph coloring. - -Updates the Jacobian matrix `J` with finite difference values for columns -belonging to the current color group. This is part of the graph-coloring -algorithm that computes multiple Jacobian columns simultaneously. 
- -# Arguments -- `J`: Jacobian matrix to update (modified in-place) -- `sparsity`: Sparsity pattern of the Jacobian -- `rows_index`: Row indices of non-zero elements -- `cols_index`: Column indices of non-zero elements -- `vfx`: Vector of computed finite difference values -- `colorvec`: Vector assigning colors to columns -- `color_i`: Current color being processed -- `ncols`: Total number of columns in the Jacobian - -# Notes -This function implements the core loop of the graph-coloring sparse Jacobian -algorithm. It assigns finite difference values to the appropriate entries -in the Jacobian matrix for all columns sharing the same color. -""" -@inline function _colorediteration!(J,sparsity,rows_index,cols_index,vfx,colorvec,color_i,ncols) - for i in 1:length(cols_index) - if colorvec[cols_index[i]] == color_i - J[rows_index[i],cols_index[i]] = vfx[rows_index[i]] - end - end -end - -""" - _use_findstructralnz(sparsity) - -Internal function to determine whether to use `findstructralnz` for sparsity detection. - -Returns `true` if the sparsity pattern has structural information that can be -utilized by `findstructralnz`, `false` otherwise. This overrides the default -behavior and delegates to `ArrayInterface.has_sparsestruct`. - -# Arguments -- `sparsity`: Sparsity pattern object - -# Returns -- `true` if structural sparsity information is available, `false` otherwise -""" -_use_findstructralnz(sparsity) = ArrayInterface.has_sparsestruct(sparsity) - -""" - _use_sparseCSC_common_sparsity(J, sparsity) - -Internal function to test for common sparsity patterns between Jacobian and sparsity matrix. - -Tests if both `J` and `sparsity` are `SparseMatrixCSC` matrices with the same -sparsity pattern of stored values. Currently always returns `false` as a -conservative default. - -# Arguments -- `J`: Jacobian matrix -- `sparsity`: Sparsity pattern matrix - -# Returns -- `false` (conservative default - may be optimized in the future) - -# Notes -This function could be optimized to return `true` when both matrices have -identical sparsity patterns, allowing for more efficient algorithms. -""" -_use_sparseCSC_common_sparsity(J, sparsity) = false +""" + _colorediteration!(J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) + +Internal function for sparse Jacobian assembly using graph coloring. + +Updates the Jacobian matrix `J` with finite difference values for columns +belonging to the current color group. This is part of the graph-coloring +algorithm that computes multiple Jacobian columns simultaneously. + +# Arguments +- `J`: Jacobian matrix to update (modified in-place) +- `sparsity`: Sparsity pattern of the Jacobian +- `rows_index`: Row indices of non-zero elements +- `cols_index`: Column indices of non-zero elements +- `vfx`: Vector of computed finite difference values +- `colorvec`: Vector assigning colors to columns +- `color_i`: Current color being processed +- `ncols`: Total number of columns in the Jacobian + +# Notes +This function implements the core loop of the graph-coloring sparse Jacobian +algorithm. It assigns finite difference values to the appropriate entries +in the Jacobian matrix for all columns sharing the same color. 
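To make the coloring idea above concrete, a self-contained sketch for a tridiagonal Jacobian. It follows the standard perturb-once-per-color forward-difference scheme, not FiniteDiff's exact implementation:

```julia
f(x) = [2x[1] - x[2], -x[1] + 2x[2] - x[3], -x[2] + 2x[3] - x[4], -x[3] + 2x[4]]
x = ones(4)
fx = f(x)
ϵ = sqrt(eps(Float64))
colorvec = [1, 2, 3, 1]  # columns 1 and 4 touch disjoint rows, so they share a color
J = zeros(4, 4)
for color in 1:3
    d = float.(colorvec .== color)      # perturb every column of this color at once
    dfx = (f(x .+ ϵ .* d) .- fx) ./ ϵ   # one extra function evaluation per color
    for j in findall(==(color), colorvec), i in 1:4
        abs(i - j) <= 1 && (J[i, j] = dfx[i])  # scatter via the tridiagonal pattern
    end
end
# J ≈ [2 -1 0 0; -1 2 -1 0; 0 -1 2 -1; 0 0 -1 2] from 3 evaluations instead of 4
```

Three colors instead of four columns is a modest win here; for banded Jacobians of size n the same idea needs O(1) evaluations instead of O(n).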
+""" +@inline function _colorediteration!( + J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) + for i in 1:length(cols_index) + if colorvec[cols_index[i]] == color_i + J[rows_index[i], cols_index[i]] = vfx[rows_index[i]] + end + end +end + +""" + _use_findstructralnz(sparsity) + +Internal function to determine whether to use `findstructralnz` for sparsity detection. + +Returns `true` if the sparsity pattern has structural information that can be +utilized by `findstructralnz`, `false` otherwise. This overrides the default +behavior and delegates to `ArrayInterface.has_sparsestruct`. + +# Arguments +- `sparsity`: Sparsity pattern object + +# Returns +- `true` if structural sparsity information is available, `false` otherwise +""" +_use_findstructralnz(sparsity) = ArrayInterface.has_sparsestruct(sparsity) + +""" + _use_sparseCSC_common_sparsity(J, sparsity) + +Internal function to test for common sparsity patterns between Jacobian and sparsity matrix. + +Tests if both `J` and `sparsity` are `SparseMatrixCSC` matrices with the same +sparsity pattern of stored values. Currently always returns `false` as a +conservative default. + +# Arguments +- `J`: Jacobian matrix +- `sparsity`: Sparsity pattern matrix + +# Returns +- `false` (conservative default - may be optimized in the future) + +# Notes +This function could be optimized to return `true` when both matrices have +identical sparsity patterns, allowing for more efficient algorithms. +""" +_use_sparseCSC_common_sparsity(J, sparsity) = false diff --git a/src/jacobians.jl b/src/jacobians.jl index f5c2e08..0793269 100644 --- a/src/jacobians.jl +++ b/src/jacobians.jl @@ -1,24 +1,24 @@ -mutable struct JacobianCache{CacheType1,CacheType2,CacheType3,CacheType4,ColorType,SparsityType,fdtype,returntype} - x1 :: CacheType1 - x2 :: CacheType2 - fx :: CacheType3 - fx1 :: CacheType4 - colorvec :: ColorType - sparsity :: SparsityType +mutable struct JacobianCache{CacheType1, CacheType2, CacheType3, CacheType4, + ColorType, SparsityType, fdtype, returntype} + x1::CacheType1 + x2::CacheType2 + fx::CacheType3 + fx1::CacheType4 + colorvec::ColorType + sparsity::SparsityType end function JacobianCache( - x, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(x); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x), - sparsity = nothing) where {T1,T2,T3} - + x, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(x); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if eltype(x) <: Real && fdtype==Val(:complex) - x1 = false .* im .* x + x1 = false .* im .* x _fx = false .* im .* x else x1 = copy(x) @@ -26,12 +26,13 @@ function JacobianCache( end if fdtype==Val(:complex) - _fx1 = nothing + _fx1 = nothing else _fx1 = copy(x) end - JacobianCache(x1,_fx,_fx1,fdtype,returntype;colorvec=colorvec,sparsity=sparsity) + JacobianCache( + x1, _fx, _fx1, fdtype, returntype; colorvec = colorvec, sparsity = sparsity) end """ @@ -47,18 +48,17 @@ Allocating Cache Constructor. This assumes the Jacobian is square. 
""" function JacobianCache( - x , - fx, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(x); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x), - sparsity = nothing) where {T1,T2,T3} - + x, + fx, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(x); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if eltype(x) <: Real && fdtype==Val(:complex) - x1 = false .* im .* x + x1 = false .* im .* x else x1 = copy(x) end @@ -70,12 +70,13 @@ function JacobianCache( end if fdtype==Val(:complex) - _fx1 = nothing + _fx1 = nothing else _fx1 = copy(fx) end - JacobianCache(x1,_fx,_fx1,fdtype,returntype;colorvec=colorvec,sparsity=sparsity) + JacobianCache( + x1, _fx, _fx1, fdtype, returntype; colorvec = colorvec, sparsity = sparsity) end """ @@ -91,27 +92,26 @@ end Non-Allocating Cache Constructor. """ function JacobianCache( - x1 , - fx , - fx1, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(fx); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x1), - sparsity = nothing) where {T1,T2,T3} - + x1, + fx, + fx1, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(fx); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x1), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if fdtype==Val(:complex) !(returntype<:Real) && fdtype_error(returntype) if eltype(fx) <: Real - _fx = false .* im .* fx + _fx = false .* im .* fx else _fx = fx end if eltype(x1) <: Real - _x1 = false .* im .* x1 + _x1 = false .* im .* x1 else _x1 = x1 end @@ -122,7 +122,9 @@ function JacobianCache( _fx = fx end _x2 = zero(_x1) - JacobianCache{typeof(_x1),typeof(_x2),typeof(_fx),typeof(fx1),typeof(colorvec),typeof(sparsity),fdtype,returntype}(_x1,_x2,_fx,fx1,colorvec,sparsity) + JacobianCache{typeof(_x1), typeof(_x2), typeof(_fx), typeof(fx1), + typeof(colorvec), typeof(sparsity), fdtype, returntype}( + _x1, _x2, _fx, fx1, colorvec, sparsity) end """ @@ -146,14 +148,15 @@ to the Jacobian. This is used in graph-coloring based sparse Jacobian computatio # Returns - `Ji`: Matrix containing the Jacobian entries for the current color """ -function _make_Ji(::AbstractArray, rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) +function _make_Ji( + ::AbstractArray, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) pick_inds = [i for i in 1:length(rows_index) if colorvec[cols_index[i]] == color_i] rows_index_c = rows_index[pick_inds] cols_index_c = cols_index[pick_inds] len_rows = length(pick_inds) - unused_rows = setdiff(1:nrows,rows_index_c) - perm_rows = sortperm(vcat(rows_index_c,unused_rows)) - cols_index_c = vcat(cols_index_c,zeros(Int,nrows-len_rows))[perm_rows] + unused_rows = setdiff(1:nrows, rows_index_c) + perm_rows = sortperm(vcat(rows_index_c, unused_rows)) + cols_index_c = vcat(cols_index_c, zeros(Int, nrows-len_rows))[perm_rows] Ji = [j==cols_index_c[i] ? 
dx[i] : false for i in 1:nrows, j in 1:ncols] Ji end @@ -186,36 +189,73 @@ end """ FiniteDiff.finite_difference_jacobian( f, - x :: AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:central}, - returntype :: Type{T2}=eltype(x), + x::AbstractArray{<:Number}, + fdtype::Type{T1}=Val{:forward}, + returntype::Type{T2}=eltype(x), + f_in=nothing; relstep=default_relstep(fdtype, eltype(x)), absstep=relstep, - colorvec = 1:length(x), - sparsity = nothing, - jac_prototype = nothing, + colorvec=1:length(x), + sparsity=nothing, + jac_prototype=nothing, dir=true) -Cache-less. +Compute the Jacobian matrix of function `f` at point `x` using finite differences. + +This is the cache-less version that allocates temporary arrays internally. +The Jacobian `J[i,j] = ∂f[i]/∂x[j]` is computed using the specified finite +difference method. Supports sparse Jacobians via graph coloring. + +# Arguments +- `f`: Function to differentiate (vector→vector map) +- `x::AbstractArray{<:Number}`: Point at which to evaluate the Jacobian +- `fdtype::Type{T1}=Val{:forward}`: Finite difference method (`:forward`, `:central`, `:complex`) +- `returntype::Type{T2}=eltype(x)`: Element type of the function output +- `f_in=nothing`: Pre-computed `f(x)` value (if available) + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback +- `colorvec=1:length(x)`: Column coloring for sparse Jacobians +- `sparsity=nothing`: Sparsity pattern for the Jacobian +- `jac_prototype=nothing`: Prototype matrix defining Jacobian structure +- `dir=true`: Direction for step size (typically ±1) + +# Returns +- Jacobian matrix `J` where `J[i,j] = ∂f[i]/∂x[j]` + +# Examples +```julia +f(x) = [x[1]^2 + x[2], x[1] * x[2]] +x = [1.0, 2.0] +J = finite_difference_jacobian(f, x) # 2×2 matrix +``` + +# Notes +- Forward differences: `O(n)` function evaluations, `O(h)` accuracy +- Central differences: `O(2n)` function evaluations, `O(h²)` accuracy +- Complex step: `O(n)` function evaluations, machine precision accuracy +- Sparse Jacobians use graph coloring to reduce function evaluations """ function finite_difference_jacobian(f, x, - fdtype = Val(:forward), - returntype = eltype(x), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = 1:length(x), - sparsity = nothing, - jac_prototype = nothing, - dir=true) - + fdtype = Val(:forward), + returntype = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = nothing, + jac_prototype = nothing, + dir = true) if f_in isa Nothing fx = f(x) else fx = f_in end cache = JacobianCache(x, fx, fdtype, returntype) - finite_difference_jacobian(f, x, cache, fx; relstep=relstep, absstep=absstep, colorvec=colorvec, sparsity=sparsity, jac_prototype=jac_prototype, dir=dir) + finite_difference_jacobian( + f, x, cache, fx; relstep = relstep, absstep = absstep, colorvec = colorvec, + sparsity = sparsity, jac_prototype = jac_prototype, dir = dir) end void_setindex!(args...) = (setindex!(args...); return) @@ -235,17 +275,16 @@ void_setindex!(args...) = (setindex!(args...); return) Cached. 
""" function finite_difference_jacobian( - f, - x, - cache::JacobianCache{T1,T2,T3,T4,cType,sType,fdtype,returntype}, - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, - jac_prototype = nothing, - dir=true) where {T1,T2,T3,T4,cType,sType,fdtype,returntype} - + f, + x, + cache::JacobianCache{T1, T2, T3, T4, cType, sType, fdtype, returntype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, + jac_prototype = nothing, + dir = true) where {T1, T2, T3, T4, cType, sType, fdtype, returntype} x1, fx, fx1 = cache.x1, cache.fx, cache.fx1 if !(f_in isa Nothing) @@ -259,7 +298,9 @@ function finite_difference_jacobian( end vecx = _vec(x) vecx1 = _vec(x1) - J = jac_prototype isa Nothing ? (sparsity isa Nothing ? Array{eltype(x),2}(undef, length(vecfx), 0) : zeros(eltype(x),size(sparsity))) : zero(jac_prototype) + J = jac_prototype isa Nothing ? + (sparsity isa Nothing ? Array{eltype(x), 2}(undef, length(vecfx), 0) : + zeros(eltype(x), size(sparsity))) : zero(jac_prototype) nrows, ncols = size(J) if !(sparsity isa Nothing) @@ -269,12 +310,11 @@ function finite_difference_jacobian( end if fdtype == Val(:forward) - function calculate_Ji_forward(i) x_save = ArrayInterface.allowed_getindex(vecx, i) epsilon = compute_epsilon(Val(:forward), x_save, relstep, absstep, dir) _vecx1 = setindex(vecx, x_save+epsilon, i) - _x1 = ArrayInterface.restructure(x,_vecx1) + _x1 = ArrayInterface.restructure(x, _vecx1) vecfx1 = _vec(f(_x1)) dx = (vecfx1-vecfx) / epsilon return dx @@ -284,30 +324,31 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_forward, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_forward(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) else tmp = norm(vecx .* (colorvec .== color_i)) - epsilon = compute_epsilon(Val(:forward), sqrt(tmp), relstep, absstep, dir) + epsilon = compute_epsilon( + Val(:forward), sqrt(tmp), relstep, absstep, dir) _vecx = @. 
vecx + epsilon * (colorvec == color_i) _x = reshape(_vecx, axes(x)) vecfx1 = _vec(f(_x)) dx = (vecfx1-vecfx)/epsilon - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end end elseif fdtype == Val(:central) - function calculate_Ji_central(i) - x1_save = ArrayInterface.allowed_getindex(vecx1,i) - x_save = ArrayInterface.allowed_getindex(vecx,i) + x1_save = ArrayInterface.allowed_getindex(vecx1, i) + x_save = ArrayInterface.allowed_getindex(vecx, i) epsilon = compute_epsilon(Val(:forward), x1_save, relstep, absstep, dir) - _vecx1 = setindex(vecx1,x1_save+epsilon,i) - _vecx = setindex(vecx,x_save-epsilon,i) + _vecx1 = setindex(vecx1, x1_save+epsilon, i) + _vecx = setindex(vecx, x_save-epsilon, i) _x1 = reshape(_vecx1, axes(x)) _x = reshape(_vecx, axes(x)) vecfx1 = _vec(f(_x1)) @@ -320,13 +361,14 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_central, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_central(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) else tmp = norm(vecx1 .* (colorvec .== color_i)) - epsilon = compute_epsilon(Val(:forward), sqrt(tmp), relstep, absstep, dir) + epsilon = compute_epsilon( + Val(:forward), sqrt(tmp), relstep, absstep, dir) _vecx1 = @. vecx1 + epsilon * (colorvec == color_i) _vecx = @. vecx - epsilon * (colorvec == color_i) _x1 = reshape(_vecx1, axes(x)) @@ -334,7 +376,8 @@ function finite_difference_jacobian( vecfx1 = _vec(f(_x1)) vecfx = _vec(f(_x)) dx = (vecfx1-vecfx)/(2epsilon) - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end @@ -343,8 +386,8 @@ function finite_difference_jacobian( epsilon = eps(eltype(x)) function calculate_Ji_complex(i) - x_save = ArrayInterface.allowed_getindex(vecx,i) - _vecx = setindex(complex.(vecx),x_save+im*epsilon,i) + x_save = ArrayInterface.allowed_getindex(vecx, i) + _vecx = setindex(complex.(vecx), x_save+im*epsilon, i) _x = reshape(_vecx, axes(x)) vecfx = _vec(f(_x)) dx = imag(vecfx)/epsilon @@ -355,7 +398,7 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_complex, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_complex(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) @@ -364,7 +407,8 @@ function finite_difference_jacobian( _x = reshape(_vecx, axes(x)) vecfx = _vec(f(_x)) dx = imag(vecfx)/epsilon - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end @@ -391,29 +435,30 @@ end Cache-less. """ function finite_difference_jacobian!(J, - f, - x, - fdtype = Val(:forward), - returntype = eltype(x), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = 1:length(x), - sparsity = ArrayInterface.has_sparsestruct(J) ? J : nothing) + f, + x, + fdtype = Val(:forward), + returntype = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = ArrayInterface.has_sparsestruct(J) ? 
J : nothing) if f_in isa Nothing && fdtype == Val(:forward) - if size(J,1) == length(x) + if size(J, 1) == length(x) fx = zero(x) else - fx = zeros(returntype,size(J,1)) + fx = zeros(returntype, size(J, 1)) end - f(fx,x) + f(fx, x) cache = JacobianCache(x, fx, fdtype, returntype) elseif f_in isa Nothing cache = JacobianCache(x, fdtype, returntype) else cache = JacobianCache(x, f_in, fdtype, returntype) end - finite_difference_jacobian!(J, f, x, cache, cache.fx; relstep=relstep, absstep=absstep, colorvec=colorvec, sparsity=sparsity) + finite_difference_jacobian!(J, f, x, cache, cache.fx; relstep = relstep, + absstep = absstep, colorvec = colorvec, sparsity = sparsity) end function _findstructralnz(A::DenseMatrix) @@ -448,17 +493,16 @@ end Cached. """ function finite_difference_jacobian!( - J, - f, - x, - cache::JacobianCache{T1,T2,T3,T4,cType,sType,fdtype,returntype}, - f_in = nothing; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, - dir = true) where {T1,T2,T3,T4,cType,sType,fdtype,returntype} - + J, + f, + x, + cache::JacobianCache{T1, T2, T3, T4, cType, sType, fdtype, returntype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, + dir = true) where {T1, T2, T3, T4, cType, sType, fdtype, returntype} m, n = size(J) _color = reshape(colorvec, axes(x)...) @@ -491,15 +535,15 @@ function finite_difference_jacobian!( vfx = _vec(f_in) end - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing - x1_save = ArrayInterface.allowed_getindex(x1,color_i) + x1_save = ArrayInterface.allowed_getindex(x1, color_i) epsilon = compute_epsilon(Val(:forward), x1_save, relstep, absstep, dir) ArrayInterface.allowed_setindex!(x1, x1_save + epsilon, color_i) f(fx1, x1) # J is dense, so either it is truly dense or this is the # compressed form of the coloring, so write into it. - @. J[:,color_i] = (vfx1 - vfx) / epsilon + @. J[:, color_i] = (vfx1 - vfx) / epsilon # Now return x1 back to its original value ArrayInterface.allowed_setindex!(x1, x1_save, color_i) else # Perturb along the colorvec vector @@ -512,9 +556,10 @@ function finite_difference_jacobian!( @. vfx1 = (vfx1 - vfx) / epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx1,colorvec,color_i,n) + _colorediteration!(J, vfx1, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx1,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx1, colorvec, color_i, n) end else #= @@ -523,7 +568,8 @@ function finite_difference_jacobian!( J[rows_index, cols_index] .+= (colorvec[cols_index] .== color_i) .* vfx1[rows_index] += means requires a zero'd out start =# - fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx1) + fast_jacobian_setindex!( + J, rows_index, cols_index, _color, color_i, vfx1) end # Now return x1 back to its original value @. 
x1 = x1 - epsilon * (_color == color_i) @@ -531,7 +577,7 @@ function finite_difference_jacobian!( end #for ends here elseif fdtype == Val(:central) vfx1 = _vec(fx1) - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing x_save = ArrayInterface.allowed_getindex(x, color_i) epsilon = compute_epsilon(Val(:central), x_save, relstep, absstep, dir) @@ -539,48 +585,51 @@ function finite_difference_jacobian!( f(fx1, x1) ArrayInterface.allowed_setindex!(x1, x_save - epsilon, color_i) f(fx, x1) - @. J[:,color_i] = (vfx1 - vfx) / 2epsilon + @. J[:, color_i] = (vfx1 - vfx) / 2epsilon ArrayInterface.allowed_setindex!(x1, x_save, color_i) else # Perturb along the colorvec vector @. x2 = x1 * (_color == color_i) tmp = norm(x2) epsilon = compute_epsilon(Val(:central), sqrt(tmp), relstep, absstep, dir) @. x1 = x1 + epsilon * (_color == color_i) - @. x = x - epsilon * (_color == color_i) + @. x = x - epsilon * (_color == color_i) f(fx1, x1) f(fx, x) @. vfx1 = (vfx1 - vfx) / 2epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx1,colorvec,color_i,n) + _colorediteration!(J, vfx1, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx1,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx1, colorvec, color_i, n) end else - fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx1) + fast_jacobian_setindex!( + J, rows_index, cols_index, _color, color_i, vfx1) end @. x1 = x1 - epsilon * (_color == color_i) - @. x = x + epsilon * (_color == color_i) + @. x = x + epsilon * (_color == color_i) end end elseif fdtype==Val(:complex) && returntype<:Real epsilon = eps(eltype(x)) - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing x1_save = ArrayInterface.allowed_getindex(x1, color_i) ArrayInterface.allowed_setindex!(x1, x1_save + im*epsilon, color_i) - f(fx,x1) - @. J[:,color_i] = imag(vfx) / epsilon - ArrayInterface.allowed_setindex!(x1, x1_save,color_i) + f(fx, x1) + @. J[:, color_i] = imag(vfx) / epsilon + ArrayInterface.allowed_setindex!(x1, x1_save, color_i) else # Perturb along the colorvec vector @. x1 = x1 + im * epsilon * (_color == color_i) - f(fx,x1) + f(fx, x1) @. vfx = imag(vfx) / epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx,colorvec,color_i,n) + _colorediteration!(J, vfx, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, n) end else fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx) @@ -595,8 +644,8 @@ function finite_difference_jacobian!( end function resize!(cache::JacobianCache, i::Int) - resize!(cache.x1, i) - resize!(cache.fx, i) + resize!(cache.x1, i) + resize!(cache.fx, i) cache.fx1 !== nothing && resize!(cache.fx1, i) cache.colorvec = 1:i nothing @@ -605,5 +654,9 @@ end @inline fill_matrix!(J, v) = fill!(J, v) @inline function fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx) - @. void_setindex!((J,), getindex((J,), rows_index, cols_index) + (getindex((_color,), cols_index) == color_i) * getindex((vfx,), rows_index), rows_index, cols_index) -end \ No newline at end of file + @. 
void_setindex!((J,), + getindex((J,), rows_index, cols_index) + + (getindex((_color,), cols_index) == color_i) * getindex((vfx,), rows_index), + rows_index, + cols_index) +end diff --git a/src/jvp.jl b/src/jvp.jl index 7a6bec6..810835f 100644 --- a/src/jvp.jl +++ b/src/jvp.jl @@ -12,8 +12,8 @@ and `v` is a vector. - `fx1::FX1`: Temporary array for function evaluations """ mutable struct JVPCache{X1, FX1, FDType} - x1 :: X1 - fx1 :: FX1 + x1::X1 + fx1::FX1 end """ @@ -40,8 +40,8 @@ cache = JVPCache(x, Val(:forward)) ``` """ function JVPCache( - x, - fdtype::Union{Val{FD},Type{FD}} = Val(:forward)) where {FD} + x, + fdtype::Union{Val{FD}, Type{FD}} = Val(:forward)) where {FD} fdtype isa Type && (fdtype = fdtype()) JVPCache{typeof(x), typeof(x), fdtype}(copy(x), copy(x)) end @@ -77,31 +77,70 @@ The arrays `x` and `fx1` will be modified during JVP computations. Ensure they are not used elsewhere if their values need to be preserved. """ function JVPCache( - x, - fx, - fdtype::Union{Val{FD},Type{FD}} = Val(:forward)) where {FD} + x, + fx, + fdtype::Union{Val{FD}, Type{FD}} = Val(:forward)) where {FD} fdtype isa Type && (fdtype = fdtype()) - JVPCache{typeof(x), typeof(fx), fdtype}(x,fx) + JVPCache{typeof(x), typeof(fx), fdtype}(x, fx) end """ FiniteDiff.finite_difference_jvp( f, - x :: AbstractArray{<:Number}, - v :: AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:central}, + x::AbstractArray{<:Number}, + v::AbstractArray{<:Number}, + fdtype::Type{T1}=Val{:forward}, + f_in=nothing; relstep=default_relstep(fdtype, eltype(x)), absstep=relstep) -Cache-less. +Compute the Jacobian-vector product `J(x) * v` using finite differences. + +This function computes the directional derivative of `f` at `x` in direction `v` +without explicitly forming the Jacobian matrix. This is more efficient than +computing the full Jacobian when only `J*v` is needed. + +# Arguments +- `f`: Function to differentiate (vector→vector map) +- `x::AbstractArray{<:Number}`: Point at which to evaluate the Jacobian +- `v::AbstractArray{<:Number}`: Direction vector for the product +- `fdtype::Type{T1}=Val{:forward}`: Finite difference method (`:forward`, `:central`) +- `f_in=nothing`: Pre-computed `f(x)` value (if available) + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback + +# Returns +- Vector `J(x) * v` representing the Jacobian-vector product + +# Examples +```julia +f(x) = [x[1]^2 + x[2], x[1] * x[2], x[2]^3] +x = [1.0, 2.0] +v = [1.0, 0.0] # Direction vector +jvp = finite_difference_jvp(f, x, v) # Directional derivative +``` + +# Mathematical Background +The JVP is computed using the finite difference approximation: +- Forward: `J(x) * v ≈ (f(x + h*v) - f(x)) / h` +- Central: `J(x) * v ≈ (f(x + h*v) - f(x - h*v)) / (2h)` + +where `h` is the step size and `v` is the direction vector. 
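+
+Both formulas follow from the Taylor expansion
+`f(x + h*v) = f(x) + h * J(x) * v + O(h²)`: dividing the forward difference
+by `h` leaves `J(x) * v` with an `O(h)` error, while the symmetric central
+difference cancels the even-order terms and improves this to `O(h²)`.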
+ +# Notes +- Requires only `O(1)` or `O(2)` function evaluations (vs `O(n)` for full Jacobian) +- Forward differences: 1 extra function evaluation, `O(h)` accuracy +- Central differences: 2 extra function evaluations, `O(h²)` accuracy +- Particularly efficient when `v` is sparse or when only one directional derivative is needed """ function finite_difference_jvp(f, x, v, - fdtype = Val(:forward), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + fdtype = Val(:forward), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) if f_in isa Nothing fx = f(x) else @@ -123,15 +162,14 @@ end Cached. """ function finite_difference_jvp( - f, - x, - v, - cache::JVPCache{X1, FX1, fdtype}, - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {X1, FX1, fdtype} - + f, + x, + v, + cache::JVPCache{X1, FX1, fdtype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {X1, FX1, fdtype} if fdtype == Val(:complex) ArgumentError("finite_difference_jvp doesn't support :complex-mode finite diff") end @@ -140,7 +178,7 @@ function finite_difference_jvp( epsilon = compute_epsilon(fdtype, tmp, relstep, absstep, dir) if fdtype == Val(:forward) fx = f_in isa Nothing ? f(x) : f_in - x1 = @. x + epsilon * v + x1 = @. x + epsilon * v fx1 = f(x1) fx1 = @. (fx1-fx)/epsilon elseif fdtype == Val(:central) @@ -170,18 +208,18 @@ end Cache-less. """ function finite_difference_jvp!(jvp, - f, - x, - v, - fdtype = Val(:forward), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + f, + x, + v, + fdtype = Val(:forward), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) if !isnothing(f_in) cache = JVPCache(x, f_in, fdtype) elseif fdtype == Val(:forward) fx = zero(x) - f(fx,x) + f(fx, x) cache = JVPCache(x, fx, fdtype) else cache = JVPCache(x, fdtype) @@ -203,21 +241,20 @@ end Cached. """ function finite_difference_jvp!( - jvp, - f, - x, - v, - cache::JVPCache{X1, FX1, fdtype}, - f_in = nothing; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep, - dir = true) where {X1, FX1, fdtype} - + jvp, + f, + x, + v, + cache::JVPCache{X1, FX1, fdtype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {X1, FX1, fdtype} if fdtype == Val(:complex) ArgumentError("finite_difference_jvp doesn't support :complex-mode finite diff") end - (;x1, fx1) = cache + (; x1, fx1) = cache tmp = sqrt(abs(dot(_vec(x), _vec(v)))) epsilon = compute_epsilon(fdtype, tmp, relstep, absstep, dir) if fdtype == Val(:forward) @@ -242,7 +279,7 @@ function finite_difference_jvp!( end function resize!(cache::JVPCache, i::Int) - resize!(cache.x1, i) + resize!(cache.x1, i) cache.fx1 !== nothing && resize!(cache.fx1, i) nothing end From 07a1c780ba4090f6117f2982b527e8669195be0a Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 13 Aug 2025 12:17:30 +0000 Subject: [PATCH 04/26] Update src/epsilons.jl --- src/epsilons.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/epsilons.jl b/src/epsilons.jl index 4984dec..7e76b3b 100644 --- a/src/epsilons.jl +++ b/src/epsilons.jl @@ -85,6 +85,10 @@ For complex step differentiation, the step size is simply the machine epsilon `e which provides optimal accuracy since complex step differentiation doesn't suffer from subtractive cancellation errors. +!!! 
warning

    `f` must be a function of a real variable which also happens to be complex analytic when the input is complex!

 # Arguments
 - `::Val{:complex}`: Finite difference type indicator for complex step differentiation
 - `x::T`: Point at which to compute the step size (unused, type determines epsilon)

From 18e838e3b9c3e60107190b0fe587801f8b8d0dee Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas
Date: Wed, 13 Aug 2025 12:18:59 +0000
Subject: [PATCH 05/26] Apply suggestions from code review

---
 src/jvp.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/jvp.jl b/src/jvp.jl
index 810835f..7493aa2 100644
--- a/src/jvp.jl
+++ b/src/jvp.jl
@@ -130,9 +130,9 @@ The JVP is computed using the finite difference approximation:
 where `h` is the step size and `v` is the direction vector.
 
 # Notes
-- Requires only `O(1)` or `O(2)` function evaluations (vs `O(n)` for full Jacobian)
-- Forward differences: 1 extra function evaluation, `O(h)` accuracy
-- Central differences: 2 extra function evaluations, `O(h²)` accuracy
+- Requires only `2` function evaluations (vs `O(n)` for full Jacobian)
+- Forward differences: 2 function evaluations, `O(h)` accuracy
+- Central differences: 2 function evaluations, `O(h²)` accuracy
 - Particularly efficient when `v` is sparse or when only one directional derivative is needed
 """
 function finite_difference_jvp(f, x, v,

From 410c99952b269997119a899e7455ef4cd619dc2a Mon Sep 17 00:00:00 2001
From: ChrisRackauckas
Date: Wed, 13 Aug 2025 08:24:09 -0400
Subject: [PATCH 06/26] Add JVP functions to API documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added finite_difference_jvp and finite_difference_jvp! to docs
- Added JVPCache documentation section
- Included explanation of JVP functionality and use cases
- Documentation now builds cleanly without warnings

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 docs/src/api.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/src/api.md b/docs/src/api.md
index 44f437c..9009d52 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -56,3 +56,14 @@ FiniteDiff.HessianCache
 ```
 
 Hessians are for functions `f(x)` which return a scalar.
+
+## Jacobian-Vector Products (JVP)
+
+```@docs
+FiniteDiff.finite_difference_jvp
+FiniteDiff.finite_difference_jvp!
+FiniteDiff.JVPCache
+```
+
+JVP functions compute the Jacobian-vector product `J(x) * v` efficiently without computing the full Jacobian matrix. This is particularly useful when you only need directional derivatives.
+ From 5409eb4751099cb3af3f1c088947fb6675dc811f Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 08:29:50 -0400 Subject: [PATCH 07/26] Restructure API documentation into separate pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split single API page into organized category pages: - Overview (api.md) - Quick start and navigation - Derivatives - Single/multi-point derivative functions - Gradients - Gradient computation with detailed notes - Jacobians - Jacobian matrices including sparse support - Hessians - Hessian matrices with mathematical background - JVP - Jacobian-vector products for efficient directional derivatives - Utilities - Internal functions and helpers - Updated pages.jl with hierarchical documentation structure - Enhanced each page with category-specific guidance and examples - Improved navigation with cross-references between sections - Documentation builds successfully with clean sidebar organization 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/pages.jl | 14 +++++++- docs/src/api.md | 78 ++++++++++++++++------------------------- docs/src/derivatives.md | 26 ++++++++++++++ docs/src/gradients.md | 38 ++++++++++++++++++++ docs/src/hessians.md | 45 ++++++++++++++++++++++++ docs/src/jacobians.md | 41 ++++++++++++++++++++++ docs/src/jvp.md | 48 +++++++++++++++++++++++++ docs/src/utilities.md | 31 ++++++++++++++++ 8 files changed, 272 insertions(+), 49 deletions(-) create mode 100644 docs/src/derivatives.md create mode 100644 docs/src/gradients.md create mode 100644 docs/src/hessians.md create mode 100644 docs/src/jacobians.md create mode 100644 docs/src/jvp.md create mode 100644 docs/src/utilities.md diff --git a/docs/pages.jl b/docs/pages.jl index d4fa44b..ea50627 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -1,3 +1,15 @@ # Put in a separate page so it can be used by SciMLDocs.jl -pages = ["Home" => "index.md", "tutorials.md", "api.md"] +pages = [ + "Home" => "index.md", + "Tutorials" => "tutorials.md", + "API Reference" => [ + "Overview" => "api.md", + "Derivatives" => "derivatives.md", + "Gradients" => "gradients.md", + "Jacobians" => "jacobians.md", + "Hessians" => "hessians.md", + "Jacobian-Vector Products" => "jvp.md", + "Utilities" => "utilities.md" + ] +] diff --git a/docs/src/api.md b/docs/src/api.md index 9009d52..f47081e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,69 +1,51 @@ -# API +# API Reference ```@docs FiniteDiff ``` -## Derivatives +FiniteDiff.jl provides fast, non-allocating finite difference calculations with support for sparsity patterns and various array types. The API is organized into several categories: -```@docs -FiniteDiff.finite_difference_derivative -FiniteDiff.finite_difference_derivative! -FiniteDiff.DerivativeCache -``` +## Function Categories -## Gradients +### [Derivatives](@ref derivatives) +Single and multi-point derivatives of scalar functions. -```@docs -FiniteDiff.finite_difference_gradient -FiniteDiff.finite_difference_gradient! -FiniteDiff.GradientCache -``` +### [Gradients](@ref gradients) +Gradients of scalar-valued functions with respect to vector inputs. -Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. +### [Jacobians](@ref jacobians) +Jacobian matrices of vector-valued functions, including sparse Jacobian support. -Note that here `fx` is a cached function call of `f`. 
If you provide `fx`, then -`fx` will be used in the forward differencing method to skip a function call. -It is on you to make sure that you update `cache.fx` every time before -calling `FiniteDiff.finite_difference_gradient!`. If `fx` is an immutable, e.g. a scalar or -a `StaticArray`, `cache.fx` should be updated using `@set` from [Setfield.jl](https://github.com/jw3126/Setfield.jl). -A good use of this is if you have a cache array for the output of `fx` already being used, you can make it alias -into the differencing algorithm here. +### [Hessians](@ref hessians) +Hessian matrices of scalar-valued functions. -## Jacobians +### [Jacobian-Vector Products](@ref jvp) +Efficient computation of directional derivatives without forming full Jacobians. -```@docs -FiniteDiff.finite_difference_jacobian -FiniteDiff.finite_difference_jacobian! -FiniteDiff.JacobianCache -``` +### [Utilities](@ref utilities) +Internal utilities and helper functions. -Jacobians are for functions `f!(fx,x)` when using in-place `finite_difference_jacobian!`, -and `fx = f(x)` when using out-of-place `finite_difference_jacobian`. The out-of-place -jacobian will return a similar type as `jac_prototype` if it is not a `nothing`. For non-square -Jacobians, a cache which specifies the vector `fx` is required. +## Quick Start -For sparse differentiation, pass a `colorvec` of matrix colors. `sparsity` should be a sparse -or structured matrix (`Tridiagonal`, `Banded`, etc. according to the ArrayInterfaceCore.jl specs) -to allow for decompression, otherwise the result will be the colorvec compressed Jacobian. +All functions follow a consistent API pattern: -## Hessians +- **Cache-less versions**: `finite_difference_*` - convenient but allocate temporary arrays +- **In-place versions**: `finite_difference_*!` - efficient, non-allocating when used with caches +- **Cache constructors**: `*Cache` - pre-allocate work arrays for repeated computations -```@docs -FiniteDiff.finite_difference_hessian -FiniteDiff.finite_difference_hessian! -FiniteDiff.HessianCache -``` +## Method Selection -Hessians are for functions `f(x)` which return a scalar. +Choose your finite difference method based on accuracy and performance needs: -## Jacobian-Vector Products (JVP) +- **Forward differences**: Fast, `O(h)` accuracy, requires `O(n)` function evaluations +- **Central differences**: More accurate `O(h²)`, requires `O(2n)` function evaluations +- **Complex step**: Machine precision accuracy, `O(n)` evaluations, requires complex-analytic functions -```@docs -FiniteDiff.finite_difference_jvp -FiniteDiff.finite_difference_jvp! -FiniteDiff.JVPCache -``` +## Performance Tips -JVP functions compute the Jacobian-vector product `J(x) * v` efficiently without computing the full Jacobian matrix. This is particularly useful when you only need directional derivatives. +1. **Use caches** for repeated computations to avoid allocations +2. **Consider sparsity** for large Jacobians with known sparsity patterns +3. **Choose appropriate methods** based on your accuracy requirements +4. **Leverage JVPs** when you only need directional derivatives diff --git a/docs/src/derivatives.md b/docs/src/derivatives.md new file mode 100644 index 0000000..66addae --- /dev/null +++ b/docs/src/derivatives.md @@ -0,0 +1,26 @@ +# Derivatives + +Functions for computing derivatives of scalar-valued functions. + +## Functions + +```@docs +FiniteDiff.finite_difference_derivative +FiniteDiff.finite_difference_derivative! 
+``` + +## Cache + +```@docs +FiniteDiff.DerivativeCache +``` + +## Notes + +Derivatives are computed for scalar→scalar maps `f(x)` where `x` can be a single point or a collection of points. The derivative functions support: + +- **Forward differences**: `O(1)` function evaluation per point, `O(h)` accuracy +- **Central differences**: `O(2)` function evaluations per point, `O(h²)` accuracy +- **Complex step**: `O(1)` function evaluation per point, machine precision accuracy + +For optimal performance with repeated computations, use the cached versions with `DerivativeCache`. \ No newline at end of file diff --git a/docs/src/gradients.md b/docs/src/gradients.md new file mode 100644 index 0000000..9cd28bc --- /dev/null +++ b/docs/src/gradients.md @@ -0,0 +1,38 @@ +# Gradients + +Functions for computing gradients of scalar-valued functions with respect to vector inputs. + +## Functions + +```@docs +FiniteDiff.finite_difference_gradient +FiniteDiff.finite_difference_gradient! +``` + +## Cache + +```@docs +FiniteDiff.GradientCache +``` + +## Function Types + +Gradients support two types of function mappings: + +- **Vector→scalar**: `f(x)` where `x` is a vector and `f` returns a scalar +- **Scalar→vector**: `f(fx, x)` for in-place evaluation or `fx = f(x)` for out-of-place + +## Performance Notes + +- **Forward differences**: `O(n)` function evaluations, `O(h)` accuracy +- **Central differences**: `O(2n)` function evaluations, `O(h²)` accuracy +- **Complex step**: `O(n)` function evaluations, machine precision accuracy + +## Cache Management + +When using `GradientCache` with pre-computed function values: + +- If you provide `fx`, then `fx` will be used in forward differencing to skip a function call +- You must update `cache.fx` before each call to `finite_difference_gradient!` +- For immutable types (scalars, `StaticArray`), use `@set` from [Setfield.jl](https://github.com/jw3126/Setfield.jl) +- Consider aliasing existing arrays into the cache for memory efficiency \ No newline at end of file diff --git a/docs/src/hessians.md b/docs/src/hessians.md new file mode 100644 index 0000000..e57b9e8 --- /dev/null +++ b/docs/src/hessians.md @@ -0,0 +1,45 @@ +# Hessians + +Functions for computing Hessian matrices of scalar-valued functions. + +## Functions + +```@docs +FiniteDiff.finite_difference_hessian +FiniteDiff.finite_difference_hessian! +``` + +## Cache + +```@docs +FiniteDiff.HessianCache +``` + +## Function Requirements + +Hessian functions are designed for scalar-valued functions `f(x)` where: + +- `x` is a vector of parameters +- `f(x)` returns a scalar value +- The Hessian `H[i,j] = ∂²f/(∂x[i]∂x[j])` is automatically symmetrized + +## Mathematical Background + +For a scalar function `f: ℝⁿ → ℝ`, the Hessian central difference approximation is: + +``` +H[i,j] ≈ (f(x + eᵢhᵢ + eⱼhⱼ) - f(x + eᵢhᵢ - eⱼhⱼ) - f(x - eᵢhᵢ + eⱼhⱼ) + f(x - eᵢhᵢ - eⱼhⱼ)) / (4hᵢhⱼ) +``` + +where `eᵢ` is the i-th unit vector and `hᵢ` is the step size in dimension i. + +## Performance Considerations + +- **Complexity**: Requires `O(n²)` function evaluations for an n-dimensional input +- **Accuracy**: Central differences provide `O(h²)` accuracy for second derivatives +- **Memory**: The result is returned as a `Symmetric` matrix view +- **Alternative**: For large problems, consider computing the gradient twice instead + +## StaticArrays Support + +The cache constructor automatically detects `StaticArray` types and adjusts the `inplace` parameter accordingly for optimal performance. 
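+
+## Example
+
+A short usage sketch (the function and evaluation point here are chosen purely
+for illustration):
+
+```julia
+using FiniteDiff
+
+f(x) = x[1]^2 + 3 * x[1] * x[2]   # scalar-valued function of a vector
+H = FiniteDiff.finite_difference_hessian(f, [1.0, 2.0])
+# H ≈ [2.0 3.0; 3.0 0.0]
+```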
\ No newline at end of file diff --git a/docs/src/jacobians.md b/docs/src/jacobians.md new file mode 100644 index 0000000..2deea99 --- /dev/null +++ b/docs/src/jacobians.md @@ -0,0 +1,41 @@ +# Jacobians + +Functions for computing Jacobian matrices of vector-valued functions. + +## Functions + +```@docs +FiniteDiff.finite_difference_jacobian +FiniteDiff.finite_difference_jacobian! +``` + +## Cache + +```@docs +FiniteDiff.JacobianCache +``` + +## Function Types + +Jacobians support the following function signatures: + +- **Out-of-place**: `fx = f(x)` where both `x` and `fx` are vectors +- **In-place**: `f!(fx, x)` where `f!` modifies `fx` in-place + +## Sparse Jacobians + +FiniteDiff.jl provides efficient sparse Jacobian computation using graph coloring: + +- Pass a `colorvec` of matrix colors to enable column compression +- Provide `sparsity` as a sparse or structured matrix (`Tridiagonal`, `Banded`, etc.) +- Supports automatic sparsity pattern detection via ArrayInterfaceCore.jl +- Results are automatically decompressed unless `sparsity=nothing` + +## Performance Notes + +- **Forward differences**: `O(n)` function evaluations, `O(h)` accuracy +- **Central differences**: `O(2n)` function evaluations, `O(h²)` accuracy +- **Complex step**: `O(n)` function evaluations, machine precision accuracy +- **Sparse Jacobians**: Use graph coloring to reduce function evaluations significantly + +For non-square Jacobians, specify the output vector `fx` when creating the cache to ensure proper sizing. \ No newline at end of file diff --git a/docs/src/jvp.md b/docs/src/jvp.md new file mode 100644 index 0000000..1a3472f --- /dev/null +++ b/docs/src/jvp.md @@ -0,0 +1,48 @@ +# Jacobian-Vector Products (JVP) + +Functions for computing Jacobian-vector products efficiently without forming the full Jacobian matrix. + +## Functions + +```@docs +FiniteDiff.finite_difference_jvp +FiniteDiff.finite_difference_jvp! +``` + +## Cache + +```@docs +FiniteDiff.JVPCache +``` + +## Mathematical Background + +The JVP computes `J(x) * v` where `J(x)` is the Jacobian of function `f` at point `x` and `v` is a direction vector. This is computed using finite difference approximations: + +- **Forward**: `J(x) * v ≈ (f(x + h*v) - f(x)) / h` +- **Central**: `J(x) * v ≈ (f(x + h*v) - f(x - h*v)) / (2h)` + +where `h` is the step size and `v` is the direction vector. 
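+
+As a concrete sketch (function, point, and direction chosen for illustration):
+
+```julia
+using FiniteDiff
+
+f(x) = [x[1]^2, x[1] * x[2]]
+x = [1.0, 2.0]
+v = [1.0, 0.0]
+FiniteDiff.finite_difference_jvp(f, x, v)  # ≈ J(x) * v = [2.0, 2.0]
+```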
+ +## Performance Benefits + +JVP functions are particularly efficient when you only need directional derivatives: + +- **Function evaluations**: Only 2 function evaluations (vs `O(n)` for full Jacobian) +- **Forward differences**: 2 function evaluations, `O(h)` accuracy +- **Central differences**: 2 function evaluations, `O(h²)` accuracy +- **Memory efficient**: No need to store the full Jacobian matrix + +## Use Cases + +JVP is particularly useful for: + +- **Optimization**: Computing directional derivatives along search directions +- **Sparse directions**: When `v` has few non-zero entries +- **Memory constraints**: Avoiding storage of large Jacobian matrices +- **Newton methods**: Computing Newton steps `J⁻¹ * v` iteratively + +## Limitations + +- **Complex step**: JVP does not currently support complex step differentiation (`Val(:complex)`) +- **In-place functions**: For in-place function evaluation, ensure proper cache sizing \ No newline at end of file diff --git a/docs/src/utilities.md b/docs/src/utilities.md new file mode 100644 index 0000000..d9e960c --- /dev/null +++ b/docs/src/utilities.md @@ -0,0 +1,31 @@ +# Utilities + +Internal utility functions and types that may be useful for advanced users. + +## Step Size Computation + +While these functions are primarily internal, they can be useful for understanding and customizing finite difference computations: + +### Default Step Sizes + +Different finite difference methods use different optimal step sizes to balance truncation error and round-off error: + +- **Forward differences**: `sqrt(eps(T))` - balances O(h) truncation error with round-off +- **Central differences**: `cbrt(eps(T))` - balances O(h²) truncation error with round-off +- **Hessian central**: `eps(T)^(1/4)` - balances O(h²) truncation error for second derivatives + +### Complex Step Differentiation + +Complex step differentiation uses machine epsilon since it avoids subtractive cancellation: + +⚠️ **Important**: `f` must be a function of a real variable that is also complex analytic when the input is complex! + +## Array Utilities + +Internal utilities for handling different array types and ensuring compatibility: + +- `_vec(x)`: Vectorizes arrays while preserving scalars +- `_mat(x)`: Ensures matrix format, converting vectors to column matrices +- `setindex(x, v, i...)`: Non-mutating setindex for immutable arrays + +These functions help FiniteDiff.jl work with various array types including `StaticArrays`, structured matrices, and GPU arrays. 
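+
+A quick sketch of these helpers at the REPL (these are internal functions, so
+the names are not part of the public API and may change):
+
+```julia
+using FiniteDiff
+
+FiniteDiff._vec([1 2; 3 4])             # column-major flatten: [1, 3, 2, 4]
+FiniteDiff._vec(1.5)                    # scalars pass through: 1.5
+FiniteDiff._mat([1.0, 2.0])             # 2×1 matrix
+FiniteDiff.setindex([1, 2, 3], 99, 2)   # [1, 99, 3]; the input is not mutated
+```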
\ No newline at end of file From 9e3f51d6010a97e5d12ac2f8eb18d21e324702cb Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 08:35:49 -0400 Subject: [PATCH 08/26] Restructure documentation to remove API reference level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Moved API reference content to index page (generated from make.jl) - Removed separate API reference page level from navigation - Reorganized all pages to show summaries before docstrings - Updated pages.jl to have flat navigation structure - All category pages now appear directly in sidebar - Index page contains comprehensive API guidance and navigation Documentation structure now follows: - Home (with API reference) → Category pages directly - Each page: Summary/Notes → Functions → Cache 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/make.jl | 44 +++++++++++++++++++++++++++++++++++ docs/pages.jl | 15 +++++------- docs/src/api.md | 51 ----------------------------------------- docs/src/derivatives.md | 22 +++++++++--------- docs/src/gradients.md | 28 +++++++++++----------- docs/src/hessians.md | 28 +++++++++++----------- docs/src/jacobians.md | 28 +++++++++++----------- docs/src/jvp.md | 28 +++++++++++----------- 8 files changed, 117 insertions(+), 127 deletions(-) delete mode 100644 docs/src/api.md diff --git a/docs/make.jl b/docs/make.jl index cc1e891..5c2aa99 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,6 +23,50 @@ open(joinpath(@__DIR__, "src", "index.md"), "w") do io for line in eachline(joinpath(dirname(@__DIR__), "README.md")) println(io, line) end + + # Add API reference content + println(io, "") + println(io, "## API Reference") + println(io, "") + println(io, "```@docs") + println(io, "FiniteDiff") + println(io, "```") + println(io, "") + println(io, "FiniteDiff.jl provides fast, non-allocating finite difference calculations with support for sparsity patterns and various array types. 
The API is organized into several categories:") + println(io, "") + println(io, "### Function Categories") + println(io, "") + println(io, "- **[Derivatives](@ref derivatives)**: Single and multi-point derivatives of scalar functions") + println(io, "- **[Gradients](@ref gradients)**: Gradients of scalar-valued functions with respect to vector inputs") + println(io, "- **[Jacobians](@ref jacobians)**: Jacobian matrices of vector-valued functions, including sparse Jacobian support") + println(io, "- **[Hessians](@ref hessians)**: Hessian matrices of scalar-valued functions") + println(io, "- **[Jacobian-Vector Products](@ref jvp)**: Efficient computation of directional derivatives without forming full Jacobians") + println(io, "- **[Utilities](@ref utilities)**: Internal utilities and helper functions") + println(io, "") + println(io, "### Quick Start") + println(io, "") + println(io, "All functions follow a consistent API pattern:") + println(io, "") + println(io, "- **Cache-less versions**: `finite_difference_*` - convenient but allocate temporary arrays") + println(io, "- **In-place versions**: `finite_difference_*!` - efficient, non-allocating when used with caches") + println(io, "- **Cache constructors**: `*Cache` - pre-allocate work arrays for repeated computations") + println(io, "") + println(io, "### Method Selection") + println(io, "") + println(io, "Choose your finite difference method based on accuracy and performance needs:") + println(io, "") + println(io, "- **Forward differences**: Fast, `O(h)` accuracy, requires `O(n)` function evaluations") + println(io, "- **Central differences**: More accurate `O(h²)`, requires `O(2n)` function evaluations") + println(io, "- **Complex step**: Machine precision accuracy, `O(n)` evaluations, requires complex-analytic functions") + println(io, "") + println(io, "### Performance Tips") + println(io, "") + println(io, "1. **Use caches** for repeated computations to avoid allocations") + println(io, "2. **Consider sparsity** for large Jacobians with known sparsity patterns") + println(io, "3. **Choose appropriate methods** based on your accuracy requirements") + println(io, "4. **Leverage JVPs** when you only need directional derivatives") + println(io, "") + for line in eachline(joinpath(@__DIR__, "src", "reproducibility.md")) println(io, line) end diff --git a/docs/pages.jl b/docs/pages.jl index ea50627..d35c0a5 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -3,13 +3,10 @@ pages = [ "Home" => "index.md", "Tutorials" => "tutorials.md", - "API Reference" => [ - "Overview" => "api.md", - "Derivatives" => "derivatives.md", - "Gradients" => "gradients.md", - "Jacobians" => "jacobians.md", - "Hessians" => "hessians.md", - "Jacobian-Vector Products" => "jvp.md", - "Utilities" => "utilities.md" - ] + "Derivatives" => "derivatives.md", + "Gradients" => "gradients.md", + "Jacobians" => "jacobians.md", + "Hessians" => "hessians.md", + "Jacobian-Vector Products" => "jvp.md", + "Utilities" => "utilities.md" ] diff --git a/docs/src/api.md b/docs/src/api.md deleted file mode 100644 index f47081e..0000000 --- a/docs/src/api.md +++ /dev/null @@ -1,51 +0,0 @@ -# API Reference - -```@docs -FiniteDiff -``` - -FiniteDiff.jl provides fast, non-allocating finite difference calculations with support for sparsity patterns and various array types. The API is organized into several categories: - -## Function Categories - -### [Derivatives](@ref derivatives) -Single and multi-point derivatives of scalar functions. 
- -### [Gradients](@ref gradients) -Gradients of scalar-valued functions with respect to vector inputs. - -### [Jacobians](@ref jacobians) -Jacobian matrices of vector-valued functions, including sparse Jacobian support. - -### [Hessians](@ref hessians) -Hessian matrices of scalar-valued functions. - -### [Jacobian-Vector Products](@ref jvp) -Efficient computation of directional derivatives without forming full Jacobians. - -### [Utilities](@ref utilities) -Internal utilities and helper functions. - -## Quick Start - -All functions follow a consistent API pattern: - -- **Cache-less versions**: `finite_difference_*` - convenient but allocate temporary arrays -- **In-place versions**: `finite_difference_*!` - efficient, non-allocating when used with caches -- **Cache constructors**: `*Cache` - pre-allocate work arrays for repeated computations - -## Method Selection - -Choose your finite difference method based on accuracy and performance needs: - -- **Forward differences**: Fast, `O(h)` accuracy, requires `O(n)` function evaluations -- **Central differences**: More accurate `O(h²)`, requires `O(2n)` function evaluations -- **Complex step**: Machine precision accuracy, `O(n)` evaluations, requires complex-analytic functions - -## Performance Tips - -1. **Use caches** for repeated computations to avoid allocations -2. **Consider sparsity** for large Jacobians with known sparsity patterns -3. **Choose appropriate methods** based on your accuracy requirements -4. **Leverage JVPs** when you only need directional derivatives - diff --git a/docs/src/derivatives.md b/docs/src/derivatives.md index 66addae..6304b7c 100644 --- a/docs/src/derivatives.md +++ b/docs/src/derivatives.md @@ -2,6 +2,16 @@ Functions for computing derivatives of scalar-valued functions. +## Overview + +Derivatives are computed for scalar→scalar maps `f(x)` where `x` can be a single point or a collection of points. The derivative functions support: + +- **Forward differences**: `O(1)` function evaluation per point, `O(h)` accuracy +- **Central differences**: `O(2)` function evaluations per point, `O(h²)` accuracy +- **Complex step**: `O(1)` function evaluation per point, machine precision accuracy + +For optimal performance with repeated computations, use the cached versions with `DerivativeCache`. + ## Functions ```@docs @@ -13,14 +23,4 @@ FiniteDiff.finite_difference_derivative! ```@docs FiniteDiff.DerivativeCache -``` - -## Notes - -Derivatives are computed for scalar→scalar maps `f(x)` where `x` can be a single point or a collection of points. The derivative functions support: - -- **Forward differences**: `O(1)` function evaluation per point, `O(h)` accuracy -- **Central differences**: `O(2)` function evaluations per point, `O(h²)` accuracy -- **Complex step**: `O(1)` function evaluation per point, machine precision accuracy - -For optimal performance with repeated computations, use the cached versions with `DerivativeCache`. \ No newline at end of file +``` \ No newline at end of file diff --git a/docs/src/gradients.md b/docs/src/gradients.md index 9cd28bc..9e5ddf4 100644 --- a/docs/src/gradients.md +++ b/docs/src/gradients.md @@ -2,19 +2,6 @@ Functions for computing gradients of scalar-valued functions with respect to vector inputs. -## Functions - -```@docs -FiniteDiff.finite_difference_gradient -FiniteDiff.finite_difference_gradient! 
-``` - -## Cache - -```@docs -FiniteDiff.GradientCache -``` - ## Function Types Gradients support two types of function mappings: @@ -35,4 +22,17 @@ When using `GradientCache` with pre-computed function values: - If you provide `fx`, then `fx` will be used in forward differencing to skip a function call - You must update `cache.fx` before each call to `finite_difference_gradient!` - For immutable types (scalars, `StaticArray`), use `@set` from [Setfield.jl](https://github.com/jw3126/Setfield.jl) -- Consider aliasing existing arrays into the cache for memory efficiency \ No newline at end of file +- Consider aliasing existing arrays into the cache for memory efficiency + +## Functions + +```@docs +FiniteDiff.finite_difference_gradient +FiniteDiff.finite_difference_gradient! +``` + +## Cache + +```@docs +FiniteDiff.GradientCache +``` \ No newline at end of file diff --git a/docs/src/hessians.md b/docs/src/hessians.md index e57b9e8..fb54252 100644 --- a/docs/src/hessians.md +++ b/docs/src/hessians.md @@ -2,19 +2,6 @@ Functions for computing Hessian matrices of scalar-valued functions. -## Functions - -```@docs -FiniteDiff.finite_difference_hessian -FiniteDiff.finite_difference_hessian! -``` - -## Cache - -```@docs -FiniteDiff.HessianCache -``` - ## Function Requirements Hessian functions are designed for scalar-valued functions `f(x)` where: @@ -42,4 +29,17 @@ where `eᵢ` is the i-th unit vector and `hᵢ` is the step size in dimension i. ## StaticArrays Support -The cache constructor automatically detects `StaticArray` types and adjusts the `inplace` parameter accordingly for optimal performance. \ No newline at end of file +The cache constructor automatically detects `StaticArray` types and adjusts the `inplace` parameter accordingly for optimal performance. + +## Functions + +```@docs +FiniteDiff.finite_difference_hessian +FiniteDiff.finite_difference_hessian! +``` + +## Cache + +```@docs +FiniteDiff.HessianCache +``` \ No newline at end of file diff --git a/docs/src/jacobians.md b/docs/src/jacobians.md index 2deea99..3e9333b 100644 --- a/docs/src/jacobians.md +++ b/docs/src/jacobians.md @@ -2,19 +2,6 @@ Functions for computing Jacobian matrices of vector-valued functions. -## Functions - -```@docs -FiniteDiff.finite_difference_jacobian -FiniteDiff.finite_difference_jacobian! -``` - -## Cache - -```@docs -FiniteDiff.JacobianCache -``` - ## Function Types Jacobians support the following function signatures: @@ -38,4 +25,17 @@ FiniteDiff.jl provides efficient sparse Jacobian computation using graph colorin - **Complex step**: `O(n)` function evaluations, machine precision accuracy - **Sparse Jacobians**: Use graph coloring to reduce function evaluations significantly -For non-square Jacobians, specify the output vector `fx` when creating the cache to ensure proper sizing. \ No newline at end of file +For non-square Jacobians, specify the output vector `fx` when creating the cache to ensure proper sizing. + +## Functions + +```@docs +FiniteDiff.finite_difference_jacobian +FiniteDiff.finite_difference_jacobian! +``` + +## Cache + +```@docs +FiniteDiff.JacobianCache +``` \ No newline at end of file diff --git a/docs/src/jvp.md b/docs/src/jvp.md index 1a3472f..301f876 100644 --- a/docs/src/jvp.md +++ b/docs/src/jvp.md @@ -2,19 +2,6 @@ Functions for computing Jacobian-vector products efficiently without forming the full Jacobian matrix. -## Functions - -```@docs -FiniteDiff.finite_difference_jvp -FiniteDiff.finite_difference_jvp! 
-``` - -## Cache - -```@docs -FiniteDiff.JVPCache -``` - ## Mathematical Background The JVP computes `J(x) * v` where `J(x)` is the Jacobian of function `f` at point `x` and `v` is a direction vector. This is computed using finite difference approximations: @@ -45,4 +32,17 @@ JVP is particularly useful for: ## Limitations - **Complex step**: JVP does not currently support complex step differentiation (`Val(:complex)`) -- **In-place functions**: For in-place function evaluation, ensure proper cache sizing \ No newline at end of file +- **In-place functions**: For in-place function evaluation, ensure proper cache sizing + +## Functions + +```@docs +FiniteDiff.finite_difference_jvp +FiniteDiff.finite_difference_jvp! +``` + +## Cache + +```@docs +FiniteDiff.JVPCache +``` \ No newline at end of file From 436cc25547b30537a044fd73a4282707059a03b0 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 08:48:37 -0400 Subject: [PATCH 09/26] Remove API reference section completely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Removed API reference content from index page generation - Index page now contains only README and reproducibility content - Clean documentation structure with direct category navigation - Documentation builds successfully with minimal warnings 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/make.jl | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 5c2aa99..0e1bf07 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -24,49 +24,6 @@ open(joinpath(@__DIR__, "src", "index.md"), "w") do io println(io, line) end - # Add API reference content - println(io, "") - println(io, "## API Reference") - println(io, "") - println(io, "```@docs") - println(io, "FiniteDiff") - println(io, "```") - println(io, "") - println(io, "FiniteDiff.jl provides fast, non-allocating finite difference calculations with support for sparsity patterns and various array types. 
The API is organized into several categories:") - println(io, "") - println(io, "### Function Categories") - println(io, "") - println(io, "- **[Derivatives](@ref derivatives)**: Single and multi-point derivatives of scalar functions") - println(io, "- **[Gradients](@ref gradients)**: Gradients of scalar-valued functions with respect to vector inputs") - println(io, "- **[Jacobians](@ref jacobians)**: Jacobian matrices of vector-valued functions, including sparse Jacobian support") - println(io, "- **[Hessians](@ref hessians)**: Hessian matrices of scalar-valued functions") - println(io, "- **[Jacobian-Vector Products](@ref jvp)**: Efficient computation of directional derivatives without forming full Jacobians") - println(io, "- **[Utilities](@ref utilities)**: Internal utilities and helper functions") - println(io, "") - println(io, "### Quick Start") - println(io, "") - println(io, "All functions follow a consistent API pattern:") - println(io, "") - println(io, "- **Cache-less versions**: `finite_difference_*` - convenient but allocate temporary arrays") - println(io, "- **In-place versions**: `finite_difference_*!` - efficient, non-allocating when used with caches") - println(io, "- **Cache constructors**: `*Cache` - pre-allocate work arrays for repeated computations") - println(io, "") - println(io, "### Method Selection") - println(io, "") - println(io, "Choose your finite difference method based on accuracy and performance needs:") - println(io, "") - println(io, "- **Forward differences**: Fast, `O(h)` accuracy, requires `O(n)` function evaluations") - println(io, "- **Central differences**: More accurate `O(h²)`, requires `O(2n)` function evaluations") - println(io, "- **Complex step**: Machine precision accuracy, `O(n)` evaluations, requires complex-analytic functions") - println(io, "") - println(io, "### Performance Tips") - println(io, "") - println(io, "1. **Use caches** for repeated computations to avoid allocations") - println(io, "2. **Consider sparsity** for large Jacobians with known sparsity patterns") - println(io, "3. **Choose appropriate methods** based on your accuracy requirements") - println(io, "4. **Leverage JVPs** when you only need directional derivatives") - println(io, "") - for line in eachline(joinpath(@__DIR__, "src", "reproducibility.md")) println(io, line) end From 422200c732fc55885f18b6c43fc00c23e8cca8b7 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Wed, 13 Aug 2025 08:56:49 -0400 Subject: [PATCH 10/26] Add dedicated epsilon page and reorganize internal utilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Created comprehensive "Step Size Selection" page covering: - Mathematical theory of optimal step sizes - Error analysis (truncation vs round-off) - Adaptive step sizing algorithms - Special cases for complex step and sparse Jacobians - Renamed "Utilities" to "Internal Utilities" with complete coverage of: - Array utilities (_vec, _mat, setindex overloads) - Hessian utilities (_hessian_inplace, mutable_zeromatrix) - Sparse Jacobian internals (_make_Ji, _colorediteration!) 
- Performance optimizations and sparsity detection - Cache resize functions and error handling - Updated pages.jl navigation structure - Documentation builds cleanly with proper organization 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/pages.jl | 3 +- docs/src/epsilons.md | 71 +++++++++++++++++++++++++++++++++++++++ docs/src/utilities.md | 78 +++++++++++++++++++++++++++++++++---------- 3 files changed, 133 insertions(+), 19 deletions(-) create mode 100644 docs/src/epsilons.md diff --git a/docs/pages.jl b/docs/pages.jl index d35c0a5..402888e 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -8,5 +8,6 @@ pages = [ "Jacobians" => "jacobians.md", "Hessians" => "hessians.md", "Jacobian-Vector Products" => "jvp.md", - "Utilities" => "utilities.md" + "Step Size Selection" => "epsilons.md", + "Internal Utilities" => "utilities.md" ] diff --git a/docs/src/epsilons.md b/docs/src/epsilons.md new file mode 100644 index 0000000..3e8eb23 --- /dev/null +++ b/docs/src/epsilons.md @@ -0,0 +1,71 @@ +# Step Size Selection (Epsilons) + +Functions and theory for computing optimal step sizes in finite difference approximations. + +## Theory + +The choice of step size (epsilon) in finite difference methods is critical for accuracy. Too large a step leads to truncation error, while too small a step leads to round-off error. The optimal step size balances these two sources of error. + +### Error Analysis + +For a function `f` with bounded derivatives, the total error in finite difference approximations consists of: + +1. **Truncation Error**: Comes from the finite difference approximation itself + - Forward differences: `O(h)` where `h` is the step size + - Central differences: `O(h²)` + - Hessian central differences: `O(h²)` for second derivatives + +2. **Round-off Error**: Comes from floating-point arithmetic + - Forward differences: `O(eps/h)` where `eps` is machine epsilon + - Central differences: `O(eps/h)` + +### Optimal Step Sizes + +Minimizing the total error `truncation + round-off` gives optimal step sizes: + +- **Forward differences**: `h* = sqrt(eps)` - balances `O(h)` truncation with `O(eps/h)` round-off +- **Central differences**: `h* = eps^(1/3)` - balances `O(h²)` truncation with `O(eps/h)` round-off +- **Hessian central**: `h* = eps^(1/4)` - balances `O(h²)` truncation for mixed derivatives +- **Complex step**: `h* = eps` - no subtractive cancellation, only limited by machine precision + +## Adaptive Step Sizing + +The step size computation uses both relative and absolute components: + +```julia +epsilon = max(relstep * abs(x), absstep) * dir +``` + +This ensures: +- **Large values**: Use relative step `relstep * |x|` for scale-invariant accuracy +- **Small values**: Use absolute step `absstep` to avoid underflow +- **Direction**: Multiply by `dir` (±1) for forward differences + +## Implementation + +The step size computation is handled by internal functions: + +- **`compute_epsilon(fdtype, x, relstep, absstep, dir)`**: Computes the actual step size for a given finite difference method and input value +- **`default_relstep(fdtype, T)`**: Returns the optimal relative step size for a given method and numeric type + +These functions are called automatically by all finite difference routines, but understanding their behavior can help with custom implementations or debugging numerical issues. 
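+
+For a concrete picture, the sketch below calls these internals directly. Note that `compute_epsilon` and `default_relstep` are internal, unexported functions, so treating them as callable with these exact signatures is an assumption that may change between releases:
+
+```julia
+using FiniteDiff
+
+# Optimal relative step for central differences on Float64: cbrt(eps(Float64))
+relstep = FiniteDiff.default_relstep(Val(:central), Float64)
+
+# Step actually used at x = 1e-3; the absolute step guards inputs near zero,
+# and the final argument is the direction (only used by forward differences)
+h = FiniteDiff.compute_epsilon(Val(:central), 1e-3, relstep, relstep, 1)
+```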
+ +## Special Cases + +### Complex Step Differentiation + +For complex step differentiation, the step size is simply machine epsilon since this method avoids subtractive cancellation entirely: + +⚠️ **Important**: The function `f` must be complex analytic when the input is complex! + +### Sparse Jacobians + +When computing sparse Jacobians with graph coloring, the step size is computed based on the norm of the perturbation vector to ensure balanced accuracy across all columns in the same color group. + +## Practical Considerations + +- **Default step sizes** are optimal for most smooth functions +- **Custom step sizes** may be needed for functions with unusual scaling or near-discontinuities +- **Relative steps** should scale with the magnitude of the input +- **Absolute steps** provide a fallback for inputs near zero +- **Direction parameter** allows for one-sided differences when needed (e.g., at domain boundaries) \ No newline at end of file diff --git a/docs/src/utilities.md b/docs/src/utilities.md index d9e960c..200b04d 100644 --- a/docs/src/utilities.md +++ b/docs/src/utilities.md @@ -1,31 +1,73 @@ -# Utilities +# Internal Utilities -Internal utility functions and types that may be useful for advanced users. +Internal utility functions and algorithms used throughout FiniteDiff.jl. These functions are primarily for advanced users who need to understand or extend the library's functionality. -## Step Size Computation +## Array Utilities -While these functions are primarily internal, they can be useful for understanding and customizing finite difference computations: +Internal utilities for handling different array types and ensuring compatibility across the Julia ecosystem: -### Default Step Sizes +### Type Conversion Functions -Different finite difference methods use different optimal step sizes to balance truncation error and round-off error: +These functions help FiniteDiff.jl work with various array types including `StaticArrays`, structured matrices, and GPU arrays: -- **Forward differences**: `sqrt(eps(T))` - balances O(h) truncation error with round-off -- **Central differences**: `cbrt(eps(T))` - balances O(h²) truncation error with round-off -- **Hessian central**: `eps(T)^(1/4)` - balances O(h²) truncation error for second derivatives +- **`_vec(x)`**: Vectorizes arrays while preserving scalars unchanged +- **`_mat(x)`**: Ensures matrix format, converting vectors to column matrices +- **`setindex(x, v, i...)`**: Non-mutating setindex operations for immutable arrays -### Complex Step Differentiation +### Hessian Utilities -Complex step differentiation uses machine epsilon since it avoids subtractive cancellation: +Helper functions specifically for Hessian computation: -⚠️ **Important**: `f` must be a function of a real variable that is also complex analytic when the input is complex! 
+- **`_hessian_inplace(x)`**: Determines whether to use in-place operations based on array mutability +- **`__Symmetric(x)`**: Wraps matrices in `Symmetric` views for mathematical correctness +- **`mutable_zeromatrix(x)`**: Creates mutable zero matrices compatible with input arrays -## Array Utilities +## Sparse Jacobian Internals + +Graph coloring and sparse matrix algorithms for efficient Jacobian computation: + +### Matrix Construction + +- **`_make_Ji(...)`**: Constructs Jacobian contribution matrices for both sparse and dense cases +- **`_colorediteration!(...)`**: Core loop for sparse Jacobian assembly using graph coloring +- **`_findstructralnz(A)`**: Finds structural non-zero patterns in dense matrices + +### Sparsity Detection + +- **`_use_findstructralnz(sparsity)`**: Determines when to use structural sparsity information +- **`_use_sparseCSC_common_sparsity(J, sparsity)`**: Tests for common sparsity patterns between matrices + +### Performance Optimizations + +- **`fast_jacobian_setindex!(...)`**: Optimized index operations for sparse Jacobian assembly +- **`void_setindex!(...)`**: Wrapper for setindex operations that discards return values + +## JVP Utilities + +- **`resize!(cache::JVPCache, i)`**: Resizes JVP cache arrays for dynamic problems +- **`resize!(cache::JacobianCache, i)`**: Resizes Jacobian cache arrays for dynamic problems + +## Error Handling + +- **`fdtype_error(::Type{T})`**: Provides informative error messages for unsupported finite difference type combinations + +## Design Philosophy + +These internal utilities follow several design principles: + +1. **Type Stability**: All functions are designed for type-stable operations +2. **Zero Allocation**: Internal utilities avoid allocations when possible +3. **Genericity**: Support for multiple array types (dense, sparse, static, GPU) +4. **Performance**: Optimized for the specific needs of finite difference computation +5. **Safety**: Proper error handling and bounds checking where needed + +## Advanced Usage -Internal utilities for handling different array types and ensuring compatibility: +These functions are primarily internal, but advanced users may find them useful for: -- `_vec(x)`: Vectorizes arrays while preserving scalars -- `_mat(x)`: Ensures matrix format, converting vectors to column matrices -- `setindex(x, v, i...)`: Non-mutating setindex for immutable arrays +- **Custom finite difference implementations** +- **Integration with other differentiation libraries** +- **Performance optimization in specialized applications** +- **Understanding the implementation details of FiniteDiff.jl** -These functions help FiniteDiff.jl work with various array types including `StaticArrays`, structured matrices, and GPU arrays. \ No newline at end of file +Most users should rely on the main API functions rather than calling these utilities directly. 
\ No newline at end of file From 83ff37e01afd56c61703a870581c095b5a2e5c4c Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 13 Aug 2025 13:05:23 +0000 Subject: [PATCH 11/26] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 27ce4e5..1a7a472 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FiniteDiff" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.27.0" +version = "2.28.0" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" From 1f156774e070da4c6d086c03f37689ebed65590f Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Sat, 16 Aug 2025 04:49:55 -0400 Subject: [PATCH 12/26] Fix repeated evaluation of fx0 in forward gradient computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Moved fx0 computation outside the loop in finite_difference_gradient! - Optimizes function evaluations from 2N to N+1 for forward differences - Maintains compatibility with both cached and uncached function values - Simplifies logic by eliminating conditional branches in the main loop Fixes #202 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/gradients.jl | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/gradients.jl b/src/gradients.jl index a108378..7be1bfc 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -372,37 +372,23 @@ function finite_difference_gradient!( end copyto!(c3, x) if fdtype == Val(:forward) + fx0 = typeof(fx) != Nothing ? fx : f(x) for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] - if typeof(fx) != Nothing - c3[i] += epsilon - dfi = (f(c3) - fx) / epsilon - c3[i] = x_old - else - fx0 = f(x) - c3[i] += epsilon - dfi = (f(c3) - fx0) / epsilon - c3[i] = x_old - end + c3[i] += epsilon + dfi = (f(c3) - fx0) / epsilon + c3[i] = x_old df[i] = real(dfi) if eltype(df) <: Complex if eltype(x) <: Complex c3[i] += im * epsilon - if typeof(fx) != Nothing - dfi = (f(c3) - fx) / (im * epsilon) - else - dfi = (f(c3) - fx0) / (im * epsilon) - end + dfi = (f(c3) - fx0) / (im * epsilon) c3[i] = x_old else c1[i] += im * epsilon - if typeof(fx) != Nothing - dfi = (f(c1) - fx) / (im * epsilon) - else - dfi = (f(c1) - fx0) / (im * epsilon) - end + dfi = (f(c1) - fx0) / (im * epsilon) c1[i] = x_old end df[i] -= im * imag(dfi) From fdf769d46d5b6f4970e979cd09674a5d45bba7f2 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 04:54:31 -0400 Subject: [PATCH 13/26] Update src/gradients.jl --- src/gradients.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gradients.jl b/src/gradients.jl index 7be1bfc..a95afe5 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -372,7 +372,7 @@ function finite_difference_gradient!( end copyto!(c3, x) if fdtype == Val(:forward) - fx0 = typeof(fx) != Nothing ? fx : f(x) + fx0 = fx === nothing ? 
fx : f(x) for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] From 5ce3014f9c50ca52b7a646b7377eb081828b11bd Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 04:58:04 -0400 Subject: [PATCH 14/26] Update src/gradients.jl --- src/gradients.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gradients.jl b/src/gradients.jl index a95afe5..40bbc39 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -372,7 +372,7 @@ function finite_difference_gradient!( end copyto!(c3, x) if fdtype == Val(:forward) - fx0 = fx === nothing ? fx : f(x) + fx0 = fx !== nothing ? fx : f(x) for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] From d79e161a35eeef6598e1a28884e427321decefad Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 05:06:48 -0400 Subject: [PATCH 15/26] Update ordinarydiffeq_tridiagonal_solve.jl --- test/downstream/ordinarydiffeq_tridiagonal_solve.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/downstream/ordinarydiffeq_tridiagonal_solve.jl b/test/downstream/ordinarydiffeq_tridiagonal_solve.jl index 841c638..9032a92 100644 --- a/test/downstream/ordinarydiffeq_tridiagonal_solve.jl +++ b/test/downstream/ordinarydiffeq_tridiagonal_solve.jl @@ -24,4 +24,5 @@ function loss(p) sol = solve(_prob, Rodas4P(autodiff=false), saveat=0.1) sum((sol .- sol_true).^2) end -@test ForwardDiff.gradient(loss, [1.0])[1] ≈ 0.6662949361011025 +@test ForwardDiff.gradient(loss, [1.0])[1] ≈ 0.6645766813735486 + From 62990cb47baeac956495c20270657b525dd49a0b Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 05:07:12 -0400 Subject: [PATCH 16/26] Update runtests.jl --- test/runtests.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index e0b8b27..ba2b432 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,7 +24,6 @@ if GROUP == "All" || GROUP == "Downstream" @time @safetestset "ODEs" begin import OrdinaryDiffEq @time @safetestset "OrdinaryDiffEq Tridiagonal" begin include("downstream/ordinarydiffeq_tridiagonal_solve.jl") end - include(joinpath(dirname(pathof(OrdinaryDiffEq)), "..", "test/interface/sparsediff_tests.jl")) end end From 3e64a972cf849e2d61960aee74a87a543ec03307 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sat, 16 Aug 2025 05:36:14 -0400 Subject: [PATCH 17/26] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1a7a472..f85df69 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FiniteDiff" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.28.0" +version = "2.28.1" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" From cfa0c68fa266329dbba13ed66f23d99e32fde2c9 Mon Sep 17 00:00:00 2001 From: Daniel_Doehring Date: Tue, 26 Aug 2025 10:58:11 +0200 Subject: [PATCH 18/26] Update Docs: Replace `SparseDiffTools.jl` with `SparseConnectivityTracer.jl` and `SparseMatrixColorings.jl` --- README.md | 5 ++--- docs/src/jacobians.md | 2 +- docs/src/tutorials.md | 18 ++++++++++++------ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d39b011..4de29ee 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,8 @@ Coloring vectors are allowed to be supplied to the Jacobian routines, and these the directional derivatives for constructing the Jacobian. 
For example, an accurate NxN tridiagonal Jacobian can be computed in just 4 `f` calls by using `colorvec=repeat(1:3,N÷3)`. For information on automatically generating coloring -vectors of sparse matrices, see [SparseDiffTools.jl](https://github.com/JuliaDiff/SparseDiffTools.jl). - -Hessian coloring support is coming soon! +vectors of sparse matrices, see [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) and +the now deprecated [SparseDiffTools.jl](https://github.com/JuliaDiff/SparseDiffTools.jl). ## Contributing diff --git a/docs/src/jacobians.md b/docs/src/jacobians.md index 3e9333b..d787d55 100644 --- a/docs/src/jacobians.md +++ b/docs/src/jacobians.md @@ -14,7 +14,7 @@ Jacobians support the following function signatures: FiniteDiff.jl provides efficient sparse Jacobian computation using graph coloring: - Pass a `colorvec` of matrix colors to enable column compression -- Provide `sparsity` as a sparse or structured matrix (`Tridiagonal`, `Banded`, etc.) +- Provide `sparsity` as a sparse (`SparseMatrixCSC`) or structured matrix (`Tridiagonal`, `Banded`, etc.) - Supports automatic sparsity pattern detection via ArrayInterfaceCore.jl - Results are automatically decompressed unless `sparsity=nothing` diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md index 44f797d..e3a2989 100644 --- a/docs/src/tutorials.md +++ b/docs/src/tutorials.md @@ -100,23 +100,29 @@ etc. all work. Now let's exploit sparsity. If we knew the sparsity pattern we could write it down analytically as a sparse matrix, but let's assume we don't. Thus we can -use [SparsityDetection.jl](https://github.com/JuliaDiffEq/SparsityDetection.jl) +use [SparseConnectivityTracer.jl](https://github.com/adrhill/SparseConnectivityTracer.jl) to automatically get the sparsity pattern of the Jacobian as a sparse matrix: ```julia -using SparsityDetection, SparseArrays +using SparseConnectivityTracer, SparseArrays in = rand(10) out = similar(in) -sparsity_pattern = sparsity!(f,out,in) + +detector = TracerSparsityDetector() +sparsity_pattern = jacobian_sparsity(f,out,in,detector) + sparsejac = Float64.(sparse(sparsity_pattern)) ``` -Then we can use [SparseDiffTools.jl](https://github.com/JuliaDiffEq/SparseDiffTools.jl) +Then we can use [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) to get the color vector: ```julia -using SparseDiffTools -colors = matrix_colors(sparsejac) +using SparseMatrixColorings +coloring_prob = ColoringProblem(; structure = :nonsymmetric, partition = :column) +coloring_alg = GreedyColoringAlgorithm(; decompression = :direct) +coloring_result = coloring(sparsejac, coloring_prob, coloring_alg) +colors = column_colors(coloring_result) ``` Now we can do sparse differentiation by passing the color vector and the sparsity From 1f79574272311110fc2072a8b06a64547bd02886 Mon Sep 17 00:00:00 2001 From: Daniel_Doehring Date: Tue, 26 Aug 2025 11:01:00 +0200 Subject: [PATCH 19/26] eg --- docs/src/jacobians.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/jacobians.md b/docs/src/jacobians.md index d787d55..1be2b2e 100644 --- a/docs/src/jacobians.md +++ b/docs/src/jacobians.md @@ -14,7 +14,8 @@ Jacobians support the following function signatures: FiniteDiff.jl provides efficient sparse Jacobian computation using graph coloring: - Pass a `colorvec` of matrix colors to enable column compression -- Provide `sparsity` as a sparse (`SparseMatrixCSC`) or structured matrix (`Tridiagonal`, `Banded`, etc.) +- Provide `sparsity` as a sparse (e.g. 
the default `SparseMatrixCSC`) + or structured matrix (`Tridiagonal`, `Banded`, etc.) - Supports automatic sparsity pattern detection via ArrayInterfaceCore.jl - Results are automatically decompressed unless `sparsity=nothing` From 42ae4528890de811927633660f54485acc4a2c5a Mon Sep 17 00:00:00 2001 From: abhro <5664668+abhro@users.noreply.github.com> Date: Sat, 11 Oct 2025 13:57:23 -0400 Subject: [PATCH 20/26] Use Documenter.jl v1 for docs/ --- docs/Project.toml | 2 +- docs/make.jl | 3 ++- docs/src/assets/Project.toml | 2 +- docs/src/reproducibility.md | 6 ++++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index b6eb11d..ec456b3 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] -Documenter = "0.27" +Documenter = "1" diff --git a/docs/make.jl b/docs/make.jl index 0e1bf07..7e00e64 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,7 +23,7 @@ open(joinpath(@__DIR__, "src", "index.md"), "w") do io for line in eachline(joinpath(dirname(@__DIR__), "README.md")) println(io, line) end - + for line in eachline(joinpath(@__DIR__, "src", "reproducibility.md")) println(io, line) end @@ -38,6 +38,7 @@ makedocs(sitename="FiniteDiff.jl", doctest=false, format=Documenter.HTML(assets=["assets/favicon.ico"], canonical="https://docs.sciml.ai/FiniteDiff/stable/"), + warnonly=[:missing_docs], pages=pages) deploydocs(repo="github.com/JuliaDiff/FiniteDiff.jl.git"; push_preview=true) diff --git a/docs/src/assets/Project.toml b/docs/src/assets/Project.toml index b6eb11d..ec456b3 100644 --- a/docs/src/assets/Project.toml +++ b/docs/src/assets/Project.toml @@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] -Documenter = "0.27" +Documenter = "1" diff --git a/docs/src/reproducibility.md b/docs/src/reproducibility.md index 333d67b..552f499 100644 --- a/docs/src/reproducibility.md +++ b/docs/src/reproducibility.md @@ -35,20 +35,22 @@ You can also download the manifest file and the project file. 
From c7f3b137dfcae15bb596ff7edfa4c01ca73bc6a7 Mon Sep 17 00:00:00 2001 From: abhro <5664668+abhro@users.noreply.github.com> Date: Sat, 11 Oct 2025 13:58:13 -0400 Subject: [PATCH 21/26] Use `[sources]` attribute in Project.toml Feature introduced in Pkg.jl v1.11 --- docs/Project.toml | 3 +++ docs/src/assets/Project.toml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/docs/Project.toml b/docs/Project.toml index ec456b3..989e28b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,3 +4,6 @@ FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] Documenter = "1" + +[sources] +FiniteDiff = {path = ".."} diff --git a/docs/src/assets/Project.toml b/docs/src/assets/Project.toml index ec456b3..989e28b 100644 --- a/docs/src/assets/Project.toml +++ b/docs/src/assets/Project.toml @@ -4,3 +4,6 @@ FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] Documenter = "1" + +[sources] +FiniteDiff = {path = ".."} From b90b7b8890d8bbc586495814de3347a0ea35ba5f Mon Sep 17 00:00:00 2001 From: abhro <5664668+abhro@users.noreply.github.com> Date: Sat, 11 Oct 2025 14:00:11 -0400 Subject: [PATCH 22/26] Update spacing and output for code block examples --- docs/src/tutorials.md | 46 ++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md index e3a2989..ee0d417 100644 --- a/docs/src/tutorials.md +++ b/docs/src/tutorials.md @@ -10,21 +10,21 @@ using FiniteDiff, StaticArrays fcalls = 0 function f(dx,x) # in-place - global fcalls += 1 - for i in 2:length(x)-1 - dx[i] = x[i-1] - 2x[i] + x[i+1] - end - dx[1] = -2x[1] + x[2] - dx[end] = x[end-1] - 2x[end] - nothing + global fcalls += 1 + for i in 2:length(x)-1 + dx[i] = x[i-1] - 2x[i] + x[i+1] + end + dx[1] = -2x[1] + x[2] + dx[end] = x[end-1] - 2x[end] + nothing end const N = 10 handleleft(x,i) = i==1 ? zero(eltype(x)) : x[i-1] handleright(x,i) = i==length(x) ? zero(eltype(x)) : x[i+1] function g(x) # out-of-place - global fcalls += 1 - @SVector [handleleft(x,i) - 2x[i] + handleright(x,i) for i in 1:N] + global fcalls += 1 + @SVector [handleleft(x,i) - 2x[i] + handleright(x,i) for i in 1:N] end ``` @@ -37,7 +37,7 @@ x = @SVector rand(N) FiniteDiff.finite_difference_jacobian(g,x) #= -10×10 SArray{Tuple{10,10},Float64,2,100} with indices SOneTo(10)×SOneTo(10): +10×10 SMatrix{10, 10, Float64, 100} with indices SOneTo(10)×SOneTo(10): -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 @@ -65,7 +65,7 @@ FiniteDiff.finite_difference_jacobian!(output,f,x) output #= -10×10 Array{Float64,2}: +10×10 Matrix{Float64}: -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 @@ -175,8 +175,8 @@ we get the analytical solution to the optimal matrix colors for our structured Jacobian. 
Now we can use this in our differencing routines: ```julia -tridiagcache = FiniteDiff.JacobianCache(x,colorvec=colors,sparsity=tridiagjac) -FiniteDiff.finite_difference_jacobian!(tridiagjac,f,x,tridiagcache) +tridiagcache = FiniteDiff.JacobianCache(x, colorvec=colors, sparsity=tridiagjac) +FiniteDiff.finite_difference_jacobian!(tridiagjac, f, x, tridiagcache) ``` It'll use a special iteration scheme dependent on the matrix type to accelerate @@ -189,14 +189,16 @@ differential equations, with a function like: ```julia function pde(out, x) - x = reshape(x, 100, 100) - out = reshape(out, 100, 100) - for i in 1:100 - for j in 1:100 - out[i, j] = x[i, j] + x[max(i -1, 1), j] + x[min(i+1, size(x, 1)), j] + x[i, max(j-1, 1)] + x[i, min(j+1, size(x, 2))] - end - end - return vec(out) + x = reshape(x, 100, 100) + out = reshape(out, 100, 100) + m = size(x, 1) + n = size(x, 2) + for i in 1:100 + for j in 1:100 + out[i, j] = x[i, j] + x[max(i-1, 1), j] + x[min(i+1, m), j] + x[i, max(j-1, 1)] + x[i, min(j+1, n)] + end + end + return vec(out) end x = rand(10000) ``` @@ -212,4 +214,4 @@ bbbcache = FiniteDiff.JacobianCache(x,colorvec=colorsbbb,sparsity=Jbbb) FiniteDiff.finite_difference_jacobian!(Jbbb, pde, x, bbbcache) ``` -And boom, a fast Jacobian filling algorithm on your special matrix. \ No newline at end of file +And boom, a fast Jacobian filling algorithm on your special matrix. From fc614fd759c19a7967d6be1a0ec0ba273ae5cef0 Mon Sep 17 00:00:00 2001 From: abhro <5664668+abhro@users.noreply.github.com> Date: Sat, 11 Oct 2025 14:00:27 -0400 Subject: [PATCH 23/26] Update syntax highlight tag in code block example --- src/gradients.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gradients.jl b/src/gradients.jl index 40bbc39..65630e1 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -85,7 +85,7 @@ Non-Allocating Cache Constructor # Output The output is a [`GradientCache`](@ref) struct. -```julia +```julia-repl julia> x = [1.0, 3.0] 2-element Vector{Float64}: 1.0 From 7af1dd6108d4ad0b2448a2855d51d0dd49267e4d Mon Sep 17 00:00:00 2001 From: abhro <5664668+abhro@users.noreply.github.com> Date: Sat, 11 Oct 2025 14:01:21 -0400 Subject: [PATCH 24/26] Update method signatures in docstrings Use consistent spacing and alignment for type annotations and default values --- src/derivatives.jl | 25 +++++++++---------- src/gradients.jl | 24 +++++++++---------- src/hessians.jl | 33 ++++++++++++------------- src/jacobians.jl | 60 +++++++++++++++++++++++----------------------- src/jvp.jl | 55 +++++++++++++++++++----------------------- 5 files changed, 95 insertions(+), 102 deletions(-) diff --git a/src/derivatives.jl b/src/derivatives.jl index 04cc7ff..222ff69 100644 --- a/src/derivatives.jl +++ b/src/derivatives.jl @@ -1,9 +1,10 @@ """ FiniteDiff.finite_difference_derivative( - f, x::T, - fdtype::Type{T1}=Val{:central}, - returntype::Type{T2}=eltype(x), - f_x::Union{Nothing,T}=nothing) + f, + x :: T, + fdtype :: Type{T1} = Val{:central}, + returntype :: Type{T2} = eltype(x), + f_x :: Union{Nothing,T} = nothing) Single-point derivative of scalar->scalar maps. 
""" @@ -43,7 +44,7 @@ end FiniteDiff.DerivativeCache( x :: AbstractArray{<:Number}, fx :: Union{Nothing,AbstractArray{<:Number}} = nothing, - epsilon :: Union{Nothing,AbstractArray{<:Real}} = nothing, + epsilon :: Union{Nothing,AbstractArray{<:Real}} = nothing, fdtype :: Type{T1} = Val{:central}, returntype :: Type{T2} = eltype(x)) @@ -146,14 +147,14 @@ end """ FiniteDiff.finite_difference_derivative!( - df::AbstractArray{<:Number}, + df :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - cache::DerivativeCache{T1,T2,fdtype,returntype}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + x :: AbstractArray{<:Number}, + cache :: DerivativeCache{T1,T2,fdtype,returntype}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) + Compute the derivative `df` of a scalar-valued map `f` at a collection of points `x`. Cached. diff --git a/src/gradients.jl b/src/gradients.jl index 65630e1..9458142 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -116,12 +116,12 @@ end FiniteDiff.finite_difference_gradient( f, x, - fdtype::Type{T1}=Val{:central}, - returntype::Type{T2}=eltype(x), - inplace::Type{Val{T3}}=Val{true}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) + fdtype :: Type{T1} = Val{:central}, + returntype :: Type{T2} = eltype(x), + inplace :: Type{Val{T3}} = Val{true}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) Compute the gradient of function `f` at point `x` using finite differences. @@ -235,13 +235,13 @@ end """ FiniteDiff.finite_difference_gradient!( - df::AbstractArray{<:Number}, + df :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - cache::GradientCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep - dir=true) + x :: AbstractArray{<:Number}, + cache :: GradientCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep + dir = true) Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. diff --git a/src/hessians.jl b/src/hessians.jl index 46b1678..562874f 100644 --- a/src/hessians.jl +++ b/src/hessians.jl @@ -58,12 +58,9 @@ end """ HessianCache( - xpp, - xpm, - xmp, - xmm, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) + xpp, xpm, xmp, xmm, + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) Non-allocating cache constructor. """ @@ -78,8 +75,8 @@ end """ HessianCache( x, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) Allocating cache constructor. """ @@ -94,11 +91,11 @@ end """ finite_difference_hessian( f, - x::AbstractArray{<:Number}, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Compute the Hessian matrix of scalar function `f` at point `x` using finite differences. 
@@ -171,13 +168,13 @@ end """ finite_difference_hessian!( - H::AbstractMatrix, + H :: AbstractMatrix, f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:hcentral}, + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:hcentral}, inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Cache-less. """ diff --git a/src/jacobians.jl b/src/jacobians.jl index 0793269..e1b742c 100644 --- a/src/jacobians.jl +++ b/src/jacobians.jl @@ -81,11 +81,11 @@ end """ FiniteDiff.JacobianCache( - x1 , - fx , + x1, + fx, fx1, fdtype :: Type{T1} = Val{:central}, - returntype :: Type{T2} = eltype(fx), + returntype :: Type{T2} = eltype(fx); colorvec = 1:length(x1), sparsity = nothing) @@ -189,16 +189,16 @@ end """ FiniteDiff.finite_difference_jacobian( f, - x::AbstractArray{<:Number}, - fdtype::Type{T1}=Val{:forward}, - returntype::Type{T2}=eltype(x), - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec=1:length(x), - sparsity=nothing, - jac_prototype=nothing, - dir=true) + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = nothing, + jac_prototype = nothing, + dir = true) Compute the Jacobian matrix of function `f` at point `x` using finite differences. @@ -265,12 +265,12 @@ void_setindex!(args...) = (setindex!(args...); return) f, x, cache::JacobianCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, jac_prototype = nothing, - dir=true) + dir = true) Cached. """ @@ -421,14 +421,14 @@ end """ finite_difference_jacobian!( - J::AbstractMatrix, + J :: AbstractMatrix, f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:forward}, - returntype :: Type{T2}=eltype(x), + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), f_in :: Union{T2,Nothing}=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, colorvec = 1:length(x), sparsity = ArrayInterfaceCore.has_sparsestruct(J) ? J : nothing) @@ -480,15 +480,15 @@ end """ FiniteDiff.finite_difference_jacobian!( - J::AbstractMatrix{<:Number}, + J :: AbstractMatrix{<:Number}, f, - x::AbstractArray{<:Number}, - cache::JacobianCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + x :: AbstractArray{<:Number}, + cache :: JacobianCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, colorvec = cache.colorvec, sparsity = cache.sparsity, - dir=true) + dir = true) Cached. """ diff --git a/src/jvp.jl b/src/jvp.jl index 7493aa2..fa940aa 100644 --- a/src/jvp.jl +++ b/src/jvp.jl @@ -17,9 +17,7 @@ mutable struct JVPCache{X1, FX1, FDType} end """ - FiniteDiff.JVPCache( - x, - fdtype::Type{T1} = Val{:forward}) + FiniteDiff.JVPCache(x, fdtype::Type{T1} = Val{:forward}) Allocating cache constructor for Jacobian-vector product computations. 
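
Similarly, a minimal sketch of the public JVP entry point documented in these hunks (forward differences, out-of-place `f`; the expected value is approximate up to finite difference error):

```julia
using FiniteDiff

f(x) = [x[1]^2 + x[2], 3x[2]]
x = [1.0, 2.0]
v = [1.0, 0.0]

# J(x) = [2x[1] 1; 0 3], so J(x) * v ≈ [2.0, 0.0]
FiniteDiff.finite_difference_jvp(f, x, v)
```
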
@@ -47,10 +45,7 @@ function JVPCache( end """ - FiniteDiff.JVPCache( - x, - fx1, - fdtype::Type{T1} = Val{:forward}) + FiniteDiff.JVPCache(x, fx1, fdtype::Type{T1} = Val{:forward}) Non-allocating cache constructor for Jacobian-vector product computations. @@ -87,12 +82,12 @@ end """ FiniteDiff.finite_difference_jvp( f, - x::AbstractArray{<:Number}, - v::AbstractArray{<:Number}, - fdtype::Type{T1}=Val{:forward}, - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Compute the Jacobian-vector product `J(x) * v` using finite differences. @@ -156,8 +151,8 @@ end x, v, cache::JVPCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Cached. """ @@ -195,15 +190,15 @@ end """ finite_difference_jvp!( - jvp::AbstractArray{<:Number}, + jvp :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - v::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:forward}, - returntype :: Type{T2}=eltype(x), - f_in :: Union{T2,Nothing}=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), + f_in :: Union{T2,Nothing} = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Cache-less. """ @@ -229,14 +224,14 @@ end """ FiniteDiff.finite_difference_jvp!( - jvp::AbstractArray{<:Number}, + jvp :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - v::AbstractArray{<:Number}, - cache::JVPCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + cache :: JVPCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) Cached. """ From c49cdd273ee9c107ec817ae49e893896d4348ced Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 12 Oct 2025 05:59:28 -0400 Subject: [PATCH 25/26] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f85df69..9cb839b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FiniteDiff" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.28.1" +version = "2.29.0" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" From 0c0561857f9bfb228d49ff2903bbbe0011015e8a Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 12 Oct 2025 06:01:52 -0400 Subject: [PATCH 26/26] Update finitedifftests.jl for non-allocating --- test/finitedifftests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/finitedifftests.jl b/test/finitedifftests.jl index e975202..45d38e1 100644 --- a/test/finitedifftests.jl +++ b/test/finitedifftests.jl @@ -334,7 +334,7 @@ end res = zero(x) FiniteDiff.finite_difference_gradient!(res, _f, x, cache) @test res ≈ _∇f(x) - @test_broken ret_allocs(res, _f, x, cache) == 0 + @test ret_allocs(res, _f, x, cache) == 0 # Can we now change the field? _x = rand(2) @@ -342,7 +342,7 @@ end _cache = @set _cache.fx = _f(_x) FiniteDiff.finite_difference_gradient!(res, _f,_x, _cache) @test res ≈ _∇f(_x) - @test_broken ret_allocs(res, _f, _x, _cache) == 0 + @test ret_allocs(res, _f, _x, _cache) == 0 end _g(x) = x[1]^2 + x[2]
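
The non-allocating pattern asserted by this updated test can be reproduced with the public API roughly as follows. This is a sketch under the assumption that the allocating `GradientCache` constructor takes the `(df, x, fdtype)` argument order shown in the docstrings above; the `Setfield.@set` step mirrors the cache update used in the test:

```julia
using FiniteDiff, Setfield

f(x) = x[1]^2 + x[2]^2
x = rand(2)
df = similar(x)

# Forward differences can reuse a precomputed f(x), saving one call per gradient
cache = FiniteDiff.GradientCache(df, x, Val{:forward})
cache = @set cache.fx = f(x)

FiniteDiff.finite_difference_gradient!(df, f, x, cache)  # df ≈ 2x, without allocations
```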