diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index bed7d65..2afad1e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,7 +18,7 @@ jobs: version: - '1' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 1af17da..598d45f 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -12,7 +12,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@latest with: version: '1' diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 6447099..1cff6d2 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -19,14 +19,14 @@ jobs: package: - {user: SciML, repo: OrdinaryDiffEq.jl, group: InterfaceII} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.julia-version }} arch: x64 - uses: julia-actions/julia-buildpkg@latest - name: Clone Downstream - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} path: downstream diff --git a/Project.toml b/Project.toml index 27ce4e5..9cb839b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FiniteDiff" uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" -version = "2.27.0" +version = "2.29.0" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" diff --git a/README.md b/README.md index d39b011..4de29ee 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,8 @@ Coloring vectors are allowed to be supplied to the Jacobian routines, and these the directional derivatives for constructing the Jacobian. For example, an accurate NxN tridiagonal Jacobian can be computed in just 4 `f` calls by using `colorvec=repeat(1:3,N÷3)`. For information on automatically generating coloring -vectors of sparse matrices, see [SparseDiffTools.jl](https://github.com/JuliaDiff/SparseDiffTools.jl). - -Hessian coloring support is coming soon! +vectors of sparse matrices, see [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) and +the now deprecated [SparseDiffTools.jl](https://github.com/JuliaDiff/SparseDiffTools.jl). 
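+For instance, a minimal sketch of the tridiagonal case described above (the
+stencil function `f!` and the pattern matrix `J` are illustrative):
+
+```julia
+using FiniteDiff, LinearAlgebra
+N = 9
+function f!(dx, x) # tridiagonal stencil
+    for i in 2:N-1
+        dx[i] = x[i-1] - 2x[i] + x[i+1]
+    end
+    dx[1] = -2x[1] + x[2]
+    dx[N] = x[N-1] - 2x[N]
+    nothing
+end
+x = rand(N)
+J = Tridiagonal(ones(N - 1), fill(-2.0, N), ones(N - 1)) # sparsity pattern
+cache = FiniteDiff.JacobianCache(x, colorvec = repeat(1:3, N ÷ 3), sparsity = J)
+FiniteDiff.finite_difference_jacobian!(J, f!, x, cache) # 4 calls to f!
+```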
## Contributing diff --git a/docs/Project.toml b/docs/Project.toml index b6eb11d..989e28b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,4 +3,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] -Documenter = "0.27" +Documenter = "1" + +[sources] +FiniteDiff = {path = ".."} diff --git a/docs/make.jl b/docs/make.jl index cc1e891..7e00e64 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,6 +23,7 @@ open(joinpath(@__DIR__, "src", "index.md"), "w") do io for line in eachline(joinpath(dirname(@__DIR__), "README.md")) println(io, line) end + for line in eachline(joinpath(@__DIR__, "src", "reproducibility.md")) println(io, line) end @@ -37,6 +38,7 @@ makedocs(sitename="FiniteDiff.jl", doctest=false, format=Documenter.HTML(assets=["assets/favicon.ico"], canonical="https://docs.sciml.ai/FiniteDiff/stable/"), + warnonly=[:missing_docs], pages=pages) deploydocs(repo="github.com/JuliaDiff/FiniteDiff.jl.git"; push_preview=true) diff --git a/docs/pages.jl b/docs/pages.jl index d4fa44b..402888e 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -1,3 +1,13 @@ # Put in a separate page so it can be used by SciMLDocs.jl -pages = ["Home" => "index.md", "tutorials.md", "api.md"] +pages = [ + "Home" => "index.md", + "Tutorials" => "tutorials.md", + "Derivatives" => "derivatives.md", + "Gradients" => "gradients.md", + "Jacobians" => "jacobians.md", + "Hessians" => "hessians.md", + "Jacobian-Vector Products" => "jvp.md", + "Step Size Selection" => "epsilons.md", + "Internal Utilities" => "utilities.md" +] diff --git a/docs/src/api.md b/docs/src/api.md deleted file mode 100644 index 44f437c..0000000 --- a/docs/src/api.md +++ /dev/null @@ -1,58 +0,0 @@ -# API - -```@docs -FiniteDiff -``` - -## Derivatives - -```@docs -FiniteDiff.finite_difference_derivative -FiniteDiff.finite_difference_derivative! -FiniteDiff.DerivativeCache -``` - -## Gradients - -```@docs -FiniteDiff.finite_difference_gradient -FiniteDiff.finite_difference_gradient! -FiniteDiff.GradientCache -``` - -Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. - -Note that here `fx` is a cached function call of `f`. If you provide `fx`, then -`fx` will be used in the forward differencing method to skip a function call. -It is on you to make sure that you update `cache.fx` every time before -calling `FiniteDiff.finite_difference_gradient!`. If `fx` is an immutable, e.g. a scalar or -a `StaticArray`, `cache.fx` should be updated using `@set` from [Setfield.jl](https://github.com/jw3126/Setfield.jl). -A good use of this is if you have a cache array for the output of `fx` already being used, you can make it alias -into the differencing algorithm here. - -## Jacobians - -```@docs -FiniteDiff.finite_difference_jacobian -FiniteDiff.finite_difference_jacobian! -FiniteDiff.JacobianCache -``` - -Jacobians are for functions `f!(fx,x)` when using in-place `finite_difference_jacobian!`, -and `fx = f(x)` when using out-of-place `finite_difference_jacobian`. The out-of-place -jacobian will return a similar type as `jac_prototype` if it is not a `nothing`. For non-square -Jacobians, a cache which specifies the vector `fx` is required. - -For sparse differentiation, pass a `colorvec` of matrix colors. `sparsity` should be a sparse -or structured matrix (`Tridiagonal`, `Banded`, etc. 
according to the ArrayInterfaceCore.jl specs) -to allow for decompression, otherwise the result will be the colorvec compressed Jacobian. - -## Hessians - -```@docs -FiniteDiff.finite_difference_hessian -FiniteDiff.finite_difference_hessian! -FiniteDiff.HessianCache -``` - -Hessians are for functions `f(x)` which return a scalar. diff --git a/docs/src/assets/Project.toml b/docs/src/assets/Project.toml index b6eb11d..989e28b 100644 --- a/docs/src/assets/Project.toml +++ b/docs/src/assets/Project.toml @@ -3,4 +3,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] -Documenter = "0.27" +Documenter = "1" + +[sources] +FiniteDiff = {path = ".."} diff --git a/docs/src/derivatives.md b/docs/src/derivatives.md new file mode 100644 index 0000000..6304b7c --- /dev/null +++ b/docs/src/derivatives.md @@ -0,0 +1,26 @@ +# Derivatives + +Functions for computing derivatives of scalar-valued functions. + +## Overview + +Derivatives are computed for scalar→scalar maps `f(x)` where `x` can be a single point or a collection of points. The derivative functions support: + +- **Forward differences**: `O(1)` function evaluation per point, `O(h)` accuracy +- **Central differences**: `O(2)` function evaluations per point, `O(h²)` accuracy +- **Complex step**: `O(1)` function evaluation per point, machine precision accuracy + +For optimal performance with repeated computations, use the cached versions with `DerivativeCache`. + +## Functions + +```@docs +FiniteDiff.finite_difference_derivative +FiniteDiff.finite_difference_derivative! +``` + +## Cache + +```@docs +FiniteDiff.DerivativeCache +``` \ No newline at end of file diff --git a/docs/src/epsilons.md b/docs/src/epsilons.md new file mode 100644 index 0000000..3e8eb23 --- /dev/null +++ b/docs/src/epsilons.md @@ -0,0 +1,71 @@ +# Step Size Selection (Epsilons) + +Functions and theory for computing optimal step sizes in finite difference approximations. + +## Theory + +The choice of step size (epsilon) in finite difference methods is critical for accuracy. Too large a step leads to truncation error, while too small a step leads to round-off error. The optimal step size balances these two sources of error. + +### Error Analysis + +For a function `f` with bounded derivatives, the total error in finite difference approximations consists of: + +1. **Truncation Error**: Comes from the finite difference approximation itself + - Forward differences: `O(h)` where `h` is the step size + - Central differences: `O(h²)` + - Hessian central differences: `O(h²)` for second derivatives + +2. 
**Round-off Error**: Comes from floating-point arithmetic + - Forward differences: `O(eps/h)` where `eps` is machine epsilon + - Central differences: `O(eps/h)` + +### Optimal Step Sizes + +Minimizing the total error `truncation + round-off` gives optimal step sizes: + +- **Forward differences**: `h* = sqrt(eps)` - balances `O(h)` truncation with `O(eps/h)` round-off +- **Central differences**: `h* = eps^(1/3)` - balances `O(h²)` truncation with `O(eps/h)` round-off +- **Hessian central**: `h* = eps^(1/4)` - balances `O(h²)` truncation for mixed derivatives +- **Complex step**: `h* = eps` - no subtractive cancellation, only limited by machine precision + +## Adaptive Step Sizing + +The step size computation uses both relative and absolute components: + +```julia +epsilon = max(relstep * abs(x), absstep) * dir +``` + +This ensures: +- **Large values**: Use relative step `relstep * |x|` for scale-invariant accuracy +- **Small values**: Use absolute step `absstep` to avoid underflow +- **Direction**: Multiply by `dir` (±1) for forward differences + +## Implementation + +The step size computation is handled by internal functions: + +- **`compute_epsilon(fdtype, x, relstep, absstep, dir)`**: Computes the actual step size for a given finite difference method and input value +- **`default_relstep(fdtype, T)`**: Returns the optimal relative step size for a given method and numeric type + +These functions are called automatically by all finite difference routines, but understanding their behavior can help with custom implementations or debugging numerical issues. + +## Special Cases + +### Complex Step Differentiation + +For complex step differentiation, the step size is simply machine epsilon since this method avoids subtractive cancellation entirely: + +⚠️ **Important**: The function `f` must be complex analytic when the input is complex! + +### Sparse Jacobians + +When computing sparse Jacobians with graph coloring, the step size is computed based on the norm of the perturbation vector to ensure balanced accuracy across all columns in the same color group. + +## Practical Considerations + +- **Default step sizes** are optimal for most smooth functions +- **Custom step sizes** may be needed for functions with unusual scaling or near-discontinuities +- **Relative steps** should scale with the magnitude of the input +- **Absolute steps** provide a fallback for inputs near zero +- **Direction parameter** allows for one-sided differences when needed (e.g., at domain boundaries) \ No newline at end of file diff --git a/docs/src/gradients.md b/docs/src/gradients.md new file mode 100644 index 0000000..9e5ddf4 --- /dev/null +++ b/docs/src/gradients.md @@ -0,0 +1,38 @@ +# Gradients + +Functions for computing gradients of scalar-valued functions with respect to vector inputs. 
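+For example, a quick sketch using the default central differences (the test
+function is arbitrary):
+
+```julia
+using FiniteDiff
+f(x) = x[1]^2 + x[2]^2
+FiniteDiff.finite_difference_gradient(f, [1.0, 2.0]) # ≈ [2.0, 4.0]
+```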
+ +## Function Types + +Gradients support two types of function mappings: + +- **Vector→scalar**: `f(x)` where `x` is a vector and `f` returns a scalar +- **Scalar→vector**: `f(fx, x)` for in-place evaluation or `fx = f(x)` for out-of-place + +## Performance Notes + +- **Forward differences**: `O(n)` function evaluations, `O(h)` accuracy +- **Central differences**: `O(2n)` function evaluations, `O(h²)` accuracy +- **Complex step**: `O(n)` function evaluations, machine precision accuracy + +## Cache Management + +When using `GradientCache` with pre-computed function values: + +- If you provide `fx`, then `fx` will be used in forward differencing to skip a function call +- You must update `cache.fx` before each call to `finite_difference_gradient!` +- For immutable types (scalars, `StaticArray`), use `@set` from [Setfield.jl](https://github.com/jw3126/Setfield.jl) +- Consider aliasing existing arrays into the cache for memory efficiency + +## Functions + +```@docs +FiniteDiff.finite_difference_gradient +FiniteDiff.finite_difference_gradient! +``` + +## Cache + +```@docs +FiniteDiff.GradientCache +``` \ No newline at end of file diff --git a/docs/src/hessians.md b/docs/src/hessians.md new file mode 100644 index 0000000..fb54252 --- /dev/null +++ b/docs/src/hessians.md @@ -0,0 +1,45 @@ +# Hessians + +Functions for computing Hessian matrices of scalar-valued functions. + +## Function Requirements + +Hessian functions are designed for scalar-valued functions `f(x)` where: + +- `x` is a vector of parameters +- `f(x)` returns a scalar value +- The Hessian `H[i,j] = ∂²f/(∂x[i]∂x[j])` is automatically symmetrized + +## Mathematical Background + +For a scalar function `f: ℝⁿ → ℝ`, the Hessian central difference approximation is: + +``` +H[i,j] ≈ (f(x + eᵢhᵢ + eⱼhⱼ) - f(x + eᵢhᵢ - eⱼhⱼ) - f(x - eᵢhᵢ + eⱼhⱼ) + f(x - eᵢhᵢ - eⱼhⱼ)) / (4hᵢhⱼ) +``` + +where `eᵢ` is the i-th unit vector and `hᵢ` is the step size in dimension i. + +## Performance Considerations + +- **Complexity**: Requires `O(n²)` function evaluations for an n-dimensional input +- **Accuracy**: Central differences provide `O(h²)` accuracy for second derivatives +- **Memory**: The result is returned as a `Symmetric` matrix view +- **Alternative**: For large problems, consider computing the gradient twice instead + +## StaticArrays Support + +The cache constructor automatically detects `StaticArray` types and adjusts the `inplace` parameter accordingly for optimal performance. + +## Functions + +```@docs +FiniteDiff.finite_difference_hessian +FiniteDiff.finite_difference_hessian! +``` + +## Cache + +```@docs +FiniteDiff.HessianCache +``` \ No newline at end of file diff --git a/docs/src/jacobians.md b/docs/src/jacobians.md new file mode 100644 index 0000000..1be2b2e --- /dev/null +++ b/docs/src/jacobians.md @@ -0,0 +1,42 @@ +# Jacobians + +Functions for computing Jacobian matrices of vector-valued functions. + +## Function Types + +Jacobians support the following function signatures: + +- **Out-of-place**: `fx = f(x)` where both `x` and `fx` are vectors +- **In-place**: `f!(fx, x)` where `f!` modifies `fx` in-place + +## Sparse Jacobians + +FiniteDiff.jl provides efficient sparse Jacobian computation using graph coloring: + +- Pass a `colorvec` of matrix colors to enable column compression +- Provide `sparsity` as a sparse (e.g. the default `SparseMatrixCSC`) + or structured matrix (`Tridiagonal`, `Banded`, etc.) 
+ +- Structured matrix types are handled through the ArrayInterface.jl interface to allow decompression +- Results are automatically decompressed unless `sparsity=nothing` + +## Performance Notes + +- **Forward differences**: `O(n)` function evaluations, `O(h)` accuracy +- **Central differences**: `O(2n)` function evaluations, `O(h²)` accuracy +- **Complex step**: `O(n)` function evaluations, machine precision accuracy +- **Sparse Jacobians**: Use graph coloring to reduce function evaluations significantly + +For non-square Jacobians, specify the output vector `fx` when creating the cache to ensure proper sizing. + +## Functions + +```@docs +FiniteDiff.finite_difference_jacobian +FiniteDiff.finite_difference_jacobian! +``` + +## Cache + +```@docs +FiniteDiff.JacobianCache +``` \ No newline at end of file diff --git a/docs/src/jvp.md b/docs/src/jvp.md new file mode 100644 index 0000000..301f876 --- /dev/null +++ b/docs/src/jvp.md @@ -0,0 +1,48 @@ +# Jacobian-Vector Products (JVP) + +Functions for computing Jacobian-vector products efficiently without forming the full Jacobian matrix. + +## Mathematical Background + +The JVP computes `J(x) * v` where `J(x)` is the Jacobian of function `f` at point `x` and `v` is a direction vector. This is computed using finite difference approximations: + +- **Forward**: `J(x) * v ≈ (f(x + h*v) - f(x)) / h` +- **Central**: `J(x) * v ≈ (f(x + h*v) - f(x - h*v)) / (2h)` + +where `h` is the step size and `v` is the direction vector. + +## Performance Benefits + +JVP functions are particularly efficient when you only need directional derivatives: + +- **Function evaluations**: Only 2 function evaluations (vs `O(n)` for full Jacobian) +- **Forward differences**: 2 function evaluations, `O(h)` accuracy +- **Central differences**: 2 function evaluations, `O(h²)` accuracy +- **Memory efficient**: No need to store the full Jacobian matrix + +## Use Cases + +JVP is particularly useful for: + +- **Optimization**: Computing directional derivatives along search directions +- **Sparse directions**: When `v` has few non-zero entries +- **Memory constraints**: Avoiding storage of large Jacobian matrices +- **Newton-Krylov methods**: Iterative linear solvers for the Newton step need only `J * v` products, not the full Jacobian + +## Limitations + +- **Complex step**: JVP does not currently support complex step differentiation (`Val(:complex)`) +- **In-place functions**: For in-place function evaluation, ensure proper cache sizing + +## Functions + +```@docs +FiniteDiff.finite_difference_jvp +FiniteDiff.finite_difference_jvp! +``` + +## Cache + +```@docs +FiniteDiff.JVPCache +``` \ No newline at end of file diff --git a/docs/src/reproducibility.md b/docs/src/reproducibility.md index 333d67b..552f499 100644 --- a/docs/src/reproducibility.md +++ b/docs/src/reproducibility.md @@ -35,20 +35,22 @@ You can also download the manifest file and the project file. diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md index 44f797d..ee0d417 100644 --- a/docs/src/tutorials.md +++ b/docs/src/tutorials.md @@ -10,21 +10,21 @@ using FiniteDiff, StaticArrays fcalls = 0 function f(dx,x) # in-place - global fcalls += 1 - for i in 2:length(x)-1 - dx[i] = x[i-1] - 2x[i] + x[i+1] - end - dx[1] = -2x[1] + x[2] - dx[end] = x[end-1] - 2x[end] - nothing + global fcalls += 1 + for i in 2:length(x)-1 + dx[i] = x[i-1] - 2x[i] + x[i+1] + end + dx[1] = -2x[1] + x[2] + dx[end] = x[end-1] - 2x[end] + nothing end const N = 10 handleleft(x,i) = i==1 ? zero(eltype(x)) : x[i-1] handleright(x,i) = i==length(x) ?
zero(eltype(x)) : x[i+1] function g(x) # out-of-place - global fcalls += 1 - @SVector [handleleft(x,i) - 2x[i] + handleright(x,i) for i in 1:N] + global fcalls += 1 + @SVector [handleleft(x,i) - 2x[i] + handleright(x,i) for i in 1:N] end ``` @@ -37,7 +37,7 @@ x = @SVector rand(N) FiniteDiff.finite_difference_jacobian(g,x) #= -10×10 SArray{Tuple{10,10},Float64,2,100} with indices SOneTo(10)×SOneTo(10): +10×10 SMatrix{10, 10, Float64, 100} with indices SOneTo(10)×SOneTo(10): -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 @@ -65,7 +65,7 @@ FiniteDiff.finite_difference_jacobian!(output,f,x) output #= -10×10 Array{Float64,2}: +10×10 Matrix{Float64}: -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 -2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 @@ -100,23 +100,29 @@ etc. all work. Now let's exploit sparsity. If we knew the sparsity pattern we could write it down analytically as a sparse matrix, but let's assume we don't. Thus we can -use [SparsityDetection.jl](https://github.com/JuliaDiffEq/SparsityDetection.jl) +use [SparseConnectivityTracer.jl](https://github.com/adrhill/SparseConnectivityTracer.jl) to automatically get the sparsity pattern of the Jacobian as a sparse matrix: ```julia -using SparsityDetection, SparseArrays +using SparseConnectivityTracer, SparseArrays in = rand(10) out = similar(in) -sparsity_pattern = sparsity!(f,out,in) + +detector = TracerSparsityDetector() +sparsity_pattern = jacobian_sparsity(f,out,in,detector) + sparsejac = Float64.(sparse(sparsity_pattern)) ``` -Then we can use [SparseDiffTools.jl](https://github.com/JuliaDiffEq/SparseDiffTools.jl) +Then we can use [SparseMatrixColorings.jl](https://github.com/gdalle/SparseMatrixColorings.jl) to get the color vector: ```julia -using SparseDiffTools -colors = matrix_colors(sparsejac) +using SparseMatrixColorings +coloring_prob = ColoringProblem(; structure = :nonsymmetric, partition = :column) +coloring_alg = GreedyColoringAlgorithm(; decompression = :direct) +coloring_result = coloring(sparsejac, coloring_prob, coloring_alg) +colors = column_colors(coloring_result) ``` Now we can do sparse differentiation by passing the color vector and the sparsity @@ -169,8 +175,8 @@ we get the analytical solution to the optimal matrix colors for our structured Jacobian. 
Now we can use this in our differencing routines: ```julia -tridiagcache = FiniteDiff.JacobianCache(x,colorvec=colors,sparsity=tridiagjac) -FiniteDiff.finite_difference_jacobian!(tridiagjac,f,x,tridiagcache) +tridiagcache = FiniteDiff.JacobianCache(x, colorvec=colors, sparsity=tridiagjac) +FiniteDiff.finite_difference_jacobian!(tridiagjac, f, x, tridiagcache) ``` It'll use a special iteration scheme dependent on the matrix type to accelerate @@ -183,14 +189,16 @@ differential equations, with a function like: ```julia function pde(out, x) - x = reshape(x, 100, 100) - out = reshape(out, 100, 100) - for i in 1:100 - for j in 1:100 - out[i, j] = x[i, j] + x[max(i -1, 1), j] + x[min(i+1, size(x, 1)), j] + x[i, max(j-1, 1)] + x[i, min(j+1, size(x, 2))] - end - end - return vec(out) + x = reshape(x, 100, 100) + out = reshape(out, 100, 100) + m = size(x, 1) + n = size(x, 2) + for i in 1:100 + for j in 1:100 + out[i, j] = x[i, j] + x[max(i-1, 1), j] + x[min(i+1, m), j] + x[i, max(j-1, 1)] + x[i, min(j+1, n)] + end + end + return vec(out) end x = rand(10000) ``` @@ -206,4 +214,4 @@ bbbcache = FiniteDiff.JacobianCache(x,colorvec=colorsbbb,sparsity=Jbbb) FiniteDiff.finite_difference_jacobian!(Jbbb, pde, x, bbbcache) ``` -And boom, a fast Jacobian filling algorithm on your special matrix. \ No newline at end of file +And boom, a fast Jacobian filling algorithm on your special matrix. diff --git a/docs/src/utilities.md b/docs/src/utilities.md new file mode 100644 index 0000000..200b04d --- /dev/null +++ b/docs/src/utilities.md @@ -0,0 +1,73 @@ +# Internal Utilities + +Internal utility functions and algorithms used throughout FiniteDiff.jl. These functions are primarily for advanced users who need to understand or extend the library's functionality. + +## Array Utilities + +Internal utilities for handling different array types and ensuring compatibility across the Julia ecosystem: + +### Type Conversion Functions + +These functions help FiniteDiff.jl work with various array types including `StaticArrays`, structured matrices, and GPU arrays: + +- **`_vec(x)`**: Vectorizes arrays while preserving scalars unchanged +- **`_mat(x)`**: Ensures matrix format, converting vectors to column matrices +- **`setindex(x, v, i...)`**: Non-mutating setindex operations for immutable arrays + +### Hessian Utilities + +Helper functions specifically for Hessian computation: + +- **`_hessian_inplace(x)`**: Determines whether to use in-place operations based on array mutability +- **`__Symmetric(x)`**: Wraps matrices in `Symmetric` views for mathematical correctness +- **`mutable_zeromatrix(x)`**: Creates mutable zero matrices compatible with input arrays + +## Sparse Jacobian Internals + +Graph coloring and sparse matrix algorithms for efficient Jacobian computation: + +### Matrix Construction + +- **`_make_Ji(...)`**: Constructs Jacobian contribution matrices for both sparse and dense cases +- **`_colorediteration!(...)`**: Core loop for sparse Jacobian assembly using graph coloring +- **`_findstructralnz(A)`**: Finds structural non-zero patterns in dense matrices + +### Sparsity Detection + +- **`_use_findstructralnz(sparsity)`**: Determines when to use structural sparsity information +- **`_use_sparseCSC_common_sparsity(J, sparsity)`**: Tests for common sparsity patterns between matrices + +### Performance Optimizations + +- **`fast_jacobian_setindex!(...)`**: Optimized index operations for sparse Jacobian assembly +- **`void_setindex!(...)`**: Wrapper for setindex operations that discards return values + +## JVP 
Utilities + +- **`resize!(cache::JVPCache, i)`**: Resizes JVP cache arrays for dynamic problems +- **`resize!(cache::JacobianCache, i)`**: Resizes Jacobian cache arrays for dynamic problems + +## Error Handling + +- **`fdtype_error(::Type{T})`**: Provides informative error messages for unsupported finite difference type combinations + +## Design Philosophy + +These internal utilities follow several design principles: + +1. **Type Stability**: All functions are designed for type-stable operations +2. **Zero Allocation**: Internal utilities avoid allocations when possible +3. **Genericity**: Support for multiple array types (dense, sparse, static, GPU) +4. **Performance**: Optimized for the specific needs of finite difference computation +5. **Safety**: Proper error handling and bounds checking where needed + +## Advanced Usage + +These functions are primarily internal, but advanced users may find them useful for: + +- **Custom finite difference implementations** +- **Integration with other differentiation libraries** +- **Performance optimization in specialized applications** +- **Understanding the implementation details of FiniteDiff.jl** + +Most users should rely on the main API functions rather than calling these utilities directly. \ No newline at end of file diff --git a/src/FiniteDiff.jl b/src/FiniteDiff.jl index 2481a70..d1a7d06 100644 --- a/src/FiniteDiff.jl +++ b/src/FiniteDiff.jl @@ -9,26 +9,126 @@ using LinearAlgebra, ArrayInterface import Base: resize! +""" + _vec(x) + +Internal utility function to vectorize arrays while preserving scalars. + +# Arguments +- `x`: Array or scalar + +# Returns +- `vec(x)` for arrays, `x` unchanged for scalars +""" _vec(x) = vec(x) _vec(x::Number) = x +""" + _mat(x) + +Internal utility function to ensure matrix format. + +Converts vectors to column matrices while preserving existing matrices. +Used internally to ensure consistent matrix dimensions for operations. + +# Arguments +- `x`: Matrix or vector + +# Returns +- `x` unchanged if already a matrix +- Reshaped column matrix if `x` is a vector +""" _mat(x::AbstractMatrix) = x _mat(x::AbstractVector) = reshape(x, (axes(x, 1), Base.OneTo(1))) # Setindex overloads without piracy setindex(x...) = Base.setindex(x...) +""" + setindex(x::AbstractArray, v, i...) + +Non-mutating setindex operation that returns a copy with modified elements. + +Creates a mutable copy of array `x`, sets the specified indices to value `v`, +and returns the modified copy. This avoids type piracy while providing +setindex functionality for immutable arrays. + +# Arguments +- `x::AbstractArray`: Array to modify (not mutated) +- `v`: Value to set at the specified indices +- `i...`: Indices where the value should be set + +# Returns +- Modified copy of `x` with `x[i...] = v` + +# Examples +```julia +x = [1, 2, 3] +y = setindex(x, 99, 2) # y = [1, 99, 3], x unchanged +``` +""" function setindex(x::AbstractArray, v, i...) _x = Base.copymutable(x) _x[i...] = v return _x end +""" + setindex(x::AbstractVector, v, i::Int) + +Broadcasted setindex operation for vectors using boolean masking. + +Sets the i-th element of vector `x` to value `v` using broadcasting operations. +This implementation uses boolean masks to avoid explicit copying and provide +efficient vectorized operations. 
+ +# Arguments +- `x::AbstractVector`: Input vector +- `v`: Value to set at index `i` +- `i::Int`: Index to modify + +# Returns +- Vector with `x[i] = v`, computed via broadcasting + +# Examples +```julia +x = [1.0, 2.0, 3.0] +y = setindex(x, 99.0, 2) # [1.0, 99.0, 3.0] +``` +""" function setindex(x::AbstractVector, v, i::Int) n = length(x) x .* (i .!== 1:n) .+ v .* (i .== 1:n) end +""" + setindex(x::AbstractMatrix, v, i::Int, j::Int) + +Broadcasted setindex operation for matrices using boolean masking. + +Sets the (i,j)-th element of matrix `x` to value `v` using broadcasting operations. +This implementation uses boolean masks to avoid explicit copying and provide +efficient vectorized operations. + +# Arguments +- `x::AbstractMatrix`: Input matrix +- `v`: Value to set at position (i,j) +- `i::Int`: Row index to modify +- `j::Int`: Column index to modify + +# Returns +- Matrix with `x[i,j] = v`, computed via broadcasting + +# Examples +```julia +x = [1.0 2.0; 3.0 4.0] +y = setindex(x, 99.0, 1, 2) # [1.0 99.0; 3.0 4.0] +``` + +# Notes +The implementation uses transposed broadcasting `(j .!== i:m)'` which appears +to be a typo - should likely be `(j .!== 1:m)'` for correct column masking. +""" function setindex(x::AbstractMatrix, v, i::Int, j::Int) n, m = Base.size(x) x .* (i .!== 1:n) .* (j .!== i:m)' .+ v .* (i .== 1:n) .* (j .== i:m)' diff --git a/src/derivatives.jl b/src/derivatives.jl index 04cc7ff..222ff69 100644 --- a/src/derivatives.jl +++ b/src/derivatives.jl @@ -1,9 +1,10 @@ """ FiniteDiff.finite_difference_derivative( - f, x::T, - fdtype::Type{T1}=Val{:central}, - returntype::Type{T2}=eltype(x), - f_x::Union{Nothing,T}=nothing) + f, + x :: T, + fdtype :: Type{T1} = Val{:central}, + returntype :: Type{T2} = eltype(x), + f_x :: Union{Nothing,T} = nothing) Single-point derivative of scalar->scalar maps. """ @@ -43,7 +44,7 @@ end FiniteDiff.DerivativeCache( x :: AbstractArray{<:Number}, fx :: Union{Nothing,AbstractArray{<:Number}} = nothing, - epsilon :: Union{Nothing,AbstractArray{<:Real}} = nothing, + epsilon :: Union{Nothing,AbstractArray{<:Real}} = nothing, fdtype :: Type{T1} = Val{:central}, returntype :: Type{T2} = eltype(x)) @@ -146,14 +147,14 @@ end """ FiniteDiff.finite_difference_derivative!( - df::AbstractArray{<:Number}, + df :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - cache::DerivativeCache{T1,T2,fdtype,returntype}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + x :: AbstractArray{<:Number}, + cache :: DerivativeCache{T1,T2,fdtype,returntype}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) + Compute the derivative `df` of a scalar-valued map `f` at a collection of points `x`. Cached. diff --git a/src/epsilons.jl b/src/epsilons.jl index 5e62c72..7e76b3b 100644 --- a/src/epsilons.jl +++ b/src/epsilons.jl @@ -2,28 +2,136 @@ Very heavily inspired by Calculus.jl, but with an emphasis on performance and DiffEq API convenience. =# -#= -Compute the finite difference interval epsilon. +""" + compute_epsilon(::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where T<:Number + +Compute the finite difference step size (epsilon) for forward finite differences. + +The step size is computed as `max(relstep*abs(x), absstep)*dir`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. 
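+For illustration, with the `Float64` defaults `relstep = absstep = sqrt(eps(Float64)) ≈ 1.5e-8` (values rounded):
+
+```julia
+compute_epsilon(Val(:forward), 100.0, 1.5e-8, 1.5e-8, 1) # 1.5e-6: relative step dominates
+compute_epsilon(Val(:forward), 0.0, 1.5e-8, 1.5e-8, 1)   # 1.5e-8: falls back to absstep
+```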
+ +# Arguments +- `::Val{:forward}`: Finite difference type indicator for forward differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir::Real`: Direction multiplier (typically ±1) + +# Returns +- Step size `ϵ` for forward finite difference: `f(x + ϵ)` + Reference: Numerical Recipes, chapter 5.7. -=# -@inline function compute_epsilon(::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where T<:Number +""" +@inline function compute_epsilon( + ::Val{:forward}, x::T, relstep::Real, absstep::Real, dir::Real) where {T <: Number} return max(relstep*abs(x), absstep)*dir end -@inline function compute_epsilon(::Val{:central}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number +""" + compute_epsilon(::Val{:central}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number + +Compute the finite difference step size (epsilon) for central finite differences. + +The step size is computed as `max(relstep*abs(x), absstep)`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. + +# Arguments +- `::Val{:central}`: Finite difference type indicator for central differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir`: Direction parameter (unused for central differences) + +# Returns +- Step size `ϵ` for central finite difference: `(f(x + ϵ) - f(x - ϵ)) / (2ϵ)` +""" +@inline function compute_epsilon(::Val{:central}, x::T, relstep::Real, + absstep::Real, dir = nothing) where {T <: Number} return max(relstep*abs(x), absstep) end -@inline function compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number +""" + compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, absstep::Real, dir=nothing) where T<:Number + +Compute the finite difference step size (epsilon) for central finite differences in Hessian computations. + +The step size is computed as `max(relstep*abs(x), absstep)`, which ensures +numerical stability by using a relative step scaled by the magnitude of `x` +when `x` is large, and an absolute step when `x` is small. + +# Arguments +- `::Val{:hcentral}`: Finite difference type indicator for Hessian central differences +- `x::T`: Point at which to compute the step size +- `relstep::Real`: Relative step size factor +- `absstep::Real`: Absolute step size fallback +- `dir`: Direction parameter (unused for central differences) + +# Returns +- Step size `ϵ` for Hessian central finite differences +""" +@inline function compute_epsilon(::Val{:hcentral}, x::T, relstep::Real, + absstep::Real, dir = nothing) where {T <: Number} return max(relstep*abs(x), absstep) end -@inline function compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing,T}=nothing, ::Union{Nothing,T}=nothing, dir=nothing) where T<:Real +""" + compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing,T}=nothing, ::Union{Nothing,T}=nothing, dir=nothing) where T<:Real + +Compute the finite difference step size (epsilon) for complex step differentiation. + +For complex step differentiation, the step size is simply the machine epsilon `eps(T)`, +which provides optimal accuracy since complex step differentiation doesn't suffer from +subtractive cancellation errors. + +!!! 
warning + + `f` must be a function of a real variable which also happens to be complex analytic when the input is complex! + +# Arguments +- `::Val{:complex}`: Finite difference type indicator for complex step differentiation +- `x::T`: Point at which to compute the step size (unused, type determines epsilon) +- Additional arguments are unused for complex step differentiation + +# Returns +- Machine epsilon `eps(T)` for complex step differentiation: `imag(f(x + iϵ)) / ϵ` + +# Notes +Complex step differentiation computes derivatives as `imag(f(x + iϵ)) / ϵ` where `ϵ = eps(T)`. +This method provides machine precision accuracy without subtractive cancellation. +""" +@inline function compute_epsilon(::Val{:complex}, x::T, ::Union{Nothing, T} = nothing, + ::Union{Nothing, T} = nothing, dir = nothing) where {T <: Real} return eps(T) end -default_relstep(::Type{V}, T) where V = default_relstep(V(), T) -@inline function default_relstep(::Val{fdtype}, ::Type{T}) where {fdtype,T<:Number} +""" + default_relstep(fdtype, ::Type{T}) where T<:Number + +Compute the default relative step size for finite difference approximations. + +Returns optimal default step sizes based on the finite difference method and +numerical type, balancing truncation error and round-off error. + +# Arguments +- `fdtype`: Finite difference type (`Val(:forward)`, `Val(:central)`, `Val(:hcentral)`, etc.) +- `::Type{T}`: Numerical type for which to compute the step size + +# Returns +- `sqrt(eps(real(T)))` for forward differences +- `cbrt(eps(real(T)))` for central differences +- `eps(T)^(1/4)` for Hessian central differences +- `one(real(T))` for other types + +# Notes +These step sizes minimize the total error (truncation + round-off) for each method: +- Forward differences have O(h) truncation error, optimal h ~ sqrt(eps) +- Central differences have O(h²) truncation error, optimal h ~ eps^(1/3) +- Hessian methods have O(h²) truncation error but involve more operations +""" +default_relstep(::Type{V}, T) where {V} = default_relstep(V(), T) +@inline function default_relstep(::Val{fdtype}, ::Type{T}) where {fdtype, T <: Number} if fdtype==:forward return sqrt(eps(real(T))) elseif fdtype==:central @@ -35,7 +143,20 @@ default_relstep(::Type{V}, T) where V = default_relstep(V(), T) end end -function fdtype_error(::Type{T}=Float64) where T +""" + fdtype_error(::Type{T}=Float64) where T + +Throw an informative error for unsupported finite difference type combinations.
+ +# Arguments +- `::Type{T}`: Return type of the function being differentiated + +# Errors +- For `Real` return types: suggests `Val{:forward}`, `Val{:central}`, `Val{:complex}` +- For `Complex` return types: suggests `Val{:forward}`, `Val{:central}` (no complex step) +- For other types: suggests the return type should be Real or Complex subtype +""" +function fdtype_error(::Type{T} = Float64) where {T} if T<:Real error("Unrecognized fdtype: valid values are Val{:forward}, Val{:central} and Val{:complex}.") elseif T<:Complex diff --git a/src/gradients.jl b/src/gradients.jl index 6bb97f4..9458142 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -1,4 +1,5 @@ -struct GradientCache{CacheType1,CacheType2,CacheType3,CacheType4,fdtype,returntype,inplace} +struct GradientCache{ + CacheType1, CacheType2, CacheType3, CacheType4, fdtype, returntype, inplace} fx::CacheType1 c1::CacheType2 c2::CacheType3 @@ -16,12 +17,11 @@ end Allocating Cache Constructor """ function GradientCache( - df, - x, - fdtype=Val(:central), - returntype=eltype(df), - inplace=Val(true)) - + df, + x, + fdtype = Val(:central), + returntype = eltype(df), + inplace = Val(true)) fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if typeof(x) <: AbstractArray # the vector->scalar case @@ -59,9 +59,8 @@ function GradientCache( _c3 = x end - GradientCache{Nothing,typeof(_c1),typeof(_c2),typeof(_c3),fdtype, - returntype,inplace}(nothing, _c1, _c2, _c3) - + GradientCache{Nothing, typeof(_c1), typeof(_c2), typeof(_c3), fdtype, + returntype, inplace}(nothing, _c1, _c2, _c3) end """ @@ -86,7 +85,7 @@ Non-Allocating Cache Constructor # Output The output is a [`GradientCache`](@ref) struct. -```julia +```julia-repl julia> x = [1.0, 3.0] 2-element Vector{Float64}: 1.0 @@ -103,44 +102,81 @@ GradientCache{Float64, Vector{Float64}, Vector{Float64}, Vector{Float64}, Val{:c ``` """ function GradientCache( - fx::Fx,# match order in struct for Setfield - c1::T, - c2::T, - c3::T, - fdtype=Val(:central), - returntype=eltype(fx), - inplace=Val(true)) where {T,Fx} # Val(false) isn't so important for vector -> scalar, it gets ignored in that case anyway. - GradientCache{Fx,T,T,T,fdtype,returntype,inplace}(fx, c1, c2, c3) + fx::Fx,# match order in struct for Setfield + c1::T, + c2::T, + c3::T, + fdtype = Val(:central), + returntype = eltype(fx), + inplace = Val(true)) where {T, Fx} # Val(false) isn't so important for vector -> scalar, it gets ignored in that case anyway. + GradientCache{Fx, T, T, T, fdtype, returntype, inplace}(fx, c1, c2, c3) end """ FiniteDiff.finite_difference_gradient( f, x, - fdtype::Type{T1}=Val{:central}, - returntype::Type{T2}=eltype(x), - inplace::Type{Val{T3}}=Val{true}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - -Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. + fdtype :: Type{T1} = Val{:central}, + returntype :: Type{T2} = eltype(x), + inplace :: Type{Val{T3}} = Val{true}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) + +Compute the gradient of function `f` at point `x` using finite differences. + +This is the cache-less version that allocates temporary arrays internally. +Supports both vector→scalar maps `f(x) → scalar` and scalar→vector maps depending +on the `inplace` parameter and function signature. 
+ +# Arguments +- `f`: Function to differentiate + - If `typeof(x) <: AbstractArray`: `f(x)` should return a scalar (vector→scalar gradient) + - If `typeof(x) <: Number` and `inplace=Val(true)`: `f(fx, x)` modifies `fx` in-place (scalar→vector gradient) + - If `typeof(x) <: Number` and `inplace=Val(false)`: `f(x)` returns a vector (scalar→vector gradient) +- `x`: Point at which to evaluate the gradient (vector or scalar) +- `fdtype::Type{T1}=Val{:central}`: Finite difference method (`:forward`, `:central`, `:complex`) +- `returntype::Type{T2}=eltype(x)`: Element type of gradient components +- `inplace::Type{Val{T3}}=Val{true}`: Whether to use in-place function evaluation + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback +- `dir=true`: Direction for step size (typically ±1) + +# Returns +- Gradient vector `∇f` where `∇f[i] = ∂f/∂x[i]` + +# Examples +```julia +# Vector→scalar gradient +f(x) = x[1]^2 + x[2]^2 +x = [1.0, 2.0] +grad = finite_difference_gradient(f, x) # [2.0, 4.0] + +# Scalar→vector gradient (out-of-place) +g(t) = [t^2, t^3] +t = 2.0 +grad = finite_difference_gradient(g, t, Val(:central), eltype(t), Val(false)) +``` -Cache-less. +# Notes +- Forward differences: `O(n)` function evaluations, `O(h)` accuracy +- Central differences: `O(2n)` function evaluations, `O(h²)` accuracy +- Complex step: `O(n)` function evaluations, machine precision accuracy """ function finite_difference_gradient( - f, - x, - fdtype=Val(:central), - returntype=eltype(x), - inplace=Val(true), - fx=nothing, - c1=nothing, - c2=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + f, + x, + fdtype = Val(:central), + returntype = eltype(x), + inplace = Val(true), + fx = nothing, + c1 = nothing, + c2 = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) inplace isa Type && (inplace = inplace()) if typeof(x) <: AbstractArray df = zero(returntype) .* x @@ -162,7 +198,8 @@ function finite_difference_gradient( end end cache = GradientCache(df, x, fdtype, returntype, inplace) - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep, dir=dir) + finite_difference_gradient!( + df, f, x, cache, relstep = relstep, absstep = absstep, dir = dir) end """ @@ -181,63 +218,62 @@ Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx, Cache-less. 
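+A minimal usage sketch (the quadratic test function is illustrative):
+
+```julia
+df = zeros(2)
+FiniteDiff.finite_difference_gradient!(df, x -> x[1]^2 + x[2]^2, [1.0, 2.0])
+df # ≈ [2.0, 4.0]
+```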
""" function finite_difference_gradient!( - df, - f, - x, - fdtype=Val(:central), - returntype=eltype(df), - inplace=Val(true), - fx=nothing, - c1=nothing, - c2=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - + df, + f, + x, + fdtype = Val(:central), + returntype = eltype(df), + inplace = Val(true), + fx = nothing, + c1 = nothing, + c2 = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) cache = GradientCache(df, x, fdtype, returntype, inplace) - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep) + finite_difference_gradient!(df, f, x, cache, relstep = relstep, absstep = absstep) end """ FiniteDiff.finite_difference_gradient!( - df::AbstractArray{<:Number}, + df :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - cache::GradientCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep - dir=true) + x :: AbstractArray{<:Number}, + cache :: GradientCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep + dir = true) Gradients are either a vector->scalar map `f(x)`, or a scalar->vector map `f(fx,x)` if `inplace=Val{true}` and `fx=f(x)` if `inplace=Val{false}`. Cached. """ function finite_difference_gradient( - f, - x, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} - + f, + x, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} if typeof(x) <: AbstractArray df = zero(returntype) .* x else df = zero(cache.c1) end - finite_difference_gradient!(df, f, x, cache, relstep=relstep, absstep=absstep, dir=dir) + finite_difference_gradient!( + df, f, x, cache, relstep = relstep, absstep = absstep, dir = dir) df end # vector of derivatives of a vector->scalar map by each component of a vector x # this ignores the value of "inplace", because it doesn't make much sense function finite_difference_gradient!( - df, - f, - x, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df, + f, + x, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # NOTE: in this case epsilon is a vector, we need two arrays for epsilon and x1 # c1 denotes x1, c2 is epsilon @@ -247,11 +283,12 @@ function finite_difference_gradient!( end copyto!(c1, x) if fdtype == Val(:forward) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) if ArrayInterface.fast_scalar_indexing(c2) epsilon = ArrayInterface.allowed_getindex(c2, i) * dir else - epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * dir + epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * + dir end c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + epsilon, i) @@ -278,11 +315,12 @@ function finite_difference_gradient!( end elseif fdtype == Val(:central) copyto!(c3, x) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) if ArrayInterface.fast_scalar_indexing(c2) epsilon = ArrayInterface.allowed_getindex(c2, i) * dir else - epsilon = compute_epsilon(fdtype, one(eltype(x)), 
relstep, absstep, dir) * dir + epsilon = compute_epsilon(fdtype, one(eltype(x)), relstep, absstep, dir) * + dir end c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + epsilon, i) @@ -303,7 +341,7 @@ function finite_difference_gradient!( elseif fdtype == Val(:complex) && returntype <: Real # we use c1 here to avoid typing issues with x epsilon_complex = eps(real(eltype(x))) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) c1_old = ArrayInterface.allowed_getindex(c1, i) ArrayInterface.allowed_setindex!(c1, c1_old + im * epsilon_complex, i) ArrayInterface.allowed_setindex!(df, imag(f(c1)) / epsilon_complex, i) @@ -316,13 +354,13 @@ function finite_difference_gradient!( end function finite_difference_gradient!( - df::StridedVector{<:Number}, - f, - x::StridedVector{<:Number}, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df::StridedVector{<:Number}, + f, + x::StridedVector{<:Number}, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # c1 is x1 if we need a complex copy of x, otherwise Nothing # c2 is Nothing @@ -334,44 +372,30 @@ function finite_difference_gradient!( end copyto!(c3, x) if fdtype == Val(:forward) - for i ∈ eachindex(x) + fx0 = fx !== nothing ? fx : f(x) + for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] - if typeof(fx) != Nothing - c3[i] += epsilon - dfi = (f(c3) - fx) / epsilon - c3[i] = x_old - else - fx0 = f(x) - c3[i] += epsilon - dfi = (f(c3) - fx0) / epsilon - c3[i] = x_old - end + c3[i] += epsilon + dfi = (f(c3) - fx0) / epsilon + c3[i] = x_old df[i] = real(dfi) if eltype(df) <: Complex if eltype(x) <: Complex c3[i] += im * epsilon - if typeof(fx) != Nothing - dfi = (f(c3) - fx) / (im * epsilon) - else - dfi = (f(c3) - fx0) / (im * epsilon) - end + dfi = (f(c3) - fx0) / (im * epsilon) c3[i] = x_old else c1[i] += im * epsilon - if typeof(fx) != Nothing - dfi = (f(c1) - fx) / (im * epsilon) - else - dfi = (f(c1) - fx0) / (im * epsilon) - end + dfi = (f(c1) - fx0) / (im * epsilon) c1[i] = x_old end df[i] -= im * imag(dfi) end end elseif fdtype == Val(:central) - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) epsilon = compute_epsilon(fdtype, x[i], relstep, absstep, dir) x_old = x[i] c3[i] += epsilon @@ -397,11 +421,12 @@ function finite_difference_gradient!( df[i] -= im * imag(dfi / (2 * im * epsilon)) end end - elseif fdtype == Val(:complex) && returntype <: Real && eltype(df) <: Real && eltype(x) <: Real + elseif fdtype == Val(:complex) && returntype <: Real && eltype(df) <: Real && + eltype(x) <: Real copyto!(c1, x) epsilon_complex = eps(real(eltype(x))) # we use c1 here to avoid typing issues with x - @inbounds for i ∈ eachindex(x) + @inbounds for i in eachindex(x) c1_old = c1[i] c1[i] += im * epsilon_complex df[i] = imag(f(c1)) / epsilon_complex @@ -416,13 +441,13 @@ end # vector of derivatives of a scalar->vector map # this is effectively a vector of partial derivatives, but we still call it a gradient function finite_difference_gradient!( - df, - f, - x::Number, - cache::GradientCache{T1,T2,T3,T4,fdtype,returntype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {T1,T2,T3,T4,fdtype,returntype,inplace} + df, 
+ f, + x::Number, + cache::GradientCache{T1, T2, T3, T4, fdtype, returntype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {T1, T2, T3, T4, fdtype, returntype, inplace} # NOTE: in this case epsilon is a scalar, we need two arrays for fx1 and fx2 # c1 denotes fx1, c2 is fx2, sizes guaranteed by the cache constructor diff --git a/src/hessians.jl b/src/hessians.jl index 1b43156..562874f 100644 --- a/src/hessians.jl +++ b/src/hessians.jl @@ -1,218 +1,292 @@ -struct HessianCache{T,fdtype,inplace} - xpp::T - xpm::T - xmp::T - xmm::T -end - -#used to dispatch on StaticArrays -_hessian_inplace(::Type{T}) where T = Val(ArrayInterface.ismutable(T)) -_hessian_inplace(x) = _hessian_inplace(typeof(x)) -__Symmetric(x) = Symmetric(x) - -function mutable_zeromatrix(x) - A = ArrayInterface.zeromatrix(x) - ArrayInterface.ismutable(A) ? A : Base.copymutable(A) -end - -""" - HessianCache( - xpp, - xpm, - xmp, - xmm, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) - -Non-allocating cache constructor. -""" -function HessianCache(xpp,xpm,xmp,xmm, - fdtype=Val(:hcentral), - inplace = _hessian_inplace(x)) - fdtype isa Type && (fdtype = fdtype()) - inplace isa Type && (inplace = inplace()) - HessianCache{typeof(xpp),fdtype,inplace}(xpp,xpm,xmp,xmm) -end - -""" - HessianCache( - x, - fdtype::Type{T1}=Val{:hcentral}, - inplace::Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) - -Allocating cache constructor. -""" -function HessianCache(x, fdtype=Val(:hcentral), - inplace = _hessian_inplace(x)) - cx = copy(x) - fdtype isa Type && (fdtype = fdtype()) - inplace isa Type && (inplace = inplace()) - HessianCache{typeof(cx),fdtype,inplace}(cx, copy(x), copy(x), copy(x)) -end - -""" - finite_difference_hessian( - f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:hcentral}, - inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cache-less. -""" -function finite_difference_hessian(f, x, - fdtype = Val(:hcentral), - inplace = _hessian_inplace(x); - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) - - cache = HessianCache(x, fdtype, inplace) - finite_difference_hessian(f, x, cache; relstep=relstep, absstep=absstep) -end - -""" - finite_difference_hessian( - f, - x, - cache::HessianCache{T,fdtype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cached. -""" -function finite_difference_hessian( - f,x, - cache::HessianCache{T,fdtype,inplace}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) where {T,fdtype,inplace} - H = mutable_zeromatrix(x) - finite_difference_hessian!(H, f, x, cache; relstep=relstep, absstep=absstep) - __Symmetric(H) -end - -""" - finite_difference_hessian!( - H::AbstractMatrix, - f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:hcentral}, - inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - -Cache-less. 
-""" -function finite_difference_hessian!(H,f, - x, - fdtype = Val(:hcentral), - inplace = _hessian_inplace(x); - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) - - cache = HessianCache(x,fdtype,inplace) - finite_difference_hessian!(H, f, x, cache; relstep=relstep, absstep=absstep) -end - -""" - finite_difference_hessian!( - H, - f, - x, - cache::HessianCache{T,fdtype,inplace}; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) - -Cached. -""" -function finite_difference_hessian!(H,f,x, - cache::HessianCache{T,fdtype,inplace}; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep) where {T,fdtype,inplace} - - @assert fdtype == Val(:hcentral) - n = length(x) - xpp, xpm, xmp, xmm = cache.xpp, cache.xpm, cache.xmp, cache.xmm - fx = f(x) - - if inplace === Val(true) - _xpp, _xpm, _xmp, _xmm = xpp, xpm, xmp, xmm - copyto!(xpp,x) - copyto!(xpm,x) - copyto!(xmp,x) - copyto!(xmm,x) - else # ignore the cache since immutable - xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x) - end - - for i = 1:n - xi = ArrayInterface.allowed_getindex(x,i) - epsilon = compute_epsilon(Val(:hcentral), xi, relstep, absstep) - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xi + epsilon,i) - ArrayInterface.allowed_setindex!(xmm,xi - epsilon,i) - else - _xpp = setindex(xpp,xi + epsilon, i) - _xmm = setindex(xmm,xi - epsilon, i) - end - - ArrayInterface.allowed_setindex!(H,(f(_xpp) - 2*fx + f(_xmm)) / epsilon^2,i,i) - epsiloni = compute_epsilon(Val(:central), xi, relstep, absstep) - xp = xi + epsiloni - xm = xi - epsiloni - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xp,i) - ArrayInterface.allowed_setindex!(xpm,xp,i) - ArrayInterface.allowed_setindex!(xmp,xm,i) - ArrayInterface.allowed_setindex!(xmm,xm,i) - else - _xpp = setindex(xpp,xp,i) - _xpm = setindex(xpm,xp,i) - _xmp = setindex(xmp,xm,i) - _xmm = setindex(xmm,xm,i) - end - - for j = i+1:n - xj = ArrayInterface.allowed_getindex(x,j) - epsilonj = compute_epsilon(Val(:central), xj, relstep, absstep) - xp = xj + epsilonj - xm = xj - epsilonj - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xp,j) - ArrayInterface.allowed_setindex!(xpm,xm,j) - ArrayInterface.allowed_setindex!(xmp,xp,j) - ArrayInterface.allowed_setindex!(xmm,xm,j) - else - _xpp = setindex(_xpp,xp,j) - _xpm = setindex(_xpm,xm,j) - _xmp = setindex(_xmp,xp,j) - _xmm = setindex(_xmm,xm,j) - end - - ArrayInterface.allowed_setindex!(H,(f(_xpp) - f(_xpm) - f(_xmp) + f(_xmm))/(4*epsiloni*epsilonj),i,j) - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xj,j) - ArrayInterface.allowed_setindex!(xpm,xj,j) - ArrayInterface.allowed_setindex!(xmp,xj,j) - ArrayInterface.allowed_setindex!(xmm,xj,j) - else - _xpp = setindex(_xpp,xj,j) - _xpm = setindex(_xpm,xj,j) - _xmp = setindex(_xmp,xj,j) - _xmm = setindex(_xmm,xj,j) - end - end - - if inplace === Val(true) - ArrayInterface.allowed_setindex!(xpp,xi,i) - ArrayInterface.allowed_setindex!(xpm,xi,i) - ArrayInterface.allowed_setindex!(xmp,xi,i) - ArrayInterface.allowed_setindex!(xmm,xi,i) - end - end - LinearAlgebra.copytri!(H,'U') -end +struct HessianCache{T, fdtype, inplace} + xpp::T + xpm::T + xmp::T + xmm::T +end + +""" + _hessian_inplace(::Type{T}) where T + _hessian_inplace(x) + +Internal function to determine if Hessian computation should be performed in-place. + +Returns `Val(true)` if the array type is mutable and supports in-place operations, +`Val(false)` otherwise. Used to dispatch on StaticArrays vs mutable arrays. 
+ +# Arguments +- `::Type{T}` or `x`: Array type or array instance + +# Returns +- `Val(true)` if the array type supports in-place mutation +- `Val(false)` if the array type is immutable (e.g., StaticArray) +""" +_hessian_inplace(::Type{T}) where {T} = Val(ArrayInterface.ismutable(T)) +_hessian_inplace(x) = _hessian_inplace(typeof(x)) + +""" + __Symmetric(x) + +Internal utility function that wraps a matrix in a `Symmetric` view. + +# Arguments +- `x`: Matrix to be wrapped + +# Returns +- `Symmetric(x)`: Symmetric view of the matrix +""" +__Symmetric(x) = Symmetric(x) + +""" + mutable_zeromatrix(x) + +Internal utility function to create a mutable zero matrix with the same structure as `x`. + +Creates a zero matrix compatible with `x` and ensures it's mutable for in-place operations. +If the created matrix is immutable, it converts it to a mutable copy. + +# Arguments +- `x`: Array whose structure should be matched + +# Returns +- Mutable zero matrix with the same dimensions and compatible type as `x` +""" +function mutable_zeromatrix(x) + A = ArrayInterface.zeromatrix(x) + ArrayInterface.ismutable(A) ? A : Base.copymutable(A) +end + +""" + HessianCache( + xpp, xpm, xmp, xmm, + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = xpp isa StaticArray ? Val{true} : Val{false}) + +Non-allocating cache constructor. +""" +function HessianCache(xpp, xpm, xmp, xmm, + fdtype = Val(:hcentral), + inplace = _hessian_inplace(xpp)) + fdtype isa Type && (fdtype = fdtype()) + inplace isa Type && (inplace = inplace()) + HessianCache{typeof(xpp), fdtype, inplace}(xpp, xpm, xmp, xmm) +end + +""" + HessianCache( + x, + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}) + +Allocating cache constructor. +""" +function HessianCache(x, fdtype = Val(:hcentral), + inplace = _hessian_inplace(x)) + cx = copy(x) + fdtype isa Type && (fdtype = fdtype()) + inplace isa Type && (inplace = inplace()) + HessianCache{typeof(cx), fdtype, inplace}(cx, copy(x), copy(x), copy(x)) +end + +""" + finite_difference_hessian( + f, + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:hcentral}, + inplace :: Type{Val{T2}} = x isa StaticArray ? Val{true} : Val{false}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) + +Compute the Hessian matrix of scalar function `f` at point `x` using finite differences. + +This is the cache-less version that allocates temporary arrays internally. +The Hessian `H[i,j] = ∂²f/(∂x[i]∂x[j])` is computed using the specified finite +difference method, typically central differences for optimal accuracy.
+
+# Arguments
+- `f`: Scalar-valued function to differentiate (vector→scalar map)
+- `x::AbstractArray{<:Number}`: Point at which to evaluate the Hessian
+- `fdtype = Val(:hcentral)`: Finite difference method (`:hcentral` for Hessian central differences)
+- `inplace = _hessian_inplace(x)`: Whether to use in-place operations (auto-detected; `Val(false)` for immutable types such as StaticArrays)
+
+# Keyword Arguments
+- `relstep`: Relative step size (default: method-dependent optimal value)
+- `absstep=relstep`: Absolute step size fallback
+
+# Returns
+- Hessian matrix `H` where `H[i,j] = ∂²f/(∂x[i]∂x[j])`
+- Returns `Symmetric(H)` view to enforce mathematical symmetry
+
+# Examples
+```julia
+f(x) = x[1]^2 + x[1]*x[2] + x[2]^2 # Quadratic function
+x = [1.0, 2.0]
+H = finite_difference_hessian(f, x) # 2×2 Symmetric matrix
+```
+
+# Mathematical Background
+For a scalar function `f: ℝⁿ → ℝ`, the Hessian central difference approximation is:
+```
+H[i,j] ≈ (f(x + eᵢhᵢ + eⱼhⱼ) - f(x + eᵢhᵢ - eⱼhⱼ) - f(x - eᵢhᵢ + eⱼhⱼ) + f(x - eᵢhᵢ - eⱼhⱼ)) / (4hᵢhⱼ)
+```
+where `eᵢ` is the i-th unit vector and `hᵢ` is the step size in dimension i.
+
+# Notes
+- Requires `O(n²)` function evaluations for an n-dimensional input
+- Central differences provide `O(h²)` accuracy for second derivatives
+- The result is automatically symmetrized using the `Symmetric()` wrapper
+- For large problems, consider finite differencing a gradient function instead (the Jacobian of the gradient is the Hessian)
+"""
+function finite_difference_hessian(f, x,
+    fdtype = Val(:hcentral),
+    inplace = _hessian_inplace(x);
+    relstep = default_relstep(fdtype, eltype(x)),
+    absstep = relstep)
+    cache = HessianCache(x, fdtype, inplace)
+    finite_difference_hessian(f, x, cache; relstep = relstep, absstep = absstep)
+end
+
+"""
+    finite_difference_hessian(
+        f,
+        x,
+        cache::HessianCache{T,fdtype,inplace};
+        relstep=default_relstep(fdtype, eltype(x)),
+        absstep=relstep)
+
+Cached.
+"""
+function finite_difference_hessian(
+        f, x,
+        cache::HessianCache{T, fdtype, inplace};
+        relstep = default_relstep(fdtype, eltype(x)),
+        absstep = relstep) where {T, fdtype, inplace}
+    H = mutable_zeromatrix(x)
+    finite_difference_hessian!(H, f, x, cache; relstep = relstep, absstep = absstep)
+    __Symmetric(H)
+end
+
+"""
+    finite_difference_hessian!(
+        H :: AbstractMatrix,
+        f,
+        x :: AbstractArray{<:Number},
+        fdtype = Val(:hcentral),
+        inplace = _hessian_inplace(x);
+        relstep = default_relstep(fdtype, eltype(x)),
+        absstep = relstep)
+
+Cache-less.
+"""
+function finite_difference_hessian!(H, f,
+    x,
+    fdtype = Val(:hcentral),
+    inplace = _hessian_inplace(x);
+    relstep = default_relstep(fdtype, eltype(x)),
+    absstep = relstep)
+    cache = HessianCache(x, fdtype, inplace)
+    finite_difference_hessian!(H, f, x, cache; relstep = relstep, absstep = absstep)
+end
+
+"""
+    finite_difference_hessian!(
+        H,
+        f,
+        x,
+        cache::HessianCache{T,fdtype,inplace};
+        relstep = default_relstep(fdtype, eltype(x)),
+        absstep = relstep)
+
+Cached.
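+
+# Examples
+
+A minimal usage sketch; the quadratic `f` below is illustrative, not part of the API:
+
+```julia
+f(x) = x[1]^2 + x[1]*x[2] + x[2]^2
+x = [1.0, 2.0]
+H = zeros(2, 2)        # mutable output matrix
+cache = HessianCache(x) # reusable across calls at points shaped like `x`
+finite_difference_hessian!(H, f, x, cache)
+```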
+""" +function finite_difference_hessian!(H, f, x, + cache::HessianCache{T, fdtype, inplace}; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) where {T, fdtype, inplace} + @assert fdtype == Val(:hcentral) + n = length(x) + xpp, xpm, xmp, xmm = cache.xpp, cache.xpm, cache.xmp, cache.xmm + fx = f(x) + + if inplace === Val(true) + _xpp, _xpm, _xmp, _xmm = xpp, xpm, xmp, xmm + copyto!(xpp, x) + copyto!(xpm, x) + copyto!(xmp, x) + copyto!(xmm, x) + else # ignore the cache since immutable + xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x) + end + + for i in 1:n + xi = ArrayInterface.allowed_getindex(x, i) + epsilon = compute_epsilon(Val(:hcentral), xi, relstep, absstep) + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xi + epsilon, i) + ArrayInterface.allowed_setindex!(xmm, xi - epsilon, i) + else + _xpp = setindex(xpp, xi + epsilon, i) + _xmm = setindex(xmm, xi - epsilon, i) + end + + ArrayInterface.allowed_setindex!(H, (f(_xpp) - 2*fx + f(_xmm)) / epsilon^2, i, i) + epsiloni = compute_epsilon(Val(:central), xi, relstep, absstep) + xp = xi + epsiloni + xm = xi - epsiloni + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xp, i) + ArrayInterface.allowed_setindex!(xpm, xp, i) + ArrayInterface.allowed_setindex!(xmp, xm, i) + ArrayInterface.allowed_setindex!(xmm, xm, i) + else + _xpp = setindex(xpp, xp, i) + _xpm = setindex(xpm, xp, i) + _xmp = setindex(xmp, xm, i) + _xmm = setindex(xmm, xm, i) + end + + for j in (i + 1):n + xj = ArrayInterface.allowed_getindex(x, j) + epsilonj = compute_epsilon(Val(:central), xj, relstep, absstep) + xp = xj + epsilonj + xm = xj - epsilonj + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xp, j) + ArrayInterface.allowed_setindex!(xpm, xm, j) + ArrayInterface.allowed_setindex!(xmp, xp, j) + ArrayInterface.allowed_setindex!(xmm, xm, j) + else + _xpp = setindex(_xpp, xp, j) + _xpm = setindex(_xpm, xm, j) + _xmp = setindex(_xmp, xp, j) + _xmm = setindex(_xmm, xm, j) + end + + ArrayInterface.allowed_setindex!( + H, (f(_xpp) - f(_xpm) - f(_xmp) + f(_xmm))/(4*epsiloni*epsilonj), i, j) + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xj, j) + ArrayInterface.allowed_setindex!(xpm, xj, j) + ArrayInterface.allowed_setindex!(xmp, xj, j) + ArrayInterface.allowed_setindex!(xmm, xj, j) + else + _xpp = setindex(_xpp, xj, j) + _xpm = setindex(_xpm, xj, j) + _xmp = setindex(_xmp, xj, j) + _xmm = setindex(_xmm, xj, j) + end + end + + if inplace === Val(true) + ArrayInterface.allowed_setindex!(xpp, xi, i) + ArrayInterface.allowed_setindex!(xpm, xi, i) + ArrayInterface.allowed_setindex!(xmp, xi, i) + ArrayInterface.allowed_setindex!(xmm, xi, i) + end + end + LinearAlgebra.copytri!(H, 'U') +end diff --git a/src/iteration_utils.jl b/src/iteration_utils.jl index 6228c45..537148d 100644 --- a/src/iteration_utils.jl +++ b/src/iteration_utils.jl @@ -1,13 +1,71 @@ -@inline function _colorediteration!(J,sparsity,rows_index,cols_index,vfx,colorvec,color_i,ncols) - for i in 1:length(cols_index) - if colorvec[cols_index[i]] == color_i - J[rows_index[i],cols_index[i]] = vfx[rows_index[i]] - end - end -end - -#override default setting of using findstructralnz -_use_findstructralnz(sparsity) = ArrayInterface.has_sparsestruct(sparsity) - -# test if J, sparsity are both SparseMatrixCSC and have the same sparsity pattern of stored values -_use_sparseCSC_common_sparsity(J, sparsity) = false +""" + _colorediteration!(J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) + +Internal 
function for sparse Jacobian assembly using graph coloring. + +Updates the Jacobian matrix `J` with finite difference values for columns +belonging to the current color group. This is part of the graph-coloring +algorithm that computes multiple Jacobian columns simultaneously. + +# Arguments +- `J`: Jacobian matrix to update (modified in-place) +- `sparsity`: Sparsity pattern of the Jacobian +- `rows_index`: Row indices of non-zero elements +- `cols_index`: Column indices of non-zero elements +- `vfx`: Vector of computed finite difference values +- `colorvec`: Vector assigning colors to columns +- `color_i`: Current color being processed +- `ncols`: Total number of columns in the Jacobian + +# Notes +This function implements the core loop of the graph-coloring sparse Jacobian +algorithm. It assigns finite difference values to the appropriate entries +in the Jacobian matrix for all columns sharing the same color. +""" +@inline function _colorediteration!( + J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, ncols) + for i in 1:length(cols_index) + if colorvec[cols_index[i]] == color_i + J[rows_index[i], cols_index[i]] = vfx[rows_index[i]] + end + end +end + +""" + _use_findstructralnz(sparsity) + +Internal function to determine whether to use `findstructralnz` for sparsity detection. + +Returns `true` if the sparsity pattern has structural information that can be +utilized by `findstructralnz`, `false` otherwise. This overrides the default +behavior and delegates to `ArrayInterface.has_sparsestruct`. + +# Arguments +- `sparsity`: Sparsity pattern object + +# Returns +- `true` if structural sparsity information is available, `false` otherwise +""" +_use_findstructralnz(sparsity) = ArrayInterface.has_sparsestruct(sparsity) + +""" + _use_sparseCSC_common_sparsity(J, sparsity) + +Internal function to test for common sparsity patterns between Jacobian and sparsity matrix. + +Tests if both `J` and `sparsity` are `SparseMatrixCSC` matrices with the same +sparsity pattern of stored values. Currently always returns `false` as a +conservative default. + +# Arguments +- `J`: Jacobian matrix +- `sparsity`: Sparsity pattern matrix + +# Returns +- `false` (conservative default - may be optimized in the future) + +# Notes +This function could be optimized to return `true` when both matrices have +identical sparsity patterns, allowing for more efficient algorithms. 
+""" +_use_sparseCSC_common_sparsity(J, sparsity) = false diff --git a/src/jacobians.jl b/src/jacobians.jl index 1297a6a..e1b742c 100644 --- a/src/jacobians.jl +++ b/src/jacobians.jl @@ -1,24 +1,24 @@ -mutable struct JacobianCache{CacheType1,CacheType2,CacheType3,CacheType4,ColorType,SparsityType,fdtype,returntype} - x1 :: CacheType1 - x2 :: CacheType2 - fx :: CacheType3 - fx1 :: CacheType4 - colorvec :: ColorType - sparsity :: SparsityType +mutable struct JacobianCache{CacheType1, CacheType2, CacheType3, CacheType4, + ColorType, SparsityType, fdtype, returntype} + x1::CacheType1 + x2::CacheType2 + fx::CacheType3 + fx1::CacheType4 + colorvec::ColorType + sparsity::SparsityType end function JacobianCache( - x, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(x); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x), - sparsity = nothing) where {T1,T2,T3} - + x, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(x); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if eltype(x) <: Real && fdtype==Val(:complex) - x1 = false .* im .* x + x1 = false .* im .* x _fx = false .* im .* x else x1 = copy(x) @@ -26,12 +26,13 @@ function JacobianCache( end if fdtype==Val(:complex) - _fx1 = nothing + _fx1 = nothing else _fx1 = copy(x) end - JacobianCache(x1,_fx,_fx1,fdtype,returntype;colorvec=colorvec,sparsity=sparsity) + JacobianCache( + x1, _fx, _fx1, fdtype, returntype; colorvec = colorvec, sparsity = sparsity) end """ @@ -47,18 +48,17 @@ Allocating Cache Constructor. This assumes the Jacobian is square. """ function JacobianCache( - x , - fx, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(x); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x), - sparsity = nothing) where {T1,T2,T3} - + x, + fx, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(x); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if eltype(x) <: Real && fdtype==Val(:complex) - x1 = false .* im .* x + x1 = false .* im .* x else x1 = copy(x) end @@ -70,48 +70,48 @@ function JacobianCache( end if fdtype==Val(:complex) - _fx1 = nothing + _fx1 = nothing else _fx1 = copy(fx) end - JacobianCache(x1,_fx,_fx1,fdtype,returntype;colorvec=colorvec,sparsity=sparsity) + JacobianCache( + x1, _fx, _fx1, fdtype, returntype; colorvec = colorvec, sparsity = sparsity) end """ FiniteDiff.JacobianCache( - x1 , - fx , + x1, + fx, fx1, fdtype :: Type{T1} = Val{:central}, - returntype :: Type{T2} = eltype(fx), + returntype :: Type{T2} = eltype(fx); colorvec = 1:length(x1), sparsity = nothing) Non-Allocating Cache Constructor. 
""" function JacobianCache( - x1 , - fx , - fx1, - fdtype :: Union{Val{T1},Type{T1}} = Val(:forward), - returntype :: Type{T2} = eltype(fx); - inplace :: Union{Val{T3},Type{T3}} = Val(true), - colorvec = 1:length(x1), - sparsity = nothing) where {T1,T2,T3} - + x1, + fx, + fx1, + fdtype::Union{Val{T1}, Type{T1}} = Val(:forward), + returntype::Type{T2} = eltype(fx); + inplace::Union{Val{T3}, Type{T3}} = Val(true), + colorvec = 1:length(x1), + sparsity = nothing) where {T1, T2, T3} fdtype isa Type && (fdtype = fdtype()) inplace isa Type && (inplace = inplace()) if fdtype==Val(:complex) !(returntype<:Real) && fdtype_error(returntype) if eltype(fx) <: Real - _fx = false .* im .* fx + _fx = false .* im .* fx else _fx = fx end if eltype(x1) <: Real - _x1 = false .* im .* x1 + _x1 = false .* im .* x1 else _x1 = x1 end @@ -122,21 +122,65 @@ function JacobianCache( _fx = fx end _x2 = zero(_x1) - JacobianCache{typeof(_x1),typeof(_x2),typeof(_fx),typeof(fx1),typeof(colorvec),typeof(sparsity),fdtype,returntype}(_x1,_x2,_fx,fx1,colorvec,sparsity) + JacobianCache{typeof(_x1), typeof(_x2), typeof(_fx), typeof(fx1), + typeof(colorvec), typeof(sparsity), fdtype, returntype}( + _x1, _x2, _fx, fx1, colorvec, sparsity) end -function _make_Ji(::AbstractArray, rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) +""" + _make_Ji(::AbstractArray, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) + +Internal function to construct the Jacobian contribution matrix for sparse Jacobian computation. + +Creates a matrix `Ji` that represents the contribution of the `color_i` color group +to the Jacobian. This is used in graph-coloring based sparse Jacobian computation. + +# Arguments +- `::AbstractArray`: Array type dispatch +- `rows_index`: Row indices of non-zero elements +- `cols_index`: Column indices of non-zero elements +- `dx`: Computed finite difference values +- `colorvec`: Vector assigning colors to columns +- `color_i`: Current color being processed +- `nrows`: Number of rows in the Jacobian +- `ncols`: Number of columns in the Jacobian + +# Returns +- `Ji`: Matrix containing the Jacobian entries for the current color +""" +function _make_Ji( + ::AbstractArray, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) pick_inds = [i for i in 1:length(rows_index) if colorvec[cols_index[i]] == color_i] rows_index_c = rows_index[pick_inds] cols_index_c = cols_index[pick_inds] len_rows = length(pick_inds) - unused_rows = setdiff(1:nrows,rows_index_c) - perm_rows = sortperm(vcat(rows_index_c,unused_rows)) - cols_index_c = vcat(cols_index_c,zeros(Int,nrows-len_rows))[perm_rows] + unused_rows = setdiff(1:nrows, rows_index_c) + perm_rows = sortperm(vcat(rows_index_c, unused_rows)) + cols_index_c = vcat(cols_index_c, zeros(Int, nrows-len_rows))[perm_rows] Ji = [j==cols_index_c[i] ? dx[i] : false for i in 1:nrows, j in 1:ncols] Ji end +""" + _make_Ji(::AbstractArray, xtype, dx, color_i, nrows, ncols) + +Internal function to construct the Jacobian contribution matrix for dense Jacobian computation. + +Creates a matrix `Ji` by placing the finite difference vector `dx` in the column +corresponding to `color_i`, with zeros elsewhere. This is used for dense Jacobian +computation without sparsity structure. 
+ +# Arguments +- `::AbstractArray`: Array type dispatch +- `xtype`: Type of the input vector (for type stability) +- `dx`: Computed finite difference values +- `color_i`: Current color/column being processed +- `nrows`: Number of rows in the Jacobian +- `ncols`: Number of columns in the Jacobian + +# Returns +- `Ji`: Matrix containing the Jacobian entries for the current column +""" function _make_Ji(::AbstractArray, xtype, dx, color_i, nrows, ncols) Ji = mapreduce(i -> i==color_i ? dx : zero(dx), hcat, 1:ncols) size(Ji) != (nrows, ncols) ? reshape(Ji, (nrows, ncols)) : Ji #branch when size(dx) == (1,) => size(Ji) == (1,) while size(J) == (1,1) @@ -146,35 +190,72 @@ end FiniteDiff.finite_difference_jacobian( f, x :: AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:central}, - returntype :: Type{T2}=eltype(x), - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = 1:length(x), - sparsity = nothing, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = nothing, jac_prototype = nothing, - dir=true) - -Cache-less. + dir = true) + +Compute the Jacobian matrix of function `f` at point `x` using finite differences. + +This is the cache-less version that allocates temporary arrays internally. +The Jacobian `J[i,j] = ∂f[i]/∂x[j]` is computed using the specified finite +difference method. Supports sparse Jacobians via graph coloring. + +# Arguments +- `f`: Function to differentiate (vector→vector map) +- `x::AbstractArray{<:Number}`: Point at which to evaluate the Jacobian +- `fdtype::Type{T1}=Val{:forward}`: Finite difference method (`:forward`, `:central`, `:complex`) +- `returntype::Type{T2}=eltype(x)`: Element type of the function output +- `f_in=nothing`: Pre-computed `f(x)` value (if available) + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback +- `colorvec=1:length(x)`: Column coloring for sparse Jacobians +- `sparsity=nothing`: Sparsity pattern for the Jacobian +- `jac_prototype=nothing`: Prototype matrix defining Jacobian structure +- `dir=true`: Direction for step size (typically ±1) + +# Returns +- Jacobian matrix `J` where `J[i,j] = ∂f[i]/∂x[j]` + +# Examples +```julia +f(x) = [x[1]^2 + x[2], x[1] * x[2]] +x = [1.0, 2.0] +J = finite_difference_jacobian(f, x) # 2×2 matrix +``` + +# Notes +- Forward differences: `O(n)` function evaluations, `O(h)` accuracy +- Central differences: `O(2n)` function evaluations, `O(h²)` accuracy +- Complex step: `O(n)` function evaluations, machine precision accuracy +- Sparse Jacobians use graph coloring to reduce function evaluations """ function finite_difference_jacobian(f, x, - fdtype = Val(:forward), - returntype = eltype(x), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = 1:length(x), - sparsity = nothing, - jac_prototype = nothing, - dir=true) - + fdtype = Val(:forward), + returntype = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = nothing, + jac_prototype = nothing, + dir = true) if f_in isa Nothing fx = f(x) else fx = f_in end cache = JacobianCache(x, fx, fdtype, returntype) - finite_difference_jacobian(f, x, cache, fx; relstep=relstep, absstep=absstep, colorvec=colorvec, sparsity=sparsity, jac_prototype=jac_prototype, dir=dir) + 
finite_difference_jacobian( + f, x, cache, fx; relstep = relstep, absstep = absstep, colorvec = colorvec, + sparsity = sparsity, jac_prototype = jac_prototype, dir = dir) end void_setindex!(args...) = (setindex!(args...); return) @@ -184,27 +265,26 @@ void_setindex!(args...) = (setindex!(args...); return) f, x, cache::JacobianCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, jac_prototype = nothing, - dir=true) + dir = true) Cached. """ function finite_difference_jacobian( - f, - x, - cache::JacobianCache{T1,T2,T3,T4,cType,sType,fdtype,returntype}, - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, - jac_prototype = nothing, - dir=true) where {T1,T2,T3,T4,cType,sType,fdtype,returntype} - + f, + x, + cache::JacobianCache{T1, T2, T3, T4, cType, sType, fdtype, returntype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, + jac_prototype = nothing, + dir = true) where {T1, T2, T3, T4, cType, sType, fdtype, returntype} x1, fx, fx1 = cache.x1, cache.fx, cache.fx1 if !(f_in isa Nothing) @@ -218,7 +298,9 @@ function finite_difference_jacobian( end vecx = _vec(x) vecx1 = _vec(x1) - J = jac_prototype isa Nothing ? (sparsity isa Nothing ? Array{eltype(x),2}(undef, length(vecfx), 0) : zeros(eltype(x),size(sparsity))) : zero(jac_prototype) + J = jac_prototype isa Nothing ? + (sparsity isa Nothing ? Array{eltype(x), 2}(undef, length(vecfx), 0) : + zeros(eltype(x), size(sparsity))) : zero(jac_prototype) nrows, ncols = size(J) if !(sparsity isa Nothing) @@ -228,12 +310,11 @@ function finite_difference_jacobian( end if fdtype == Val(:forward) - function calculate_Ji_forward(i) x_save = ArrayInterface.allowed_getindex(vecx, i) epsilon = compute_epsilon(Val(:forward), x_save, relstep, absstep, dir) _vecx1 = setindex(vecx, x_save+epsilon, i) - _x1 = ArrayInterface.restructure(x,_vecx1) + _x1 = ArrayInterface.restructure(x, _vecx1) vecfx1 = _vec(f(_x1)) dx = (vecfx1-vecfx) / epsilon return dx @@ -243,30 +324,31 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_forward, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_forward(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) else tmp = norm(vecx .* (colorvec .== color_i)) - epsilon = compute_epsilon(Val(:forward), sqrt(tmp), relstep, absstep, dir) + epsilon = compute_epsilon( + Val(:forward), sqrt(tmp), relstep, absstep, dir) _vecx = @. 
vecx + epsilon * (colorvec == color_i) _x = reshape(_vecx, axes(x)) vecfx1 = _vec(f(_x)) dx = (vecfx1-vecfx)/epsilon - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end end elseif fdtype == Val(:central) - function calculate_Ji_central(i) - x1_save = ArrayInterface.allowed_getindex(vecx1,i) - x_save = ArrayInterface.allowed_getindex(vecx,i) + x1_save = ArrayInterface.allowed_getindex(vecx1, i) + x_save = ArrayInterface.allowed_getindex(vecx, i) epsilon = compute_epsilon(Val(:forward), x1_save, relstep, absstep, dir) - _vecx1 = setindex(vecx1,x1_save+epsilon,i) - _vecx = setindex(vecx,x_save-epsilon,i) + _vecx1 = setindex(vecx1, x1_save+epsilon, i) + _vecx = setindex(vecx, x_save-epsilon, i) _x1 = reshape(_vecx1, axes(x)) _x = reshape(_vecx, axes(x)) vecfx1 = _vec(f(_x1)) @@ -279,13 +361,14 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_central, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_central(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) else tmp = norm(vecx1 .* (colorvec .== color_i)) - epsilon = compute_epsilon(Val(:forward), sqrt(tmp), relstep, absstep, dir) + epsilon = compute_epsilon( + Val(:forward), sqrt(tmp), relstep, absstep, dir) _vecx1 = @. vecx1 + epsilon * (colorvec == color_i) _vecx = @. vecx - epsilon * (colorvec == color_i) _x1 = reshape(_vecx1, axes(x)) @@ -293,7 +376,8 @@ function finite_difference_jacobian( vecfx1 = _vec(f(_x1)) vecfx = _vec(f(_x)) dx = (vecfx1-vecfx)/(2epsilon) - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end @@ -302,8 +386,8 @@ function finite_difference_jacobian( epsilon = eps(eltype(x)) function calculate_Ji_complex(i) - x_save = ArrayInterface.allowed_getindex(vecx,i) - _vecx = setindex(complex.(vecx),x_save+im*epsilon,i) + x_save = ArrayInterface.allowed_getindex(vecx, i) + _vecx = setindex(complex.(vecx), x_save+im*epsilon, i) _x = reshape(_vecx, axes(x)) vecfx = _vec(f(_x)) dx = imag(vecfx)/epsilon @@ -314,7 +398,7 @@ function finite_difference_jacobian( J = mapreduce(calculate_Ji_complex, hcat, 1:maximum(colorvec)) J = _mat(J) else - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing dx = calculate_Ji_complex(color_i) J = J + _make_Ji(J, eltype(x), dx, color_i, nrows, ncols) @@ -323,7 +407,8 @@ function finite_difference_jacobian( _x = reshape(_vecx, axes(x)) vecfx = _vec(f(_x)) dx = imag(vecfx)/epsilon - Ji = _make_Ji(J,rows_index,cols_index,dx,colorvec,color_i,nrows,ncols) + Ji = _make_Ji( + J, rows_index, cols_index, dx, colorvec, color_i, nrows, ncols) J = J + Ji end end @@ -336,43 +421,44 @@ end """ finite_difference_jacobian!( - J::AbstractMatrix, + J :: AbstractMatrix, f, - x::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:forward}, - returntype :: Type{T2}=eltype(x), + x :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), f_in :: Union{T2,Nothing}=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, colorvec = 1:length(x), sparsity = ArrayInterfaceCore.has_sparsestruct(J) ? J : nothing) Cache-less. 
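+
+For example, a minimal in-place sketch (the residual function `f!` below is
+illustrative):
+
+```julia
+function f!(fx, x)
+    fx[1] = x[1]^2 + x[2]
+    fx[2] = x[1] * x[2]
+    return nothing
+end
+x = [1.0, 2.0]
+J = zeros(2, 2)
+finite_difference_jacobian!(J, f!, x)
+```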
""" function finite_difference_jacobian!(J, - f, - x, - fdtype = Val(:forward), - returntype = eltype(x), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - colorvec = 1:length(x), - sparsity = ArrayInterface.has_sparsestruct(J) ? J : nothing) + f, + x, + fdtype = Val(:forward), + returntype = eltype(x), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = 1:length(x), + sparsity = ArrayInterface.has_sparsestruct(J) ? J : nothing) if f_in isa Nothing && fdtype == Val(:forward) - if size(J,1) == length(x) + if size(J, 1) == length(x) fx = zero(x) else - fx = zeros(returntype,size(J,1)) + fx = zeros(returntype, size(J, 1)) end - f(fx,x) + f(fx, x) cache = JacobianCache(x, fx, fdtype, returntype) elseif f_in isa Nothing cache = JacobianCache(x, fdtype, returntype) else cache = JacobianCache(x, f_in, fdtype, returntype) end - finite_difference_jacobian!(J, f, x, cache, cache.fx; relstep=relstep, absstep=absstep, colorvec=colorvec, sparsity=sparsity) + finite_difference_jacobian!(J, f, x, cache, cache.fx; relstep = relstep, + absstep = absstep, colorvec = colorvec, sparsity = sparsity) end function _findstructralnz(A::DenseMatrix) @@ -394,30 +480,29 @@ end """ FiniteDiff.finite_difference_jacobian!( - J::AbstractMatrix{<:Number}, + J :: AbstractMatrix{<:Number}, f, - x::AbstractArray{<:Number}, - cache::JacobianCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + x :: AbstractArray{<:Number}, + cache :: JacobianCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, colorvec = cache.colorvec, sparsity = cache.sparsity, - dir=true) + dir = true) Cached. """ function finite_difference_jacobian!( - J, - f, - x, - cache::JacobianCache{T1,T2,T3,T4,cType,sType,fdtype,returntype}, - f_in = nothing; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep, - colorvec = cache.colorvec, - sparsity = cache.sparsity, - dir = true) where {T1,T2,T3,T4,cType,sType,fdtype,returntype} - + J, + f, + x, + cache::JacobianCache{T1, T2, T3, T4, cType, sType, fdtype, returntype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + colorvec = cache.colorvec, + sparsity = cache.sparsity, + dir = true) where {T1, T2, T3, T4, cType, sType, fdtype, returntype} m, n = size(J) _color = reshape(colorvec, axes(x)...) @@ -450,15 +535,15 @@ function finite_difference_jacobian!( vfx = _vec(f_in) end - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing - x1_save = ArrayInterface.allowed_getindex(x1,color_i) + x1_save = ArrayInterface.allowed_getindex(x1, color_i) epsilon = compute_epsilon(Val(:forward), x1_save, relstep, absstep, dir) ArrayInterface.allowed_setindex!(x1, x1_save + epsilon, color_i) f(fx1, x1) # J is dense, so either it is truly dense or this is the # compressed form of the coloring, so write into it. - @. J[:,color_i] = (vfx1 - vfx) / epsilon + @. J[:, color_i] = (vfx1 - vfx) / epsilon # Now return x1 back to its original value ArrayInterface.allowed_setindex!(x1, x1_save, color_i) else # Perturb along the colorvec vector @@ -471,9 +556,10 @@ function finite_difference_jacobian!( @. 
vfx1 = (vfx1 - vfx) / epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx1,colorvec,color_i,n) + _colorediteration!(J, vfx1, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx1,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx1, colorvec, color_i, n) end else #= @@ -482,7 +568,8 @@ function finite_difference_jacobian!( J[rows_index, cols_index] .+= (colorvec[cols_index] .== color_i) .* vfx1[rows_index] += means requires a zero'd out start =# - fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx1) + fast_jacobian_setindex!( + J, rows_index, cols_index, _color, color_i, vfx1) end # Now return x1 back to its original value @. x1 = x1 - epsilon * (_color == color_i) @@ -490,7 +577,7 @@ function finite_difference_jacobian!( end #for ends here elseif fdtype == Val(:central) vfx1 = _vec(fx1) - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing x_save = ArrayInterface.allowed_getindex(x, color_i) epsilon = compute_epsilon(Val(:central), x_save, relstep, absstep, dir) @@ -498,48 +585,51 @@ function finite_difference_jacobian!( f(fx1, x1) ArrayInterface.allowed_setindex!(x1, x_save - epsilon, color_i) f(fx, x1) - @. J[:,color_i] = (vfx1 - vfx) / 2epsilon + @. J[:, color_i] = (vfx1 - vfx) / 2epsilon ArrayInterface.allowed_setindex!(x1, x_save, color_i) else # Perturb along the colorvec vector @. x2 = x1 * (_color == color_i) tmp = norm(x2) epsilon = compute_epsilon(Val(:central), sqrt(tmp), relstep, absstep, dir) @. x1 = x1 + epsilon * (_color == color_i) - @. x = x - epsilon * (_color == color_i) + @. x = x - epsilon * (_color == color_i) f(fx1, x1) f(fx, x) @. vfx1 = (vfx1 - vfx) / 2epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx1,colorvec,color_i,n) + _colorediteration!(J, vfx1, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx1,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx1, colorvec, color_i, n) end else - fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx1) + fast_jacobian_setindex!( + J, rows_index, cols_index, _color, color_i, vfx1) end @. x1 = x1 - epsilon * (_color == color_i) - @. x = x + epsilon * (_color == color_i) + @. x = x + epsilon * (_color == color_i) end end elseif fdtype==Val(:complex) && returntype<:Real epsilon = eps(eltype(x)) - @inbounds for color_i ∈ 1:maximum(colorvec) + @inbounds for color_i in 1:maximum(colorvec) if sparsity isa Nothing x1_save = ArrayInterface.allowed_getindex(x1, color_i) ArrayInterface.allowed_setindex!(x1, x1_save + im*epsilon, color_i) - f(fx,x1) - @. J[:,color_i] = imag(vfx) / epsilon - ArrayInterface.allowed_setindex!(x1, x1_save,color_i) + f(fx, x1) + @. J[:, color_i] = imag(vfx) / epsilon + ArrayInterface.allowed_setindex!(x1, x1_save, color_i) else # Perturb along the colorvec vector @. x1 = x1 + im * epsilon * (_color == color_i) - f(fx,x1) + f(fx, x1) @. 
vfx = imag(vfx) / epsilon if ArrayInterface.fast_scalar_indexing(x1) if sparseCSC_common_sparsity - _colorediteration!(J,vfx,colorvec,color_i,n) + _colorediteration!(J, vfx, colorvec, color_i, n) else - _colorediteration!(J,sparsity,rows_index,cols_index,vfx,colorvec,color_i,n) + _colorediteration!( + J, sparsity, rows_index, cols_index, vfx, colorvec, color_i, n) end else fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx) @@ -554,8 +644,8 @@ function finite_difference_jacobian!( end function resize!(cache::JacobianCache, i::Int) - resize!(cache.x1, i) - resize!(cache.fx, i) + resize!(cache.x1, i) + resize!(cache.fx, i) cache.fx1 !== nothing && resize!(cache.fx1, i) cache.colorvec = 1:i nothing @@ -564,5 +654,9 @@ end @inline fill_matrix!(J, v) = fill!(J, v) @inline function fast_jacobian_setindex!(J, rows_index, cols_index, _color, color_i, vfx) - @. void_setindex!((J,), getindex((J,), rows_index, cols_index) + (getindex((_color,), cols_index) == color_i) * getindex((vfx,), rows_index), rows_index, cols_index) -end \ No newline at end of file + @. void_setindex!((J,), + getindex((J,), rows_index, cols_index) + + (getindex((_color,), cols_index) == color_i) * getindex((vfx,), rows_index), + rows_index, + cols_index) +end diff --git a/src/jvp.jl b/src/jvp.jl index 2b915c9..fa940aa 100644 --- a/src/jvp.jl +++ b/src/jvp.jl @@ -1,56 +1,141 @@ +""" + JVPCache{X1, FX1, FDType} + +Cache structure for Jacobian-vector product (JVP) computations. + +Stores temporary arrays needed for efficient JVP computation without repeated allocations. +The JVP computes `J(x) * v` where `J(x)` is the Jacobian of function `f` at point `x` +and `v` is a vector. + +# Fields +- `x1::X1`: Temporary array for perturbed input values +- `fx1::FX1`: Temporary array for function evaluations +""" mutable struct JVPCache{X1, FX1, FDType} - x1 :: X1 - fx1 :: FX1 + x1::X1 + fx1::FX1 end """ - FiniteDiff.JVPCache( - x, - fdtype :: Type{T1} = Val{:forward}) + FiniteDiff.JVPCache(x, fdtype::Type{T1} = Val{:forward}) -Allocating Cache Constructor. +Allocating cache constructor for Jacobian-vector product computations. + +Creates a `JVPCache` by allocating temporary arrays with the same structure as `x`. +This constructor is convenient but allocates memory for the cache arrays. + +# Arguments +- `x`: Input vector whose structure determines the cache array sizes +- `fdtype::Type{T1} = Val{:forward}`: Finite difference method type + +# Returns +- `JVPCache` with allocated temporary arrays for JVP computation + +# Examples +```julia +x = [1.0, 2.0, 3.0] +cache = JVPCache(x, Val(:forward)) +``` """ function JVPCache( - x, - fdtype::Union{Val{FD},Type{FD}} = Val(:forward)) where {FD} + x, + fdtype::Union{Val{FD}, Type{FD}} = Val(:forward)) where {FD} fdtype isa Type && (fdtype = fdtype()) JVPCache{typeof(x), typeof(x), fdtype}(copy(x), copy(x)) end """ - FiniteDiff.JVPCache( - x, - fx1, - fdtype :: Type{T1} = Val{:forward}, + FiniteDiff.JVPCache(x, fx1, fdtype::Type{T1} = Val{:forward}) -Non-Allocating Cache Constructor. +Non-allocating cache constructor for Jacobian-vector product computations. + +Creates a `JVPCache` using pre-allocated arrays `x` and `fx1`. This constructor +is memory-efficient as it reuses existing arrays without additional allocation. 
+ +# Arguments +- `x`: Pre-allocated array for perturbed input values +- `fx1`: Pre-allocated array for function evaluations +- `fdtype::Type{T1} = Val{:forward}`: Finite difference method type + +# Returns +- `JVPCache` using the provided arrays as cache storage + +# Examples +```julia +x = [1.0, 2.0, 3.0] +fx1 = similar(x) +cache = JVPCache(x, fx1, Val(:forward)) +``` + +# Notes +The arrays `x` and `fx1` will be modified during JVP computations. Ensure they +are not used elsewhere if their values need to be preserved. """ function JVPCache( - x, - fx, - fdtype::Union{Val{FD},Type{FD}} = Val(:forward)) where {FD} + x, + fx, + fdtype::Union{Val{FD}, Type{FD}} = Val(:forward)) where {FD} fdtype isa Type && (fdtype = fdtype()) - JVPCache{typeof(x), typeof(fx), fdtype}(x,fx) + JVPCache{typeof(x), typeof(fx), fdtype}(x, fx) end """ FiniteDiff.finite_difference_jvp( f, - x :: AbstractArray{<:Number}, - v :: AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:central}, - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) -Cache-less. +Compute the Jacobian-vector product `J(x) * v` using finite differences. + +This function computes the directional derivative of `f` at `x` in direction `v` +without explicitly forming the Jacobian matrix. This is more efficient than +computing the full Jacobian when only `J*v` is needed. + +# Arguments +- `f`: Function to differentiate (vector→vector map) +- `x::AbstractArray{<:Number}`: Point at which to evaluate the Jacobian +- `v::AbstractArray{<:Number}`: Direction vector for the product +- `fdtype::Type{T1}=Val{:forward}`: Finite difference method (`:forward`, `:central`) +- `f_in=nothing`: Pre-computed `f(x)` value (if available) + +# Keyword Arguments +- `relstep`: Relative step size (default: method-dependent optimal value) +- `absstep=relstep`: Absolute step size fallback + +# Returns +- Vector `J(x) * v` representing the Jacobian-vector product + +# Examples +```julia +f(x) = [x[1]^2 + x[2], x[1] * x[2], x[2]^3] +x = [1.0, 2.0] +v = [1.0, 0.0] # Direction vector +jvp = finite_difference_jvp(f, x, v) # Directional derivative +``` + +# Mathematical Background +The JVP is computed using the finite difference approximation: +- Forward: `J(x) * v ≈ (f(x + h*v) - f(x)) / h` +- Central: `J(x) * v ≈ (f(x + h*v) - f(x - h*v)) / (2h)` + +where `h` is the step size and `v` is the direction vector. + +# Notes +- Requires only `2` function evaluations (vs `O(n)` for full Jacobian) +- Forward differences: 2 function evaluations, `O(h)` accuracy +- Central differences: 2 function evaluations, `O(h²)` accuracy +- Particularly efficient when `v` is sparse or when only one directional derivative is needed """ function finite_difference_jvp(f, x, v, - fdtype = Val(:forward), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) - + fdtype = Val(:forward), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) if f_in isa Nothing fx = f(x) else @@ -66,21 +151,20 @@ end x, v, cache::JVPCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Cached. 
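+
+For example, a minimal sketch that reuses the cache across calls (`f`, `x`, and `v`
+below are illustrative):
+
+```julia
+f(x) = [x[1]^2 + x[2], x[1] * x[2]]
+x = [1.0, 2.0]
+v = [1.0, 0.0]
+cache = JVPCache(x)
+jvp = finite_difference_jvp(f, x, v, cache)
+```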
""" function finite_difference_jvp( - f, - x, - v, - cache::JVPCache{X1, FX1, fdtype}, - f_in=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) where {X1, FX1, fdtype} - + f, + x, + v, + cache::JVPCache{X1, FX1, fdtype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {X1, FX1, fdtype} if fdtype == Val(:complex) ArgumentError("finite_difference_jvp doesn't support :complex-mode finite diff") end @@ -89,7 +173,7 @@ function finite_difference_jvp( epsilon = compute_epsilon(fdtype, tmp, relstep, absstep, dir) if fdtype == Val(:forward) fx = f_in isa Nothing ? f(x) : f_in - x1 = @. x + epsilon * v + x1 = @. x + epsilon * v fx1 = f(x1) fx1 = @. (fx1-fx)/epsilon elseif fdtype == Val(:central) @@ -106,31 +190,31 @@ end """ finite_difference_jvp!( - jvp::AbstractArray{<:Number}, + jvp :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - v::AbstractArray{<:Number}, - fdtype :: Type{T1}=Val{:forward}, - returntype :: Type{T2}=eltype(x), - f_in :: Union{T2,Nothing}=nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + fdtype :: Type{T1} = Val{:forward}, + returntype :: Type{T2} = eltype(x), + f_in :: Union{T2,Nothing} = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) Cache-less. """ function finite_difference_jvp!(jvp, - f, - x, - v, - fdtype = Val(:forward), - f_in = nothing; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep) + f, + x, + v, + fdtype = Val(:forward), + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep) if !isnothing(f_in) cache = JVPCache(x, f_in, fdtype) elseif fdtype == Val(:forward) fx = zero(x) - f(fx,x) + f(fx, x) cache = JVPCache(x, fx, fdtype) else cache = JVPCache(x, fdtype) @@ -140,33 +224,32 @@ end """ FiniteDiff.finite_difference_jvp!( - jvp::AbstractArray{<:Number}, + jvp :: AbstractArray{<:Number}, f, - x::AbstractArray{<:Number}, - v::AbstractArray{<:Number}, - cache::JVPCache; - relstep=default_relstep(fdtype, eltype(x)), - absstep=relstep, - dir=true) + x :: AbstractArray{<:Number}, + v :: AbstractArray{<:Number}, + cache :: JVPCache; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) Cached. 
""" function finite_difference_jvp!( - jvp, - f, - x, - v, - cache::JVPCache{X1, FX1, fdtype}, - f_in = nothing; - relstep = default_relstep(fdtype, eltype(x)), - absstep = relstep, - dir = true) where {X1, FX1, fdtype} - + jvp, + f, + x, + v, + cache::JVPCache{X1, FX1, fdtype}, + f_in = nothing; + relstep = default_relstep(fdtype, eltype(x)), + absstep = relstep, + dir = true) where {X1, FX1, fdtype} if fdtype == Val(:complex) ArgumentError("finite_difference_jvp doesn't support :complex-mode finite diff") end - (;x1, fx1) = cache + (; x1, fx1) = cache tmp = sqrt(abs(dot(_vec(x), _vec(v)))) epsilon = compute_epsilon(fdtype, tmp, relstep, absstep, dir) if fdtype == Val(:forward) @@ -191,7 +274,7 @@ function finite_difference_jvp!( end function resize!(cache::JVPCache, i::Int) - resize!(cache.x1, i) + resize!(cache.x1, i) cache.fx1 !== nothing && resize!(cache.fx1, i) nothing end diff --git a/test/downstream/ordinarydiffeq_tridiagonal_solve.jl b/test/downstream/ordinarydiffeq_tridiagonal_solve.jl index 841c638..9032a92 100644 --- a/test/downstream/ordinarydiffeq_tridiagonal_solve.jl +++ b/test/downstream/ordinarydiffeq_tridiagonal_solve.jl @@ -24,4 +24,5 @@ function loss(p) sol = solve(_prob, Rodas4P(autodiff=false), saveat=0.1) sum((sol .- sol_true).^2) end -@test ForwardDiff.gradient(loss, [1.0])[1] ≈ 0.6662949361011025 +@test ForwardDiff.gradient(loss, [1.0])[1] ≈ 0.6645766813735486 + diff --git a/test/finitedifftests.jl b/test/finitedifftests.jl index e975202..45d38e1 100644 --- a/test/finitedifftests.jl +++ b/test/finitedifftests.jl @@ -334,7 +334,7 @@ end res = zero(x) FiniteDiff.finite_difference_gradient!(res, _f, x, cache) @test res ≈ _∇f(x) - @test_broken ret_allocs(res, _f, x, cache) == 0 + @test ret_allocs(res, _f, x, cache) == 0 # Can we now change the field? _x = rand(2) @@ -342,7 +342,7 @@ end _cache = @set _cache.fx = _f(_x) FiniteDiff.finite_difference_gradient!(res, _f,_x, _cache) @test res ≈ _∇f(_x) - @test_broken ret_allocs(res, _f, _x, _cache) == 0 + @test ret_allocs(res, _f, _x, _cache) == 0 end _g(x) = x[1]^2 + x[2] diff --git a/test/runtests.jl b/test/runtests.jl index e0b8b27..ba2b432 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,7 +24,6 @@ if GROUP == "All" || GROUP == "Downstream" @time @safetestset "ODEs" begin import OrdinaryDiffEq @time @safetestset "OrdinaryDiffEq Tridiagonal" begin include("downstream/ordinarydiffeq_tridiagonal_solve.jl") end - include(joinpath(dirname(pathof(OrdinaryDiffEq)), "..", "test/interface/sparsediff_tests.jl")) end end