Fix interface so that callers can inspect results

penelopeysm · penelopeysm · commit 57a153744166 · 2025-04-15T18:33:58.000+01:00
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
@@ -10,13 +10,7 @@ using Random: Random, Xoshiro
 using Statistics: median
 using Test: @test
 
-export ADResult, run_ad
-
-# This function needed to work around the fact that different backends can
-# return different AbstractArrays for the gradient. See
-# https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754 for more
-# context.
-_to_vec_f64(x::AbstractArray) = x isa Vector{Float64} ? x : collect(Float64, x)
+export ADResult, run_ad, ADIncorrectException
 
 """
     REFERENCE_ADTYPE
@@ -27,33 +21,50 @@ it's the default AD backend used in Turing.jl.
 const REFERENCE_ADTYPE = AutoForwardDiff()
 
 """
-    ADResult
+    ADIncorrectException{T<:Real}
+
+Exception thrown when an AD backend returns an incorrect value or gradient.
+
+The type parameter `T` is the numeric type of the value and gradient.
+"""
+struct ADIncorrectException{T<:Real} <: Exception
+    value_expected::T  # reference value of logp the backend was checked against
+    value_actual::T  # value of logp returned by the backend under test
+    grad_expected::Vector{T}  # reference gradient of logp
+    grad_actual::Vector{T}  # gradient of logp returned by the backend under test
+end
+
+"""
+    ADResult{Tparams<:Real,Tresult<:Real}
 
 Data structure to store the results of the AD correctness test.
+
+The type parameter `Tparams` is the numeric type of the parameters passed in;
+`Tresult` is the type of the value and the gradient.
 """
-struct ADResult
+struct ADResult{Tparams<:Real,Tresult<:Real}
     "The DynamicPPL model that was tested"
     model::Model
     "The VarInfo that was used"
     varinfo::AbstractVarInfo
     "The values at which the model was evaluated"
-    params::Vector{<:Real}
+    params::Vector{Tparams}
     "The AD backend that was tested"
     adtype::AbstractADType
     "The absolute tolerance for the value of logp"
-    value_atol::Real
+    value_atol::Tresult
     "The absolute tolerance for the gradient of logp"
-    grad_atol::Real
+    grad_atol::Tresult
     "The expected value of logp"
-    value_expected::Union{Nothing,Float64}
+    value_expected::Union{Nothing,Tresult}
     "The expected gradient of logp"
-    grad_expected::Union{Nothing,Vector{Float64}}
+    grad_expected::Union{Nothing,Vector{Tresult}}
     "The value of logp (calculated using `adtype`)"
-    value_actual::Union{Nothing,Real}
+    value_actual::Union{Nothing,Tresult}
     "The gradient of logp (calculated using `adtype`)"
-    grad_actual::Union{Nothing,Vector{Float64}}
+    grad_actual::Union{Nothing,Vector{Tresult}}
     "If benchmarking was requested, the time taken by the AD backend to calculate the gradient of logp, divided by the time taken to evaluate logp itself"
-    time_vs_primal::Union{Nothing,Float64}
+    time_vs_primal::Union{Nothing,Tresult}
 end
 
 """
@@ -72,19 +83,20 @@ end
         verbose=true,
     )::ADResult
 
+### Description
+
 Test the correctness and/or benchmark the AD backend `adtype` for the model
 `model`.
 
 Whether to test and benchmark is controlled by the `test` and `benchmark`
 keyword arguments. By default, `test` is `true` and `benchmark` is `false`.
 
-Returns an [`ADResult`](@ref) object, which contains the results of the
-test and/or benchmark.
-
 Note that to run AD successfully you will need to import the AD backend itself.
 For example, to test with `AutoReverseDiff()` you will need to run `import
 ReverseDiff`.
 
+### Arguments
+
 There are two positional arguments, which absolutely must be provided:
 
 1. `model` - The model being tested.
@@ -146,14 +158,23 @@ Everything else is optional, and can be categorised into several groups:
 
    By default, this function prints messages when it runs. To silence it, set
    `verbose=false`.
+
+### Returns / Throws
+
+Returns an [`ADResult`](@ref) object, which contains the results of the
+test and/or benchmark.
+
+If `test` is `true` and the AD backend returns an incorrect value or gradient, an
+`ADIncorrectException` is thrown. If a different error occurs, it will be
+thrown as-is.
 """
 function run_ad(
     model::Model,
     adtype::AbstractADType;
-    test=true,
-    benchmark=false,
-    value_atol=1e-6,
-    grad_atol=1e-6,
+    test::Bool=true,
+    benchmark::Bool=false,
+    value_atol::Real=1e-6,
+    grad_atol::Real=1e-6,
     linked::Bool=true,
     varinfo::AbstractVarInfo=VarInfo(model),
     params::Union{Nothing,Vector{<:Real}}=nothing,
@@ -167,14 +188,14 @@ function run_ad(
     if isnothing(params)
         params = varinfo[:]
     end
-    params = map(identity, params)
+    params = map(identity, params)  # Concretise
 
     verbose && @info "Running AD on $(model.f) with $(adtype)\n"
     verbose && println("       params : $(params)")
     ldf = LogDensityFunction(model, varinfo; adtype=adtype)
 
     value, grad = logdensity_and_gradient(ldf, params)
-    grad = _to_vec_f64(grad)
+    grad = collect(grad)
     verbose && println("       actual : $((value, grad))")
 
     if test
@@ -186,10 +207,11 @@ function run_ad(
             expected_value_and_grad
         end
         verbose && println("     expected : $((value_true, grad_true))")
-        grad_true = _to_vec_f64(grad_true)
-        # Then compare
-        @test isapprox(value, value_true; atol=value_atol)
-        @test isapprox(grad, grad_true; atol=grad_atol)
+        grad_true = collect(grad_true)
+
+        exc() = throw(ADIncorrectException(value_true, value, grad_true, grad))  # (expected, actual)
+        isapprox(value, value_true; atol=value_atol) || exc()
+        isapprox(grad, grad_true; atol=grad_atol) || exc()
     else
         value_true = nothing
         grad_true = nothing