diff --git a/Project.toml b/Project.toml index 8ebc70e1..b8ed300a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "InferenceObjects" uuid = "b5cf5a8d-e756-4ee3-b014-01d49d192c00" authors = ["Seth Axen and contributors"] -version = "0.4.12" +version = "0.4.13" [deps] ANSIColoredPrinters = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" @@ -32,7 +32,7 @@ MLJBase = "1" NCDatasets = "0.12.6, 0.13, 0.14" OffsetArrays = "1" OrderedCollections = "1.6" -PosteriorStats = "0.1.1, 0.2" +PosteriorStats = "0.3" Random = "1" StatsBase = "0.33.7, 0.34" Tables = "1.11.0" diff --git a/docs/Project.toml b/docs/Project.toml index f0967c9d..6eeb8e83 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,6 +8,9 @@ NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" PosteriorStats = "7f36be82-ad55-44ba-a5c0-b8b5480d7aa5" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +[sources] +InferenceObjects = {path = ".."} + [compat] Documenter = "1" DocumenterInterLinks = "1" diff --git a/docs/make.jl b/docs/make.jl index 88dc457e..82a9270a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -29,34 +29,37 @@ doctestfilters = [ r"\s+\"created_at\" => .*", # ignore timestamps in doctests ] -makedocs(; - modules=[ - InferenceObjects, - Base.get_extension(InferenceObjects, :InferenceObjectsMCMCDiagnosticToolsExt), - Base.get_extension(InferenceObjects, :InferenceObjectsPosteriorStatsExt), - ], - authors="Seth Axen and contributors", - repo=Remotes.GitHub("arviz-devs", "InferenceObjects.jl"), - sitename="InferenceObjects.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://arviz-devs.github.io/InferenceObjects.jl", - edit_link="main", - assets=String[], - ), - pages=[ - "Home" => "index.md", - "Dataset" => "dataset.md", - "InferenceData" => "inference_data.md", - "Extensions" => [ - "MCMCDiagnosticTools" => "extensions/mcmcdiagnostictools.md", - "PosteriorStats" => "extensions/posteriorstats.md", +# Increase the terminal width from 80 to 100 chars to avoid column truncation +withenv("COLUMNS" => 100) do + makedocs(; + modules=[ + InferenceObjects, + Base.get_extension(InferenceObjects, :InferenceObjectsMCMCDiagnosticToolsExt), + Base.get_extension(InferenceObjects, :InferenceObjectsPosteriorStatsExt), ], - ], - doctestfilters=doctestfilters, - warnonly=:missing_docs, - plugins=[links], -) + authors="Seth Axen and contributors", + repo=Remotes.GitHub("arviz-devs", "InferenceObjects.jl"), + sitename="InferenceObjects.jl", + format=Documenter.HTML(; + prettyurls=get(ENV, "CI", "false") == "true", + canonical="https://arviz-devs.github.io/InferenceObjects.jl", + edit_link="main", + assets=String[], + ), + pages=[ + "Home" => "index.md", + "Dataset" => "dataset.md", + "InferenceData" => "inference_data.md", + "Extensions" => [ + "MCMCDiagnosticTools" => "extensions/mcmcdiagnostictools.md", + "PosteriorStats" => "extensions/posteriorstats.md", + ], + ], + doctestfilters=doctestfilters, + warnonly=:missing_docs, + plugins=[links], + ) +end # run doctests on extensions function get_extension(mod::Module, name::Symbol) @@ -67,16 +70,6 @@ function get_extension(mod::Module, name::Symbol) end end -using MCMCDiagnosticTools: MCMCDiagnosticTools -using PosteriorStats: PosteriorStats -for extended_pkg in (MCMCDiagnosticTools, PosteriorStats) - extension_name = Symbol("InferenceObjects", extended_pkg, "Ext") - @info "Running doctests for extension $(extension_name)" - mod = get_extension(InferenceObjects, extension_name) - DocMeta.setdocmeta!(mod, :DocTestSetup, :(using $(Symbol(extended_pkg)))) - doctest(mod; manual=false) -end - deploydocs(; repo="github.com/arviz-devs/InferenceObjects.jl", devbranch="main", push_preview=true ) diff --git a/ext/InferenceObjectsPosteriorStatsExt/InferenceObjectsPosteriorStatsExt.jl b/ext/InferenceObjectsPosteriorStatsExt/InferenceObjectsPosteriorStatsExt.jl index 252a3182..0300a647 100644 --- a/ext/InferenceObjectsPosteriorStatsExt/InferenceObjectsPosteriorStatsExt.jl +++ b/ext/InferenceObjectsPosteriorStatsExt/InferenceObjectsPosteriorStatsExt.jl @@ -6,13 +6,15 @@ using InferenceObjects: InferenceObjects using PosteriorStats: PosteriorStats using StatsBase: StatsBase -import PosteriorStats: hdi, loo, loo_pit, r2_score, summarize, waic +import PosteriorStats: eti, hdi, loo, loo_pit, r2_score, summarize, waic import StatsBase: summarystats -export hdi, loo, loo_pit, r2_score, summarize, waic, summarystats +export eti, hdi, loo, loo_pit, r2_score, summarize, waic, summarystats + +maplayers = isdefined(DimensionalData, :maplayers) ? DimensionalData.maplayers : map include("utils.jl") -include("hdi.jl") +include("ci.jl") include("loo.jl") include("waic.jl") include("loo_pit.jl") diff --git a/ext/InferenceObjectsPosteriorStatsExt/ci.jl b/ext/InferenceObjectsPosteriorStatsExt/ci.jl new file mode 100644 index 00000000..f87e4f4d --- /dev/null +++ b/ext/InferenceObjectsPosteriorStatsExt/ci.jl @@ -0,0 +1,27 @@ + +for (ci_fun, ci_desc) in + (:eti => "equal-tailed interval (ETI)", :hdi => "highest density interval (HDI)") + @eval begin + # this pattern ensures that the type is completely specified at compile time + @doc """ + $($ci_fun)(data::InferenceData; kwargs...) -> Dataset + $($ci_fun)(data::Dataset; kwargs...) -> Dataset + + Calculate the $($ci_desc) for each parameter in the data. + + For more details and a description of the `kwargs`, see + [`PosteriorStats.$($ci_fun)`](@extref). + """ + function PosteriorStats.$(ci_fun)(data::InferenceObjects.InferenceData; kwargs...) + return PosteriorStats.$(ci_fun)(data.posterior; kwargs...) + end + function PosteriorStats.$(ci_fun)(data::InferenceObjects.Dataset; kwargs...) + ds = maplayers(data) do var + return _as_dimarray( + PosteriorStats.$(ci_fun)(_params_array(var); kwargs...), var + ) + end + return DimensionalData.rebuild(ds; metadata=DimensionalData.NoMetadata()) + end + end +end diff --git a/ext/InferenceObjectsPosteriorStatsExt/hdi.jl b/ext/InferenceObjectsPosteriorStatsExt/hdi.jl deleted file mode 100644 index 7c2e3191..00000000 --- a/ext/InferenceObjectsPosteriorStatsExt/hdi.jl +++ /dev/null @@ -1,37 +0,0 @@ -# this pattern ensures that the type is completely specified at compile time -const HDI_BOUND_DIM = let - dims = Dimensions.format(Dimensions.Dim{:hdi_bound}([:lower, :upper]), Base.OneTo(2)) - # some versions of DimensionalData return a tuple here, others return a Dim - dims isa Tuple ? only(dims) : dims -end - -@doc """ - hdi(data::InferenceData; kwargs...) -> Dataset - hdi(data::Dataset; kwargs...) -> Dataset - -Calculate the highest density interval (HDI) for each parameter in the data. - -For more details and a description of the `kwargs`, see [`PosteriorStats.hdi`](@extref). -""" -function PosteriorStats.hdi(data::InferenceObjects.InferenceData; kwargs...) - return PosteriorStats.hdi(data.posterior; kwargs...) -end -function PosteriorStats.hdi(data::InferenceObjects.Dataset; kwargs...) - results = map(DimensionalData.data(data), DimensionalData.layerdims(data)) do var, dims - x = _draw_chains_params_array(DimensionalData.DimArray(var, dims)) - r = PosteriorStats.hdi(x; kwargs...) - lower, upper = map(Base.Fix2(_as_dimarray, x), r) - return cat(lower, upper; dims=HDI_BOUND_DIM) - end - dims = Dimensions.combinedims(( - Dimensions.otherdims(data, InferenceObjects.DEFAULT_SAMPLE_DIMS)..., HDI_BOUND_DIM - )) - return DimensionalData.rebuild( - data; - data=map(parent, results), - dims, - layerdims=map(Dimensions.dims, results), - refdims=(), - metadata=DimensionalData.NoMetadata(), - ) -end diff --git a/ext/InferenceObjectsPosteriorStatsExt/loo.jl b/ext/InferenceObjectsPosteriorStatsExt/loo.jl index 4210e7dc..4abac8d8 100644 --- a/ext/InferenceObjectsPosteriorStatsExt/loo.jl +++ b/ext/InferenceObjectsPosteriorStatsExt/loo.jl @@ -19,8 +19,8 @@ julia> idata = load_example_data("centered_eight"); julia> loo(idata) PSISLOOResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.33 + elpd se_elpd p se_p + -31 1.4 0.9 0.33 and PSISResult with 500 draws, 4 chains, and 8 parameters Pareto shape (k) diagnostic values: diff --git a/ext/InferenceObjectsPosteriorStatsExt/summarize.jl b/ext/InferenceObjectsPosteriorStatsExt/summarize.jl index ee35fefa..6a838f8b 100644 --- a/ext/InferenceObjectsPosteriorStatsExt/summarize.jl +++ b/ext/InferenceObjectsPosteriorStatsExt/summarize.jl @@ -13,12 +13,12 @@ function StatsBase.summarystats(data::InferenceObjects.Dataset; kwargs...) end @doc """ - summarize(data::InferenceData, group=:posterior, stats_funs...; kwargs...) + summarize(data::InferenceData, stats_funs...; group=:posterior, kwargs...) summarize(data::Dataset, stats_funs...; kwargs...) Compute summary statistics for the data using the provided functions. -For verbose variable labels, provide `compat_labels=false`. For details on `stats_funs` and +For verbose variable labels, provide `compact_labels=false`. For details on `stats_funs` and `kwargs`, see [`PosteriorStats.summarize`](@extref). # Examples @@ -33,18 +33,17 @@ julia> data = load_example_data("centered_eight"); julia> summarize(data) SummaryStats - mean std hdi_3% hdi_97% mcse_mean mcse_std ess ⋯ - mu 4.2 3.3 -1.61 10.3 0.21 0.088 ⋯ - theta[Choate] 6.4 5.9 -3.68 17.9 0.25 0.20 ⋯ - theta[Deerfield] 5.0 4.9 -4.98 13.4 0.21 0.15 ⋯ - theta[Phillips Andover] 3.4 5.4 -7.54 12.9 0.23 0.17 ⋯ - theta[Phillips Exeter] 4.8 5.2 -5.11 14.1 0.21 0.21 ⋯ - theta[Hotchkiss] 3.5 4.8 -6.12 12.0 0.25 0.15 ⋯ - theta[Lawrenceville] 3.7 5.2 -6.50 12.7 0.22 0.21 ⋯ - theta[St. Paul's] 6.5 5.2 -2.67 16.9 0.22 0.15 ⋯ - theta[Mt. Hermon] 4.8 5.7 -5.97 15.4 0.24 0.23 ⋯ - tau 4.3 3.0 0.715 9.41 0.22 0.14 ⋯ - 3 columns omitted + mean std eti94 ess_tail ess_bulk rhat mcse_mean mcse_std + mu 4.2 3.3 -2.11 .. 9.90 622 241 1.03 0.21 0.088 + theta[Choate] 6.4 5.9 -3.05 .. 19.1 937 572 1.01 0.25 0.20 + theta[Deerfield] 5.0 4.9 -4.49 .. 14.2 1214 532 1.01 0.21 0.15 + theta[Phillips Andover] 3.4 5.4 -8.17 .. 12.7 1017 511 1.01 0.23 0.17 + theta[Phillips Exeter] 4.8 5.2 -4.84 .. 14.5 911 572 1.01 0.21 0.21 + theta[Hotchkiss] 3.5 4.8 -6.11 .. 12.0 789 347 1.02 0.25 0.15 + theta[Lawrenceville] 3.7 5.2 -6.62 .. 12.6 957 506 1.01 0.22 0.21 + theta[St. Paul's] 6.5 5.2 -2.38 .. 18.3 1031 528 1.01 0.22 0.15 + theta[Mt. Hermon] 4.8 5.7 -5.52 .. 16.0 1045 538 1.01 0.24 0.23 + tau 4.3 3.0 1.06 .. 11.5 214 128 1.03 0.22 0.14 ``` Compute the mean, standard deviation, median, and median absolute deviation of the `theta` diff --git a/ext/InferenceObjectsPosteriorStatsExt/utils.jl b/ext/InferenceObjectsPosteriorStatsExt/utils.jl index bc309ecf..aa9d13c4 100644 --- a/ext/InferenceObjectsPosteriorStatsExt/utils.jl +++ b/ext/InferenceObjectsPosteriorStatsExt/utils.jl @@ -159,8 +159,17 @@ function observations_and_predictions( end end +# reshape to (ndraws, nchains, nparams...) and drop the dimensions +function _params_array(data) + sample_dims = Dimensions.dims(data, InferenceObjects.DEFAULT_SAMPLE_DIMS) + param_dims = Dimensions.otherdims(data, sample_dims) + dims_combined = Dimensions.combinedims((sample_dims..., param_dims...)) + Dimensions.dimsmatch(Dimensions.dims(data), dims_combined) && return data + return PermutedDimsArray(data, dims_combined) +end + _as_dimarray(x::DimensionalData.AbstractDimArray, ::DimensionalData.AbstractDimArray) = x -function _as_dimarray(x::Union{Real,Missing}, arr::DimensionalData.AbstractDimArray) +function _as_dimarray(x, arr::DimensionalData.AbstractDimArray) return Dimensions.rebuild(arr, fill(x), ()) end diff --git a/ext/InferenceObjectsPosteriorStatsExt/waic.jl b/ext/InferenceObjectsPosteriorStatsExt/waic.jl index 854ff56a..c95c6dee 100644 --- a/ext/InferenceObjectsPosteriorStatsExt/waic.jl +++ b/ext/InferenceObjectsPosteriorStatsExt/waic.jl @@ -19,8 +19,8 @@ julia> idata = load_example_data("centered_eight"); julia> waic(idata) WAICResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.32 + elpd se_elpd p se_p + -31 1.4 0.9 0.32 ``` """ function PosteriorStats.waic( diff --git a/test/posteriorstats.jl b/test/posteriorstats.jl index 9f648e62..a6888c35 100644 --- a/test/posteriorstats.jl +++ b/test/posteriorstats.jl @@ -4,10 +4,14 @@ using InferenceObjects using PosteriorStats using Statistics using StatsBase +using Tables using Test +_as_array(x) = fill(x) +_as_array(x::AbstractArray) = x + @testset "PosteriorStats integration" begin - @testset "hdi" begin + @testset for ci_fun in (eti, hdi) nt = (x=randn(1000, 3), y=randn(1000, 3, 4), z=randn(1000, 3, 4, 2)) posterior = convert_to_dataset(nt) posterior_perm = convert_to_dataset(( @@ -17,23 +21,17 @@ using Test )) idata = InferenceData(; posterior) @testset for prob in (0.76, 0.93) - if VERSION ≥ v"1.9" - @test_broken @inferred hdi(posterior; prob) - end - r1 = hdi(posterior; prob) - r1_perm = hdi(posterior_perm; prob) + @test_broken @inferred ci_fun(posterior; prob) + r1 = ci_fun(posterior; prob) + r1_perm = ci_fun(posterior_perm; prob) for k in (:x, :y, :z) - rk = hdi(posterior[k]; prob) - @test r1[k][hdi_bound=At(:lower)] == rk.lower - @test r1[k][hdi_bound=At(:upper)] == rk.upper + rk = ci_fun(posterior[k]; prob) + @test r1[k] == _as_array(rk) # equality check is safe because these are always data values - @test r1_perm[k][hdi_bound=At(:lower)] == rk.lower - @test r1_perm[k][hdi_bound=At(:upper)] == rk.upper - end - if VERSION ≥ v"1.9" - @test_broken @inferred hdi(idata; prob) + @test r1_perm[k] == _as_array(rk) end - r2 = hdi(idata; prob) + @test_broken @inferred ci_fun(idata; prob) + r2 = ci_fun(idata; prob) @test r1 == r2 end end @@ -55,12 +53,11 @@ using Test @test loo_result1.pointwise isa Dataset if length(sz) == 2 @test issetequal( - keys(loo_result1.pointwise), - (:elpd, :elpd_mcse, :p, :reff, :pareto_shape), + keys(loo_result1.pointwise), (:elpd, :se_elpd, :p, :reff, :pareto_shape) ) else @test loo_result1.pointwise.elpd == loo_result.pointwise.elpd - @test loo_result1.pointwise.elpd_mcse == loo_result.pointwise.elpd_mcse + @test loo_result1.pointwise.se_elpd == loo_result.pointwise.se_elpd @test loo_result1.pointwise.p == loo_result.pointwise.p @test loo_result1.pointwise.reff == loo_result.pointwise.reff @test loo_result1.pointwise.pareto_shape == @@ -74,21 +71,21 @@ using Test loo_result2 = loo(idata2) @test loo_result2.estimates.elpd ≈ loo_result1.estimates.elpd atol = atol_perm @test isapprox( - loo_result2.estimates.elpd_mcse, - loo_result1.estimates.elpd_mcse; + loo_result2.estimates.se_elpd, + loo_result1.estimates.se_elpd; nans=true, atol=atol_perm, ) @test loo_result2.estimates.p ≈ loo_result1.estimates.p atol = atol_perm @test isapprox( - loo_result2.estimates.p_mcse, - loo_result1.estimates.p_mcse; + loo_result2.estimates.se_p, + loo_result1.estimates.se_p; nans=true, atol=atol_perm, ) @test isapprox( - loo_result2.pointwise.elpd_mcse, - loo_result1.pointwise.elpd_mcse; + loo_result2.pointwise.se_elpd, + loo_result1.pointwise.se_elpd; nans=true, atol=atol_perm, ) @@ -120,8 +117,7 @@ using Test @test_throws Exception loo_pit(idata1; y_name=:z) @test_throws Exception loo_pit(idata1; y_pred_name=:z) @test_throws Exception loo_pit(idata1; log_likelihood_name=:z) - @test loo_pit(idata1) == pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata1) + @test @inferred(loo_pit(idata1)) == pit_vals @test loo_pit(idata1; y_name=:y) == pit_vals @test loo_pit(idata1; y_name=:y, y_pred_name=:y, log_likelihood_name=:y) == pit_vals @@ -144,8 +140,7 @@ using Test posterior_predictive=Dataset((; y=y_pred)), sample_stats=Dataset((; log_likelihood=log_like)), ) - @test loo_pit(idata3) == pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata3) + @test @inferred(loo_pit(idata3)) == pit_vals all_dims_perm = (param_dims..., reverse(sample_dims)...) idata4 = InferenceData(; @@ -153,8 +148,7 @@ using Test posterior_predictive=Dataset((; y=permutedims(y_pred, all_dims_perm))), log_likelihood=Dataset((; y=permutedims(log_like, all_dims_perm))), ) - @test loo_pit(idata4) ≈ pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata4) + @test @inferred(loo_pit(idata4)) ≈ pit_vals idata5 = InferenceData(; observed_data=Dataset((; y)), posterior_predictive=Dataset((; y=y_pred)) @@ -165,8 +159,7 @@ using Test @testset "r2_score" begin @testset for name in ("regression1d", "regression10d") idata = load_example_data(name) - VERSION ≥ v"1.9" && @inferred r2_score(idata) - r2_val = r2_score(idata) + r2_val = @inferred(r2_score(idata)) @test r2_val == r2_score( idata.observed_data.y, PermutedDimsArray(idata.posterior_predictive.y, (:draw, :chain, :y_dim_0)), @@ -208,15 +201,15 @@ using Test waic_result2 = waic(idata2) @test waic_result2.estimates.elpd ≈ waic_result1.estimates.elpd atol = atol_perm @test isapprox( - waic_result2.estimates.elpd_mcse, - waic_result1.estimates.elpd_mcse; + waic_result2.estimates.se_elpd, + waic_result1.estimates.se_elpd; nans=true, atol=atol_perm, ) @test waic_result2.estimates.p ≈ waic_result1.estimates.p atol = atol_perm @test isapprox( - waic_result2.estimates.p_mcse, - waic_result1.estimates.p_mcse; + waic_result2.estimates.se_p, + waic_result1.estimates.se_p; nans=true, atol=atol_perm, ) @@ -231,7 +224,7 @@ using Test ) result1 = compare(eight_schools_data) result2 = compare(map(loo, eight_schools_data)) - @testset for k in (:name, :rank, :elpd_diff, :elpd_diff_mcse, :weight) + @testset for k in (:name, :rank, :elpd_diff, :se_elpd_diff, :weight) @test getproperty(result1, k) == getproperty(result2, k) end end @@ -248,13 +241,13 @@ using Test ) slices = [ data.x, - data.y[a=1], - data.y[a=2], - data.y[a=3], - data.z[b=1, c=1], - data.z[b=2, c=1], - data.z[b=1, c=2], - data.z[b=2, c=2], + data.y[a = 1], + data.y[a = 2], + data.y[a = 3], + data.z[b = 1, c = 1], + data.z[b = 2, c = 1], + data.z[b = 1, c = 2], + data.z[b = 2, c = 2], ] var_names = [ "x", @@ -270,9 +263,10 @@ using Test stats = @inferred SummaryStats summarize(data, mean, std; name="Stats") @test stats.name == "Stats" - @test stats[:parameter] == var_names - @test stats[:mean] ≈ map(mean, slices) - @test stats[:std] ≈ map(std, slices) + stats_nt = Tables.columntable(stats) + @test stats_nt.label == var_names + @test stats_nt.mean ≈ map(mean, slices) + @test stats_nt.std ≈ map(std, slices) stats_def = summarize(arr; var_names) @test summarize(data) == stats_def