From b97334b965c074e7c8b39375dc9e2e9b1fbc72ea Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Fri, 14 Feb 2025 17:18:03 +0000 Subject: [PATCH 1/8] Bump minor version to v0.37 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c3285fc41b..396838cef0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Turing" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.36.2" +version = "0.37.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" From 08e4c08ed42001f943adfd85f236c760fa7ce425 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Fri, 14 Feb 2025 17:23:35 +0000 Subject: [PATCH 2/8] Remove selector/space stuff (#2458) * Remove selector stuff from ESS * Remove selector stuff from MH * Remove selector stuff from HMC * Remove selector stuff from Emcee * Remove selector stuff from IS * Add missing getspace methods * Remove selector stuff for particle methods * Fix an HMC selector bug * Code style * Fix Emcee selector bug * Fix typo in ESS tests * Fix some constructor overwrites * Remove unnecessary tests * Remove selector stuff from SGHMC * Remove drop_space and other non-longer-necessary deprecation measures * Bump minor version 0.37. Add a HISTORY.md entry * Apply suggestions from code review Co-authored-by: Penelope Yong * Remove unnecessary type parameters Co-authored-by: Penelope Yong * Simplify constructors in particle_mcmc.jl * Remove calls to setgid and updategid --------- Co-authored-by: Penelope Yong --- HISTORY.md | 6 +++ src/mcmc/Inference.jl | 28 +---------- src/mcmc/emcee.jl | 6 +-- src/mcmc/ess.jl | 40 +++++----------- src/mcmc/gibbs.jl | 45 +----------------- src/mcmc/hmc.jl | 59 +++++++---------------- src/mcmc/is.jl | 6 +-- src/mcmc/mh.jl | 84 +++++++++++++------------------- src/mcmc/particle_mcmc.jl | 95 ++++++++++++------------------------- src/mcmc/repeat_sampler.jl | 3 +- src/mcmc/sghmc.jl | 37 +++++---------- test/dynamicppl/compiler.jl | 4 +- test/mcmc/Inference.jl | 2 +- test/mcmc/ess.jl | 13 ++--- test/mcmc/gibbs.jl | 28 +---------- test/mcmc/hmc.jl | 8 ---- test/mcmc/mh.jl | 18 +++---- test/mcmc/particle_mcmc.jl | 72 ---------------------------- test/mcmc/sghmc.jl | 14 +----- 19 files changed, 137 insertions(+), 431 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 64be691064..82b2fd0812 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,9 @@ +# Release 0.37.0 + +## Breaking changes + +0.37 removes the old Gibbs constructors deprecated in 0.36. + # Release 0.36.0 ## Breaking changes diff --git a/src/mcmc/Inference.jl b/src/mcmc/Inference.jl index e0cbd3dc24..fa79656308 100644 --- a/src/mcmc/Inference.jl +++ b/src/mcmc/Inference.jl @@ -91,18 +91,6 @@ abstract type Hamiltonian <: InferenceAlgorithm end abstract type StaticHamiltonian <: Hamiltonian end abstract type AdaptiveHamiltonian <: Hamiltonian end -# TODO(mhauru) Remove the below function once all the space/Selector stuff has been removed. -""" - drop_space(alg::InferenceAlgorithm) - -Return an `InferenceAlgorithm` like `alg`, but with all space information removed. -""" -function drop_space end - -function drop_space(sampler::Sampler) - return Sampler(drop_space(sampler.alg), sampler.selector) -end - include("repeat_sampler.jl") """ @@ -146,11 +134,6 @@ struct ExternalSampler{S<:AbstractSampler,AD<:ADTypes.AbstractADType,Unconstrain end end -# External samplers don't have notion of space to begin with. 
-drop_space(x::ExternalSampler) = x - -DynamicPPL.getspace(::ExternalSampler) = () - """ requires_unconstrained_space(sampler::ExternalSampler) @@ -217,8 +200,6 @@ Algorithm for sampling from the prior. """ struct Prior <: InferenceAlgorithm end -drop_space(x::Prior) = x - function AbstractMCMC.step( rng::Random.AbstractRNG, model::DynamicPPL.Model, @@ -592,13 +573,6 @@ include("emcee.jl") # Typing tools # ################ -for alg in (:SMC, :PG, :MH, :IS, :ESS, :Emcee) - @eval DynamicPPL.getspace(::$alg{space}) where {space} = space -end -for alg in (:HMC, :HMCDA, :NUTS, :SGLD, :SGHMC) - @eval DynamicPPL.getspace(::$alg{<:Any,space}) where {space} = space -end - function DynamicPPL.get_matching_type( spl::Sampler{<:Union{PG,SMC}}, vi, ::Type{TV} ) where {T,N,TV<:Array{T,N}} @@ -609,6 +583,8 @@ end # Utilities # ############## +# TODO(mhauru) Remove this once DynamicPPL has removed all its Selector stuff. +DynamicPPL.getspace(::InferenceAlgorithm) = () DynamicPPL.getspace(spl::Sampler) = getspace(spl.alg) DynamicPPL.inspace(vn::VarName, spl::Sampler) = inspace(vn, getspace(spl.alg)) diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 816d90578a..9d272f5a0a 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -13,7 +13,7 @@ Foreman-Mackey, D., Hogg, D. W., Lang, D., & Goodman, J. (2013). emcee: The MCMC Hammer. Publications of the Astronomical Society of the Pacific, 125 (925), 306. https://doi.org/10.1086/670067 """ -struct Emcee{space,E<:AMH.Ensemble} <: InferenceAlgorithm +struct Emcee{E<:AMH.Ensemble} <: InferenceAlgorithm ensemble::E end @@ -23,11 +23,9 @@ function Emcee(n_walkers::Int, stretch_length=2.0) # ensemble sampling. prop = AMH.StretchProposal(nothing, stretch_length) ensemble = AMH.Ensemble(n_walkers, prop) - return Emcee{(),typeof(ensemble)}(ensemble) + return Emcee{typeof(ensemble)}(ensemble) end -drop_space(alg::Emcee{space,E}) where {space,E} = Emcee{(),E}(alg.ensemble) - struct EmceeState{V<:AbstractVarInfo,S} vi::V states::S diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index aa1a9fe380..98d0adabc8 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -20,12 +20,7 @@ Mean │ 1 │ m │ 0.824853 │ ``` """ -struct ESS{space} <: InferenceAlgorithm end - -ESS() = ESS{()}() -ESS(space::Symbol) = ESS{(space,)}() - -drop_space(alg::ESS) = ESS() +struct ESS <: InferenceAlgorithm end # always accept in the first step function DynamicPPL.initialstep( @@ -35,7 +30,7 @@ function DynamicPPL.initialstep( vns = _getvns(vi, spl) length(vns) == 1 || error("[ESS] does only support one variable ($(length(vns)) variables specified)") - for vn in vns[1] + for vn in only(vns) dist = getdist(vi, vn) EllipticalSliceSampling.isgaussian(typeof(dist)) || error("[ESS] only supports Gaussian prior distributions") @@ -48,7 +43,7 @@ function AbstractMCMC.step( rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... 
) # obtain previous sample - f = vi[spl] + f = vi[:] # define previous sampler state # (do not use cache to avoid in-place sampling from prior) @@ -129,13 +124,11 @@ function (ℓ::ESSLogLikelihood)(f::AbstractVector) end function DynamicPPL.tilde_assume( - rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, vn, vi + rng::Random.AbstractRNG, ::DefaultContext, ::Sampler{<:ESS}, right, vn, vi ) - return if inspace(vn, sampler) - DynamicPPL.tilde_assume(rng, LikelihoodContext(), SampleFromPrior(), right, vn, vi) - else - DynamicPPL.tilde_assume(rng, ctx, SampleFromPrior(), right, vn, vi) - end + return DynamicPPL.tilde_assume( + rng, LikelihoodContext(), SampleFromPrior(), right, vn, vi + ) end function DynamicPPL.tilde_observe( @@ -145,22 +138,11 @@ function DynamicPPL.tilde_observe( end function DynamicPPL.dot_tilde_assume( - rng::Random.AbstractRNG, - ctx::DefaultContext, - sampler::Sampler{<:ESS}, - right, - left, - vns, - vi, + rng::Random.AbstractRNG, ::DefaultContext, ::Sampler{<:ESS}, right, left, vns, vi ) - # TODO: Or should we do `all(Base.Fix2(inspace, sampler), vns)`? - return if inspace(first(vns), sampler) - DynamicPPL.dot_tilde_assume( - rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi - ) - else - DynamicPPL.dot_tilde_assume(rng, ctx, SampleFromPrior(), right, left, vns, vi) - end + return DynamicPPL.dot_tilde_assume( + rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi + ) end function DynamicPPL.dot_tilde_observe( diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index 5af01388e5..c318cabe01 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -345,7 +345,7 @@ struct Gibbs{N,V<:NTuple{N,AbstractVector{<:VarName}},A<:NTuple{N,Any}} <: # Ensure that samplers have the same selector, and that varnames are lists of # VarNames. - samplers = tuple(map(set_selector ∘ drop_space, samplers)...) + samplers = tuple(map(set_selector, samplers)...) varnames = tuple(map(to_varname_list, varnames)...) return new{length(samplers),typeof(varnames),typeof(samplers)}(varnames, samplers) end @@ -355,49 +355,6 @@ function Gibbs(algs::Pair...) return Gibbs(map(first, algs), map(last, algs)) end -# The below two constructors only provide backwards compatibility with the constructor of -# the old Gibbs sampler. They are deprecated and will be removed in the future. -function Gibbs(alg1::InferenceAlgorithm, other_algs::InferenceAlgorithm...) - algs = [alg1, other_algs...] - varnames = map(algs) do alg - space = getspace(alg) - if (space isa VarName) - space - elseif (space isa Symbol) - VarName{space}() - else - tuple((s isa Symbol ? VarName{s}() : s for s in space)...) - end - end - msg = ( - "Specifying which sampler to use with which variable using syntax like " * - "`Gibbs(NUTS(:x), MH(:y))` is deprecated and will be removed in the future. " * - "Please use `Gibbs(; x=NUTS(), y=MH())` instead. If you want different iteration " * - "counts for different subsamplers, use e.g. " * - "`Gibbs(@varname(x) => RepeatSampler(NUTS(), 2), @varname(y) => MH())`" - ) - Base.depwarn(msg, :Gibbs) - return Gibbs(varnames, map(set_selector ∘ drop_space, algs)) -end - -function Gibbs( - alg_with_iters1::Tuple{<:InferenceAlgorithm,Int}, - other_algs_with_iters::Tuple{<:InferenceAlgorithm,Int}..., -) - algs_with_iters = [alg_with_iters1, other_algs_with_iters...] 
- algs = Iterators.map(first, algs_with_iters) - iters = Iterators.map(last, algs_with_iters) - algs_duplicated = Iterators.flatten(( - Iterators.repeated(alg, iter) for (alg, iter) in zip(algs, iters) - )) - # This calls the other deprecated constructor from above, hence no need for a depwarn - # here. - return Gibbs(algs_duplicated...) -end - -# TODO: Remove when no longer needed. -DynamicPPL.getspace(::Gibbs) = () - struct GibbsState{V<:DynamicPPL.AbstractVarInfo,S} vi::V states::S diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index 80de196c6f..524d02c6af 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -53,7 +53,7 @@ sample(gdemo([1.5, 2]), HMC(0.1, 10), 1000) sample(gdemo([1.5, 2]), HMC(0.01, 10), 1000) ``` """ -struct HMC{AD,space,metricT<:AHMC.AbstractMetric} <: StaticHamiltonian +struct HMC{AD,metricT<:AHMC.AbstractMetric} <: StaticHamiltonian ϵ::Float64 # leapfrog step size n_leapfrog::Int # leapfrog step number adtype::AD @@ -62,24 +62,18 @@ end function HMC( ϵ::Float64, n_leapfrog::Int, - ::Type{metricT}, - space::Tuple; + ::Type{metricT}; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) where {metricT<:AHMC.AbstractMetric} - return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype) + return HMC{typeof(adtype),metricT}(ϵ, n_leapfrog, adtype) end function HMC( ϵ::Float64, - n_leapfrog::Int, - space::Symbol...; + n_leapfrog::Int; metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMC(ϵ, n_leapfrog, metricT, space; adtype=adtype) -end - -function drop_space(alg::HMC{AD,space,metricT}) where {AD,space,metricT} - return HMC{AD,(),metricT}(alg.ϵ, alg.n_leapfrog, alg.adtype) + return HMC(ϵ, n_leapfrog, metricT; adtype=adtype) end DynamicPPL.initialsampler(::Sampler{<:Hamiltonian}) = SampleFromUniform() @@ -336,7 +330,7 @@ Hoffman, Matthew D., and Andrew Gelman. "The No-U-turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo." Journal of Machine Learning Research 15, no. 1 (2014): 1593-1623. """ -struct HMCDA{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian +struct HMCDA{AD,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian n_adapts::Int # number of samples with adaption for ϵ δ::Float64 # target accept rate λ::Float64 # target leapfrog length @@ -349,11 +343,10 @@ function HMCDA( δ::Float64, λ::Float64, ϵ::Float64, - ::Type{metricT}, - space::Tuple; + ::Type{metricT}; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) where {metricT<:AHMC.AbstractMetric} - return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype) + return HMCDA{typeof(adtype),metricT}(n_adapts, δ, λ, ϵ, adtype) end function HMCDA( @@ -363,7 +356,7 @@ function HMCDA( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype=adtype) + return HMCDA(-1, δ, λ, init_ϵ, metricT; adtype=adtype) end function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ::Tuple{}; kwargs...) 
@@ -373,17 +366,12 @@ end function HMCDA( n_adapts::Int, δ::Float64, - λ::Float64, - space::Symbol...; + λ::Float64; init_ϵ::Float64=0.0, metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype=adtype) -end - -function drop_space(alg::HMCDA{AD,space,metricT}) where {AD,space,metricT} - return HMCDA{AD,(),metricT}(alg.n_adapts, alg.δ, alg.λ, alg.ϵ, alg.adtype) + return HMCDA(n_adapts, δ, λ, init_ϵ, metricT; adtype=adtype) end """ @@ -409,7 +397,7 @@ Arguments: If not specified, `ForwardDiff` is used, with its `chunksize` automatically determined. """ -struct NUTS{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian +struct NUTS{AD,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian n_adapts::Int # number of samples with adaption for ϵ δ::Float64 # target accept rate max_depth::Int # maximum tree depth @@ -424,11 +412,10 @@ function NUTS( max_depth::Int, Δ_max::Float64, ϵ::Float64, - ::Type{metricT}, - space::Tuple; + ::Type{metricT}; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) where {metricT} - return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype) + return NUTS{typeof(adtype),metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype) end function NUTS(n_adapts::Int, δ::Float64, ::Tuple{}; kwargs...) @@ -437,15 +424,14 @@ end function NUTS( n_adapts::Int, - δ::Float64, - space::Symbol...; + δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0, metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype) + return NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT; adtype=adtype) end function NUTS( @@ -456,21 +442,15 @@ function NUTS( metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype) + return NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT; adtype=adtype) end function NUTS(; kwargs...) return NUTS(-1, 0.65; kwargs...) 
end -function drop_space(alg::NUTS{AD,space,metricT}) where {AD,space,metricT} - return NUTS{AD,(),metricT}( - alg.n_adapts, alg.δ, alg.max_depth, alg.Δ_max, alg.ϵ, alg.adtype - ) -end - for alg in (:HMC, :HMCDA, :NUTS) - @eval getmetricT(::$alg{<:Any,<:Any,metricT}) where {metricT} = metricT + @eval getmetricT(::$alg{<:Any,metricT}) where {metricT} = metricT end ##### @@ -515,7 +495,6 @@ end function DynamicPPL.assume( rng, spl::Sampler{<:Hamiltonian}, dist::Distribution, vn::VarName, vi ) - DynamicPPL.updategid!(vi, vn, spl) return DynamicPPL.assume(dist, vn, vi) end @@ -527,7 +506,6 @@ function DynamicPPL.dot_assume( var::AbstractMatrix, vi, ) - DynamicPPL.updategid!.(Ref(vi), vns, Ref(spl)) return DynamicPPL.dot_assume(dist, var, vns, vi) end function DynamicPPL.dot_assume( @@ -538,7 +516,6 @@ function DynamicPPL.dot_assume( var::AbstractArray, vi, ) - DynamicPPL.updategid!.(Ref(vi), vns, Ref(spl)) return DynamicPPL.dot_assume(dists, var, vns, vi) end diff --git a/src/mcmc/is.jl b/src/mcmc/is.jl index 083bc7bc3a..23a7bbef19 100644 --- a/src/mcmc/is.jl +++ b/src/mcmc/is.jl @@ -24,11 +24,7 @@ end sample(gdemo([1.5, 2]), IS(), 1000) ``` """ -struct IS{space} <: InferenceAlgorithm end - -IS() = IS{()}() - -drop_space(alg::IS) = IS() +struct IS <: InferenceAlgorithm end DynamicPPL.initialsampler(sampler::Sampler{<:IS}) = sampler diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index edd46a4572..33587f1e47 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -2,10 +2,6 @@ ### Sampler states ### -struct MH{space,P} <: InferenceAlgorithm - proposals::P -end - proposal(p::AdvancedMH.Proposal) = p proposal(f::Function) = AdvancedMH.StaticProposal(f) proposal(d::Distribution) = AdvancedMH.StaticProposal(d) @@ -13,15 +9,15 @@ proposal(cov::AbstractMatrix) = AdvancedMH.RandomWalkProposal(MvNormal(cov)) proposal(x) = error("proposals of type ", typeof(x), " are not supported") """ - MH(space...) + MH(proposals...) Construct a Metropolis-Hastings algorithm. -The arguments `space` can be +The arguments `proposals` can be - Blank (i.e. `MH()`), in which case `MH` defaults to using the prior for each parameter as the proposal distribution. - An iterable of pairs or tuples mapping a `Symbol` to a `AdvancedMH.Proposal`, `Distribution`, or `Function` - that generates returns a conditional proposal distribution. + that returns a conditional proposal distribution. - A covariance matrix to use as for mean-zero multivariate normal proposals. # Examples @@ -108,45 +104,41 @@ mean(chain) ``` """ -function MH(space...) - syms = Symbol[] - - prop_syms = Symbol[] - props = AMH.Proposal[] - - for s in space - if s isa Symbol - # If it's just a symbol, proceed as normal. - push!(syms, s) - elseif s isa Pair || s isa Tuple - # Check to see whether it's a pair that specifies a kernel - # or a specific proposal distribution. - push!(prop_syms, s[1]) - push!(props, proposal(s[2])) - elseif length(space) == 1 - # If we hit this block, check to see if it's - # a run-of-the-mill proposal or covariance - # matrix. - prop = proposal(s) - - # Return early, we got a covariance matrix. - return MH{(),typeof(prop)}(prop) - else - # Try to convert it to a proposal anyways, - # throw an error if not acceptable. - prop = proposal(s) - push!(props, prop) +struct MH{P} <: InferenceAlgorithm + proposals::P + + function MH(proposals...) + prop_syms = Symbol[] + props = AMH.Proposal[] + + for s in proposals + if s isa Pair || s isa Tuple + # Check to see whether it's a pair that specifies a kernel + # or a specific proposal distribution. 
+ push!(prop_syms, s[1]) + push!(props, proposal(s[2])) + elseif length(proposals) == 1 + # If we hit this block, check to see if it's + # a run-of-the-mill proposal or covariance + # matrix. + prop = proposal(s) + + # Return early, we got a covariance matrix. + return new{typeof(prop)}(prop) + else + # Try to convert it to a proposal anyways, + # throw an error if not acceptable. + prop = proposal(s) + push!(props, prop) + end end - end - proposals = NamedTuple{tuple(prop_syms...)}(tuple(props...)) - syms = vcat(syms, prop_syms) + proposals = NamedTuple{tuple(prop_syms...)}(tuple(props...)) - return MH{tuple(syms...),typeof(proposals)}(proposals) + return new{typeof(proposals)}(proposals) + end end -drop_space(alg::MH{space,P}) where {space,P} = MH{(),P}(alg.proposals) - # Some of the proposals require working in unconstrained space. transform_maybe(proposal::AMH.Proposal) = proposal function transform_maybe(proposal::AMH.RandomWalkProposal) @@ -351,7 +343,7 @@ function propose!!( ) # If this is the case, we can just draw directly from the proposal # matrix. - vals = vi[spl] + vals = vi[:] # Create a sampler and the previous transition. mh_sampler = AMH.MetropolisHastings(spl.alg.proposals) @@ -406,9 +398,6 @@ function DynamicPPL.assume( ) # Just defer to `SampleFromPrior`. retval = DynamicPPL.assume(rng, SampleFromPrior(), dist, vn, vi) - # Update the Gibbs IDs because they might have been assigned in the `SampleFromPrior` call. - DynamicPPL.updategid!(vi, vn, spl) - # Return. return retval end @@ -422,9 +411,6 @@ function DynamicPPL.dot_assume( ) # Just defer to `SampleFromPrior`. retval = DynamicPPL.dot_assume(rng, SampleFromPrior(), dist, vns[1], var, vi) - # Update the Gibbs IDs because they might have been assigned in the `SampleFromPrior` call. - DynamicPPL.updategid!.((vi,), vns, (spl,)) - # Return. return retval end function DynamicPPL.dot_assume( @@ -437,8 +423,6 @@ function DynamicPPL.dot_assume( ) # Just defer to `SampleFromPrior`. retval = DynamicPPL.dot_assume(rng, SampleFromPrior(), dists, vns, var, vi) - # Update the Gibbs IDs because they might have been assigned in the `SampleFromPrior` call. - DynamicPPL.updategid!.((vi,), vns, (spl,)) return retval end diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index c5abb56f1d..733d572c73 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -15,38 +15,29 @@ Sequential Monte Carlo sampler. $(TYPEDFIELDS) """ -struct SMC{space,R} <: ParticleInference +struct SMC{R} <: ParticleInference resampler::R end """ - SMC(space...) - SMC([resampler = AdvancedPS.ResampleWithESSThreshold(), space = ()]) - SMC([resampler = AdvancedPS.resample_systematic, ]threshold[, space = ()]) + SMC([resampler = AdvancedPS.ResampleWithESSThreshold()]) + SMC([resampler = AdvancedPS.resample_systematic, ]threshold) -Create a sequential Monte Carlo sampler of type [`SMC`](@ref) for the variables in `space`. +Create a sequential Monte Carlo sampler of type [`SMC`](@ref). If the algorithm for the resampling step is not specified explicitly, systematic resampling is performed if the estimated effective sample size per particle drops below 0.5. 
""" -function SMC(resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=()) - return SMC{space,typeof(resampler)}(resampler) -end +SMC() = SMC(AdvancedPS.ResampleWithESSThreshold()) # Convenient constructors with ESS threshold -function SMC(resampler, threshold::Real, space::Tuple=()) - return SMC(AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) +function SMC(resampler, threshold::Real) + return SMC(AdvancedPS.ResampleWithESSThreshold(resampler, threshold)) end -function SMC(threshold::Real, space::Tuple=()) - return SMC(AdvancedPS.resample_systematic, threshold, space) +function SMC(threshold::Real) + return SMC(AdvancedPS.resample_systematic, threshold) end -# If only the space is defined -SMC(space::Symbol...) = SMC(space) -SMC(space::Tuple) = SMC(AdvancedPS.ResampleWithESSThreshold(), space) - -drop_space(alg::SMC{space,R}) where {space,R} = SMC{(),R}(alg.resampler) - struct SMCTransition{T,F<:AbstractFloat} <: AbstractTransition "The parameters for any given sample." θ::T @@ -184,7 +175,7 @@ Particle Gibbs sampler. $(TYPEDFIELDS) """ -struct PG{space,R} <: ParticleInference +struct PG{R} <: ParticleInference """Number of particles.""" nparticles::Int """Resampling algorithm.""" @@ -192,38 +183,26 @@ struct PG{space,R} <: ParticleInference end """ - PG(n, space...) - PG(n, [resampler = AdvancedPS.ResampleWithESSThreshold(), space = ()]) - PG(n, [resampler = AdvancedPS.resample_systematic, ]threshold[, space = ()]) + PG(n, [resampler = AdvancedPS.ResampleWithESSThreshold()]) + PG(n, [resampler = AdvancedPS.resample_systematic, ]threshold) -Create a Particle Gibbs sampler of type [`PG`](@ref) with `n` particles for the variables -in `space`. +Create a Particle Gibbs sampler of type [`PG`](@ref) with `n` particles. If the algorithm for the resampling step is not specified explicitly, systematic resampling is performed if the estimated effective sample size per particle drops below 0.5. """ -function PG( - nparticles::Int, resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=() -) - return PG{space,typeof(resampler)}(nparticles, resampler) +function PG(nparticles::Int) + return PG(nparticles, AdvancedPS.ResampleWithESSThreshold()) end # Convenient constructors with ESS threshold -function PG(nparticles::Int, resampler, threshold::Real, space::Tuple=()) - return PG(nparticles, AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) -end -function PG(nparticles::Int, threshold::Real, space::Tuple=()) - return PG(nparticles, AdvancedPS.resample_systematic, threshold, space) +function PG(nparticles::Int, resampler, threshold::Real) + return PG(nparticles, AdvancedPS.ResampleWithESSThreshold(resampler, threshold)) end - -# If only the number of particles and the space is defined -PG(nparticles::Int, space::Symbol...) = PG(nparticles, space) -function PG(nparticles::Int, space::Tuple) - return PG(nparticles, AdvancedPS.ResampleWithESSThreshold(), space) +function PG(nparticles::Int, threshold::Real) + return PG(nparticles, AdvancedPS.resample_systematic, threshold) end -drop_space(alg::PG{space,R}) where {space,R} = PG{(),R}(alg.nparticles, alg.resampler) - """ CSMC(...) 
@@ -384,31 +363,19 @@ function DynamicPPL.assume( vi = trace_local_varinfo_maybe(_vi) trng = trace_local_rng_maybe(rng) - if inspace(vn, spl) - if ~haskey(vi, vn) - r = rand(trng, dist) - push!!(vi, vn, r, dist, spl) - elseif is_flagged(vi, vn, "del") - unset_flag!(vi, vn, "del") # Reference particle parent - r = rand(trng, dist) - vi[vn] = DynamicPPL.tovec(r) - DynamicPPL.setgid!(vi, spl.selector, vn) - setorder!(vi, vn, get_num_produce(vi)) - else - DynamicPPL.updategid!(vi, vn, spl) # Pick data from reference particle - r = vi[vn] - end - # TODO: Should we make this `zero(promote_type(eltype(dist), eltype(r)))` or something? - lp = 0 - else # vn belongs to other sampler <=> conditioning on vn - if haskey(vi, vn) - r = vi[vn] - else - r = rand(rng, dist) - push!!(vi, vn, r, dist, DynamicPPL.Selector(:invalid)) - end - lp = logpdf_with_trans(dist, r, istrans(vi, vn)) + if ~haskey(vi, vn) + r = rand(trng, dist) + push!!(vi, vn, r, dist, spl) + elseif is_flagged(vi, vn, "del") + unset_flag!(vi, vn, "del") # Reference particle parent + r = rand(trng, dist) + vi[vn] = DynamicPPL.tovec(r) + setorder!(vi, vn, get_num_produce(vi)) + else + r = vi[vn] end + # TODO: Should we make this `zero(promote_type(eltype(dist), eltype(r)))` or something? + lp = 0 return r, lp, vi end diff --git a/src/mcmc/repeat_sampler.jl b/src/mcmc/repeat_sampler.jl index a3e38f46a9..2f8ab86744 100644 --- a/src/mcmc/repeat_sampler.jl +++ b/src/mcmc/repeat_sampler.jl @@ -28,10 +28,9 @@ function RepeatSampler(alg::InferenceAlgorithm, num_repeat::Int) return RepeatSampler(Sampler(alg), num_repeat) end -drop_space(rs::RepeatSampler) = RepeatSampler(drop_space(rs.sampler), rs.num_repeat) getADType(spl::RepeatSampler) = getADType(spl.sampler) DynamicPPL.default_chain_type(sampler::RepeatSampler) = default_chain_type(sampler.sampler) -DynamicPPL.getspace(spl::RepeatSampler) = getspace(spl.sampler) +# TODO(mhauru) Remove the below once DynamicPPL has removed all its Selector stuff. DynamicPPL.inspace(vn::VarName, spl::RepeatSampler) = inspace(vn, spl.sampler) function setparams_varinfo!!(model::DynamicPPL.Model, sampler::RepeatSampler, state, params) diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl index c79337c500..7cf5cd6e4d 100644 --- a/src/mcmc/sghmc.jl +++ b/src/mcmc/sghmc.jl @@ -1,7 +1,7 @@ """ - SGHMC{AD,space} + SGHMC{AD} -Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler.e +Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) sampler. # Fields $(TYPEDFIELDS) @@ -12,15 +12,14 @@ Tianqi Chen, Emily Fox, & Carlos Guestrin (2014). Stochastic Gradient Hamiltonia Carlo. In: Proceedings of the 31st International Conference on Machine Learning (pp. 1683–1691). """ -struct SGHMC{AD,space,T<:Real} <: StaticHamiltonian +struct SGHMC{AD,T<:Real} <: StaticHamiltonian learning_rate::T momentum_decay::T adtype::AD end """ - SGHMC( - space::Symbol...; + SGHMC(; learning_rate::Real, momentum_decay::Real, adtype::ADTypes.AbstractADType = AutoForwardDiff(), @@ -37,20 +36,13 @@ Tianqi Chen, Emily Fox, & Carlos Guestrin (2014). Stochastic Gradient Hamiltonia Carlo. In: Proceedings of the 31st International Conference on Machine Learning (pp. 1683–1691). 
""" -function SGHMC( - space::Symbol...; +function SGHMC(; learning_rate::Real, momentum_decay::Real, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay) - return SGHMC{typeof(adtype),space,typeof(_learning_rate)}( - _learning_rate, _momentum_decay, adtype - ) -end - -function drop_space(alg::SGHMC{AD,space,T}) where {AD,space,T} - return SGHMC{AD,(),T}(alg.learning_rate, alg.momentum_decay, alg.adtype) + return SGHMC(_learning_rate, _momentum_decay, adtype) end struct SGHMCState{L,V<:AbstractVarInfo,T<:AbstractVector{<:Real}} @@ -128,16 +120,12 @@ Max Welling & Yee Whye Teh (2011). Bayesian Learning via Stochastic Gradient Lan Dynamics. In: Proceedings of the 28th International Conference on Machine Learning (pp. 681–688). """ -struct SGLD{AD,space,S} <: StaticHamiltonian +struct SGLD{AD,S} <: StaticHamiltonian "Step size function." stepsize::S adtype::AD end -function drop_space(alg::SGLD{AD,space,S}) where {AD,space,S} - return SGLD{AD,(),S}(alg.stepsize, alg.adtype) -end - struct PolynomialStepsize{T<:Real} "Constant scale factor of the step size." a::T @@ -172,8 +160,7 @@ end (f::PolynomialStepsize)(t::Int) = f.a / (t + f.b)^f.γ """ - SGLD( - space::Symbol...; + SGLD(; stepsize = PolynomialStepsize(0.01), adtype::ADTypes.AbstractADType = AutoForwardDiff(), ) @@ -193,12 +180,10 @@ Dynamics. In: Proceedings of the 28th International Conference on Machine Learni See also: [`PolynomialStepsize`](@ref) """ -function SGLD( - space::Symbol...; - stepsize=PolynomialStepsize(0.01), - adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, +function SGLD(; + stepsize=PolynomialStepsize(0.01), adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE ) - return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype) + return SGLD(stepsize, adtype) end struct SGLDTransition{T,F<:Real} <: AbstractTransition diff --git a/test/dynamicppl/compiler.jl b/test/dynamicppl/compiler.jl index 7f5726614a..1c50c2d9e9 100644 --- a/test/dynamicppl/compiler.jl +++ b/test/dynamicppl/compiler.jl @@ -133,9 +133,7 @@ const gdemo_default = gdemo_d() end chain = sample( - newinterface(obs), - HMC(0.75, 3, :p, :x; adtype=AutoForwardDiff(; chunksize=2)), - 100, + newinterface(obs), HMC(0.75, 3; adtype=AutoForwardDiff(; chunksize=2)), 100 ) end @testset "no return" begin diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index da29e77089..62963b4c45 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -368,7 +368,7 @@ using Turing sample( StableRNG(seed), newinterface(obs), - HMC(0.75, 3, :p, :x; adtype=Turing.AutoForwardDiff(; chunksize=2)), + HMC(0.75, 3; adtype=Turing.AutoForwardDiff(; chunksize=2)), 100, ) end diff --git a/test/mcmc/ess.jl b/test/mcmc/ess.jl index 5533d11d7e..4675f61e20 100644 --- a/test/mcmc/ess.jl +++ b/test/mcmc/ess.jl @@ -30,18 +30,13 @@ using Turing N = 10 s1 = ESS() - s2 = ESS(:m) - for s in (s1, s2) - @test DynamicPPL.alg_str(Sampler(s, demo_default)) == "ESS" - end + @test DynamicPPL.alg_str(Sampler(s1, demo_default)) == "ESS" c1 = sample(demo_default, s1, N) - c2 = sample(demo_default, s2, N) - c3 = sample(demodot_default, s1, N) - c4 = sample(demodot_default, s2, N) + c2 = sample(demodot_default, s1, N) - s3 = Gibbs(:m => ESS(), :s => MH()) - c5 = sample(gdemo_default, s3, N) + s2 = Gibbs(:m => ESS(), :s => MH()) + c3 = sample(gdemo_default, s2, N) end @testset "ESS inference" begin diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index cd5d05a6bd..58db233f8b 100644 --- 
a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -17,7 +17,7 @@ using Random: Random using ReverseDiff: ReverseDiff import Mooncake using StableRNGs: StableRNG -using Test: @inferred, @test, @test_broken, @test_deprecated, @test_throws, @testset +using Test: @inferred, @test, @test_broken, @test_throws, @testset using Turing using Turing: Inference using Turing.Inference: AdvancedHMC, AdvancedMH @@ -150,7 +150,6 @@ end # Methods we need to define to be able to use AlgWrapper instead of an actual algorithm. # They all just propagate the call to the inner algorithm. Inference.isgibbscomponent(wrap::AlgWrapper) = Inference.isgibbscomponent(wrap.inner) - Inference.drop_space(wrap::AlgWrapper) = AlgWrapper(Inference.drop_space(wrap.inner)) function Inference.setparams_varinfo!!( model::DynamicPPL.Model, sampler::DynamicPPL.Sampler{<:AlgWrapper}, @@ -270,31 +269,6 @@ end @testset "Testing gibbs.jl with $adbackend" for adbackend in ADUtils.adbackends @info "Starting Gibbs tests with $adbackend" - @testset "Deprecated Gibbs constructors" begin - N = 10 - @test_deprecated s1 = Gibbs(HMC(0.1, 5, :s, :m; adtype=adbackend)) - @test_deprecated s2 = Gibbs(PG(10, :s, :m)) - @test_deprecated s3 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend)) - @test_deprecated s4 = Gibbs(PG(3, :s), HMC(0.4, 8, :m; adtype=adbackend)) - @test_deprecated s5 = Gibbs(CSMC(3, :s), HMC(0.4, 8, :m; adtype=adbackend)) - @test_deprecated s6 = Gibbs(HMC(0.1, 5, :s; adtype=adbackend), ESS(:m)) - @test_deprecated s7 = Gibbs((HMC(0.1, 5, :s; adtype=adbackend), 2), (ESS(:m), 3)) - for s in (s1, s2, s3, s4, s5, s6, s7) - @test DynamicPPL.alg_str(Turing.Sampler(s, gdemo_default)) == "Gibbs" - end - - # Check that the samplers work despite using the deprecated constructor. - sample(gdemo_default, s1, N) - sample(gdemo_default, s2, N) - sample(gdemo_default, s3, N) - sample(gdemo_default, s4, N) - sample(gdemo_default, s5, N) - sample(gdemo_default, s6, N) - sample(gdemo_default, s7, N) - - g = Turing.Sampler(s3, gdemo_default) - @test sample(gdemo_default, g, N) isa MCMCChains.Chains - end @testset "Gibbs constructors" begin # Create Gibbs samplers with various configurations and ways of passing the diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl index d45846f3d4..3afe1332c7 100644 --- a/test/mcmc/hmc.jl +++ b/test/mcmc/hmc.jl @@ -162,10 +162,6 @@ using Turing sampler = Sampler(alg, gdemo_default) @test DynamicPPL.alg_str(sampler) == "HMCDA" - alg = HMCDA(200, 0.8, 0.75, :s; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) - @test DynamicPPL.alg_str(sampler) == "HMCDA" - @test isa(alg, HMCDA) @test isa(sampler, Sampler{<:Turing.Hamiltonian}) end @@ -184,10 +180,6 @@ using Turing alg = NUTS(0.65; adtype=adbackend) sampler = Sampler(alg, gdemo_default) @test DynamicPPL.alg_str(sampler) == "NUTS" - - alg = NUTS(200, 0.65, :m; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) - @test DynamicPPL.alg_str(sampler) == "NUTS" end @testset "check discard" begin diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 7d5a841d45..69cf5a849b 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -24,26 +24,28 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "mh constructor" begin N = 10 s1 = MH((:s, InverseGamma(2, 3)), (:m, GKernel(3.0))) - s2 = MH(:s, :m) + s2 = MH(:s => InverseGamma(2, 3), :m => GKernel(3.0)) s3 = MH() - for s in (s1, s2, s3) + s4 = MH([1.0 0.1; 0.1 1.0]) + for s in (s1, s2, s3, s4) @test DynamicPPL.alg_str(Sampler(s, gdemo_default)) == "MH" end c1 = sample(gdemo_default, s1, N) c2 = sample(gdemo_default, 
s2, N) c3 = sample(gdemo_default, s3, N) - - s4 = Gibbs(:m => MH(), :s => MH()) c4 = sample(gdemo_default, s4, N) - # s5 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.RandomWalkProposal)) - # c5 = sample(gdemo_default, s5, N) + s5 = Gibbs(:m => MH(), :s => MH()) + c5 = sample(gdemo_default, s5, N) + + # s6 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.RandomWalkProposal)) + # c6 = sample(gdemo_default, s6, N) # NOTE: Broken because MH doesn't really follow the `logdensity` interface, but calls # it with `NamedTuple` instead of `AbstractVector`. - # s6 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.StaticProposal)) - # c6 = sample(gdemo_default, s6, N) + # s7 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.StaticProposal)) + # c7 = sample(gdemo_default, s7, N) end @testset "mh inference" begin diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl index 3378fea32b..699ee68547 100644 --- a/test/mcmc/particle_mcmc.jl +++ b/test/mcmc/particle_mcmc.jl @@ -4,7 +4,6 @@ using ..Models: gdemo_default #using ..Models: MoGtest, MoGtest_default using AdvancedPS: ResampleWithESSThreshold, resample_systematic, resample_multinomial using Distributions: Bernoulli, Beta, Gamma, Normal, sample -using DynamicPPL: getspace using Random: Random using Test: @test, @test_throws, @testset using Turing @@ -13,47 +12,15 @@ using Turing @testset "constructor" begin s = SMC() @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === () - - s = SMC(:x) - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x,) - - s = SMC((:x,)) - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x,) - - s = SMC(:x, :y) - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x, :y) - - s = SMC((:x, :y)) - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x, :y) s = SMC(0.6) @test s.resampler === ResampleWithESSThreshold(resample_systematic, 0.6) - @test getspace(s) === () - - s = SMC(0.6, (:x,)) - @test s.resampler === ResampleWithESSThreshold(resample_systematic, 0.6) - @test getspace(s) === (:x,) s = SMC(resample_multinomial, 0.6) @test s.resampler === ResampleWithESSThreshold(resample_multinomial, 0.6) - @test getspace(s) === () - - s = SMC(resample_multinomial, 0.6, (:x,)) - @test s.resampler === ResampleWithESSThreshold(resample_multinomial, 0.6) - @test getspace(s) === (:x,) s = SMC(resample_systematic) @test s.resampler === resample_systematic - @test getspace(s) === () - - s = SMC(resample_systematic, (:x,)) - @test s.resampler === resample_systematic - @test getspace(s) === (:x,) end @testset "models" begin @@ -106,57 +73,18 @@ end s = PG(10) @test s.nparticles == 10 @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === () - - s = PG(20, :x) - @test s.nparticles == 20 - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x,) - - s = PG(30, (:x,)) - @test s.nparticles == 30 - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x,) - - s = PG(40, :x, :y) - @test s.nparticles == 40 - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x, :y) - - s = PG(50, (:x, :y)) - @test s.nparticles == 50 - @test s.resampler == ResampleWithESSThreshold() - @test getspace(s) === (:x, :y) s = PG(60, 0.6) @test s.nparticles == 60 @test s.resampler === ResampleWithESSThreshold(resample_systematic, 0.6) - @test getspace(s) === () - - s = PG(70, 0.6, (:x,)) - @test s.nparticles == 70 - @test 
s.resampler === ResampleWithESSThreshold(resample_systematic, 0.6) - @test getspace(s) === (:x,) s = PG(80, resample_multinomial, 0.6) @test s.nparticles == 80 @test s.resampler === ResampleWithESSThreshold(resample_multinomial, 0.6) - @test getspace(s) === () - - s = PG(90, resample_multinomial, 0.6, (:x,)) - @test s.nparticles == 90 - @test s.resampler === ResampleWithESSThreshold(resample_multinomial, 0.6) - @test getspace(s) === (:x,) s = PG(100, resample_systematic) @test s.nparticles == 100 @test s.resampler === resample_systematic - @test getspace(s) === () - - s = PG(110, resample_systematic, (:x,)) - @test s.nparticles == 110 - @test s.resampler === resample_systematic - @test getspace(s) === (:x,) end @testset "logevidence" begin diff --git a/test/mcmc/sghmc.jl b/test/mcmc/sghmc.jl index c1d07d2ced..c878f755de 100644 --- a/test/mcmc/sghmc.jl +++ b/test/mcmc/sghmc.jl @@ -19,12 +19,7 @@ using Turing sampler = Turing.Sampler(alg) @test sampler isa Turing.Sampler{<:SGHMC} - alg = SGHMC(:m; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend) - @test alg isa SGHMC - sampler = Turing.Sampler(alg) - @test sampler isa Turing.Sampler{<:SGHMC} - - alg = SGHMC(:s; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend) + alg = SGHMC(; learning_rate=0.01, momentum_decay=0.1, adtype=adbackend) @test alg isa SGHMC sampler = Turing.Sampler(alg) @test sampler isa Turing.Sampler{<:SGHMC} @@ -45,12 +40,7 @@ end sampler = Turing.Sampler(alg) @test sampler isa Turing.Sampler{<:SGLD} - alg = SGLD(:m; stepsize=PolynomialStepsize(0.25), adtype=adbackend) - @test alg isa SGLD - sampler = Turing.Sampler(alg) - @test sampler isa Turing.Sampler{<:SGLD} - - alg = SGLD(:s; stepsize=PolynomialStepsize(0.25), adtype=adbackend) + alg = SGLD(; stepsize=PolynomialStepsize(0.25), adtype=adbackend) @test alg isa SGLD sampler = Turing.Sampler(alg) @test sampler isa Turing.Sampler{<:SGLD} From 436a77c18c3481e6a851e5d2f05859d990a2e50c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 25 Feb 2025 20:55:54 +0000 Subject: [PATCH 3/8] Bump Mooncake compat to 0.4.95 --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index e96d505e73..dea22415d1 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -59,7 +59,7 @@ LinearAlgebra = "1" LogDensityProblems = "2" LogDensityProblemsAD = "1.4" MCMCChains = "5, 6" -Mooncake = "0.4.61" +Mooncake = "0.4.95" NamedArrays = "0.9.4, 0.10" Optim = "1" Optimization = "3, 4" From b3cc1e476aa53ea0f67f3957f1aee09ddfc77dbb Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Thu, 13 Mar 2025 15:59:55 +0000 Subject: [PATCH 4/8] Support for DynamicPPL v0.35 (#2488) * Progress towards compat with DPPL v0.35 * More fixing of DPPL v0.35 stuff * Fix LogDensityFunction argument order * More minor bugfixes * [TEMP] Commit Manifest pointing to DynamicPPL#release-0.35 * remove LogDensityProblemsAD (#2490) * Remove LogDensityProblemsAD, part 1 * update Optimisation code to not use LogDensityProblemsAD * Fix field name change * Don't put chunksize=0 * Remove LogDensityProblemsAD dep * Improve OptimLogDensity docstring * Remove unneeded model argument to _optimize * Fix more tests * Remove essential/ad from the list of CI groups * Fix HMC function * more test fixes (#2491) * Remove LogDensityProblemsAD, part 1 * update Optimisation code to not use LogDensityProblemsAD * Fix field name change * Don't put chunksize=0 * Remove LogDensityProblemsAD dep * Improve OptimLogDensity docstring * Remove unneeded model argument to 
_optimize * Fix more tests * Remove essential/ad from the list of CI groups * Fix HMC function * More test fixes * Remove Manifest * More fixes for DynamicPPL 0.35 (#2494) * Remove test/dynamicppl/compiler.jl * Remove old regression tests * Remove vdemo2 * Fix last test * Add HISTORY.md entry about DPPL 0.35 * Allow ESS to sample variables with different symbols * Update a TODO note --------- Co-authored-by: Penelope Yong --- .github/workflows/Tests.yml | 4 +- HISTORY.md | 13 ++ Project.toml | 4 +- ext/TuringDynamicHMCExt.jl | 45 ++-- ext/TuringOptimExt.jl | 49 ++-- src/Turing.jl | 2 +- src/essential/container.jl | 2 +- src/mcmc/Inference.jl | 47 +--- src/mcmc/abstractmcmc.jl | 48 +--- src/mcmc/emcee.jl | 8 +- src/mcmc/ess.jl | 57 ++--- src/mcmc/gibbs.jl | 72 +----- src/mcmc/hmc.jl | 93 +++----- src/mcmc/is.jl | 6 +- src/mcmc/mh.jl | 64 +----- src/mcmc/particle_mcmc.jl | 8 +- src/mcmc/sghmc.jl | 32 +-- src/optimisation/Optimisation.jl | 178 +++++++------- test/Project.toml | 2 +- test/dynamicppl/compiler.jl | 369 ------------------------------ test/essential/ad.jl | 246 -------------------- test/ext/OptimInterface.jl | 7 +- test/mcmc/Inference.jl | 17 +- test/mcmc/abstractmcmc.jl | 44 +--- test/mcmc/emcee.jl | 2 +- test/mcmc/ess.jl | 26 ++- test/mcmc/gibbs.jl | 10 +- test/mcmc/hmc.jl | 32 +-- test/mcmc/mh.jl | 14 +- test/optimisation/Optimisation.jl | 20 +- test/runtests.jl | 5 - test/skipped/unit_test_helper.jl | 4 +- test/test_utils/ad_utils.jl | 36 +-- 33 files changed, 341 insertions(+), 1225 deletions(-) delete mode 100644 test/dynamicppl/compiler.jl delete mode 100644 test/essential/ad.jl diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index 531952939d..386ef7dd49 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -24,8 +24,6 @@ jobs: test: # Run some of the slower test files individually. The last one catches everything # not included in the others. - - name: "essential/ad" - args: "essential/ad.jl" - name: "mcmc/gibbs" args: "mcmc/gibbs.jl" - name: "mcmc/hmc" @@ -37,7 +35,7 @@ jobs: - name: "mcmc/ess" args: "mcmc/ess.jl" - name: "everything else" - args: "--skip essential/ad.jl mcmc/gibbs.jl mcmc/hmc.jl mcmc/abstractmcmc.jl mcmc/Inference.jl mcmc/ess.jl" + args: "--skip mcmc/gibbs.jl mcmc/hmc.jl mcmc/abstractmcmc.jl mcmc/Inference.jl mcmc/ess.jl" runner: # Default - version: '1' diff --git a/HISTORY.md b/HISTORY.md index 82b2fd0812..6251974075 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,8 +2,21 @@ ## Breaking changes +### Gibbs constructors + 0.37 removes the old Gibbs constructors deprecated in 0.36. +### DynamicPPL 0.35 + +Turing.jl v0.37 uses DynamicPPL v0.35, which brings with it several breaking changes: + + - The right hand side of `.~` must from now on be a univariate distribution. + - Indexing `VarInfo` objects by samplers has been removed completely. + - The order in which nested submodel prefixes are applied has been reversed. + - The arguments for the constructor of `LogDensityFunction` have changed. `LogDensityFunction` also now satisfies the `LogDensityProblems` interface, without needing a wrapper object. + +For more details about all of the above, see the changelog of DynamicPPL [here](https://github.com/TuringLang/DynamicPPL.jl/releases/tag/v0.35.0). 
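+For reference, the new constructor order shows up throughout this release (e.g. in `ext/TuringDynamicHMCExt.jl` and `src/mcmc/abstractmcmc.jl`): the model comes first, the `VarInfo` second, and the AD backend is a keyword argument. Below is a minimal sketch of the new call pattern; the `demo` model is purely illustrative and not part of Turing's API.
+
+```julia
+using Turing
+using LogDensityProblems: LogDensityProblems
+
+@model function demo(x)
+    m ~ Normal(0, 1)
+    x ~ Normal(m, 1)
+end
+
+model = demo(1.5)
+vi = Turing.DynamicPPL.VarInfo(model)
+
+# New argument order: model first, then the VarInfo; the AD backend is a keyword argument.
+f = Turing.LogDensityFunction(model, vi; adtype=Turing.DEFAULT_ADTYPE)
+
+# `f` itself satisfies the LogDensityProblems interface, so no ADgradient wrapper is needed;
+# with an `adtype` supplied, gradient evaluation also goes through the same object.
+LogDensityProblems.logdensity(f, vi[:])
+LogDensityProblems.logdensity_and_gradient(f, vi[:])
+```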
+ # Release 0.36.0 ## Breaking changes diff --git a/Project.toml b/Project.toml index 396838cef0..d72f323373 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" -LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" @@ -63,13 +62,12 @@ Distributions = "0.23.3, 0.24, 0.25" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" -DynamicPPL = "0.34.1" +DynamicPPL = "0.35" EllipticalSliceSampling = "0.5, 1, 2" ForwardDiff = "0.10.3" Libtask = "0.8.8" LinearAlgebra = "1" LogDensityProblems = "2" -LogDensityProblemsAD = "1.7.0" MCMCChains = "5, 6" NamedArrays = "0.9, 0.10" Optim = "1" diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl index c82f237c0c..5718e3855a 100644 --- a/ext/TuringDynamicHMCExt.jl +++ b/ext/TuringDynamicHMCExt.jl @@ -3,17 +3,10 @@ module TuringDynamicHMCExt ### DynamicHMC backend - https://github.com/tpapp/DynamicHMC.jl ### -if isdefined(Base, :get_extension) - using DynamicHMC: DynamicHMC - using Turing - using Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL - using Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS -else - import ..DynamicHMC - using ..Turing - using ..Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL - using ..Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS -end +using DynamicHMC: DynamicHMC +using Turing +using Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL +using Turing.Inference: ADTypes, TYPEDFIELDS """ DynamicNUTS @@ -25,22 +18,15 @@ To use it, make sure you have DynamicHMC package (version >= 2) loaded: using DynamicHMC ``` """ -struct DynamicNUTS{AD,space,T<:DynamicHMC.NUTS} <: Turing.Inference.Hamiltonian +struct DynamicNUTS{AD,T<:DynamicHMC.NUTS} <: Turing.Inference.Hamiltonian sampler::T adtype::AD end -function DynamicNUTS( - spl::DynamicHMC.NUTS=DynamicHMC.NUTS(), - space::Tuple=(); - adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, -) - return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype) -end +DynamicNUTS() = DynamicNUTS(DynamicHMC.NUTS()) +DynamicNUTS(spl) = DynamicNUTS(spl, Turing.DEFAULT_ADTYPE) Turing.externalsampler(spl::DynamicHMC.NUTS) = DynamicNUTS(spl) -DynamicPPL.getspace(::DynamicNUTS{<:Any,space}) where {space} = space - """ DynamicNUTSState @@ -70,25 +56,28 @@ function DynamicPPL.initialstep( kwargs..., ) # Ensure that initial sample is in unconstrained space. - if !DynamicPPL.islinked(vi, spl) - vi = DynamicPPL.link!!(vi, spl, model) + if !DynamicPPL.islinked(vi) + vi = DynamicPPL.link!!(vi, model) vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, spl))) end # Define log-density function. - ℓ = LogDensityProblemsAD.ADgradient( - Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ℓ = DynamicPPL.LogDensityFunction( + model, + vi, + DynamicPPL.SamplingContext(spl, DynamicPPL.DefaultContext()); + adtype=spl.alg.adtype, ) # Perform initial step. 
results = DynamicHMC.mcmc_keep_warmup( - rng, ℓ, 0; initialization=(q=vi[spl],), reporter=DynamicHMC.NoProgressReport() + rng, ℓ, 0; initialization=(q=vi[:],), reporter=DynamicHMC.NoProgressReport() ) steps = DynamicHMC.mcmc_steps(results.sampling_logdensity, results.final_warmup_state) Q, _ = DynamicHMC.mcmc_next_step(steps, results.final_warmup_state.Q) # Update the variables. - vi = DynamicPPL.setindex!!(vi, Q.q, spl) + vi = DynamicPPL.unflatten(vi, Q.q) vi = DynamicPPL.setlogp!!(vi, Q.ℓq) # Create first sample and state. @@ -112,7 +101,7 @@ function AbstractMCMC.step( Q, _ = DynamicHMC.mcmc_next_step(steps, state.cache) # Update the variables. - vi = DynamicPPL.setindex!!(vi, Q.q, spl) + vi = DynamicPPL.unflatten(vi, Q.q) vi = DynamicPPL.setlogp!!(vi, Q.ℓq) # Create next sample and state. diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl index 3d31af6f2a..d6c253e2a2 100644 --- a/ext/TuringOptimExt.jl +++ b/ext/TuringOptimExt.jl @@ -1,14 +1,8 @@ module TuringOptimExt -if isdefined(Base, :get_extension) - using Turing: Turing - import Turing: DynamicPPL, NamedArrays, Accessors, Optimisation - using Optim: Optim -else - import ..Turing - import ..Turing: DynamicPPL, NamedArrays, Accessors, Optimisation - import ..Optim -end +using Turing: Turing +import Turing: DynamicPPL, NamedArrays, Accessors, Optimisation +using Optim: Optim #################### # Optim.jl methods # @@ -42,7 +36,7 @@ function Optim.optimize( ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) - init_vals = DynamicPPL.getparams(f) + init_vals = DynamicPPL.getparams(f.ldf) optimizer = Optim.LBFGS() return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -65,7 +59,7 @@ function Optim.optimize( ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) - init_vals = DynamicPPL.getparams(f) + init_vals = DynamicPPL.getparams(f.ldf) return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end function Optim.optimize( @@ -81,7 +75,7 @@ end function _mle_optimize(model::DynamicPPL.Model, args...; kwargs...) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) - return _optimize(model, Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) + return _optimize(Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) end """ @@ -112,7 +106,7 @@ function Optim.optimize( ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) - init_vals = DynamicPPL.getparams(f) + init_vals = DynamicPPL.getparams(f.ldf) optimizer = Optim.LBFGS() return _map_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -135,7 +129,7 @@ function Optim.optimize( ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) - init_vals = DynamicPPL.getparams(f) + init_vals = DynamicPPL.getparams(f.ldf) return _map_optimize(model, init_vals, optimizer, options; kwargs...) end function Optim.optimize( @@ -151,18 +145,16 @@ end function _map_optimize(model::DynamicPPL.Model, args...; kwargs...) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) - return _optimize(model, Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) + return _optimize(Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) end - """ - _optimize(model::Model, f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) 
+ _optimize(f::OptimLogDensity, optimizer=Optim.LBFGS(), args...; kwargs...) Estimate a mode, i.e., compute a MLE or MAP estimate. """ function _optimize( - model::DynamicPPL.Model, f::Optimisation.OptimLogDensity, - init_vals::AbstractArray=DynamicPPL.getparams(f), + init_vals::AbstractArray=DynamicPPL.getparams(f.ldf), optimizer::Optim.AbstractOptimizer=Optim.LBFGS(), options::Optim.Options=Optim.Options(), args...; @@ -170,9 +162,12 @@ function _optimize( ) # Convert the initial values, since it is assumed that users provide them # in the constrained space. - f = Accessors.@set f.varinfo = DynamicPPL.unflatten(f.varinfo, init_vals) - f = Accessors.@set f.varinfo = DynamicPPL.link(f.varinfo, model) - init_vals = DynamicPPL.getparams(f) + # TODO(penelopeysm): As with in src/optimisation/Optimisation.jl, unclear + # whether initialisation is really necessary at all + vi = DynamicPPL.unflatten(f.ldf.varinfo, init_vals) + vi = DynamicPPL.link(vi, f.ldf.model) + f = Optimisation.OptimLogDensity(f.ldf.model, vi, f.ldf.context; adtype=f.ldf.adtype) + init_vals = DynamicPPL.getparams(f.ldf) # Optimize! M = Optim.optimize(Optim.only_fg!(f), init_vals, optimizer, options, args...; kwargs...) @@ -186,12 +181,16 @@ function _optimize( end # Get the optimum in unconstrained space. `getparams` does the invlinking. - f = Accessors.@set f.varinfo = DynamicPPL.unflatten(f.varinfo, M.minimizer) - vns_vals_iter = Turing.Inference.getparams(model, f.varinfo) + vi = f.ldf.varinfo + vi_optimum = DynamicPPL.unflatten(vi, M.minimizer) + logdensity_optimum = Optimisation.OptimLogDensity( + f.ldf.model, vi_optimum, f.ldf.context + ) + vns_vals_iter = Turing.Inference.getparams(f.ldf.model, vi_optimum) varnames = map(Symbol ∘ first, vns_vals_iter) vals = map(last, vns_vals_iter) vmat = NamedArrays.NamedArray(vals, varnames) - return Optimisation.ModeResult(vmat, M, -M.minimum, f) + return Optimisation.ModeResult(vmat, M, -M.minimum, logdensity_optimum) end end # module diff --git a/src/Turing.jl b/src/Turing.jl index 6318e2bd52..abba580a27 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -9,7 +9,7 @@ using Compat: pkgversion using AdvancedVI: AdvancedVI using DynamicPPL: DynamicPPL, LogDensityFunction -import DynamicPPL: getspace, NoDist, NamedDist +import DynamicPPL: NoDist, NamedDist using LogDensityProblems: LogDensityProblems using NamedArrays: NamedArrays using Accessors: Accessors diff --git a/src/essential/container.jl b/src/essential/container.jl index bd4e21f6b5..a1012d471f 100644 --- a/src/essential/container.jl +++ b/src/essential/container.jl @@ -39,7 +39,7 @@ function AdvancedPS.advance!( end function AdvancedPS.delete_retained!(trace::TracedModel) - DynamicPPL.set_retained_vns_del_by_spl!(trace.varinfo, trace.sampler) + DynamicPPL.set_retained_vns_del!(trace.varinfo) return trace end diff --git a/src/mcmc/Inference.jl b/src/mcmc/Inference.jl index fa79656308..60aa087cdb 100644 --- a/src/mcmc/Inference.jl +++ b/src/mcmc/Inference.jl @@ -5,6 +5,11 @@ using DynamicPPL: Metadata, VarInfo, TypedVarInfo, + # TODO(mhauru) all_varnames_grouped_by_symbol isn't exported by DPPL, because it is only + # implemented for TypedVarInfo. It is used by mh.jl. Either refactor mh.jl to not use it + # or implement it for other VarInfo types and export it from DPPL. 
+ all_varnames_grouped_by_symbol, + syms, islinked, setindex!!, push!!, @@ -12,7 +17,6 @@ using DynamicPPL: getlogp, VarName, getsym, - _getvns, getdist, Model, Sampler, @@ -22,9 +26,7 @@ using DynamicPPL: PriorContext, LikelihoodContext, set_flag!, - unset_flag!, - getspace, - inspace + unset_flag! using Distributions, Libtask, Bijectors using DistributionsAD: VectorOfMultivariate using LinearAlgebra @@ -47,7 +49,6 @@ import AdvancedPS import Accessors import EllipticalSliceSampling import LogDensityProblems -import LogDensityProblemsAD import Random import MCMCChains import StatsBase: predict @@ -75,9 +76,7 @@ export InferenceAlgorithm, RepeatSampler, Prior, assume, - dot_assume, observe, - dot_observe, predict, externalsampler @@ -161,29 +160,6 @@ function externalsampler( return ExternalSampler(sampler, adtype, Val(unconstrained)) end -getADType(spl::Sampler) = getADType(spl.alg) -getADType(::SampleFromPrior) = Turing.DEFAULT_ADTYPE - -getADType(ctx::DynamicPPL.SamplingContext) = getADType(ctx.sampler) -getADType(ctx::DynamicPPL.AbstractContext) = getADType(DynamicPPL.NodeTrait(ctx), ctx) -getADType(::DynamicPPL.IsLeaf, ctx::DynamicPPL.AbstractContext) = Turing.DEFAULT_ADTYPE -function getADType(::DynamicPPL.IsParent, ctx::DynamicPPL.AbstractContext) - return getADType(DynamicPPL.childcontext(ctx)) -end - -getADType(alg::Hamiltonian) = alg.adtype - -function LogDensityProblemsAD.ADgradient(ℓ::DynamicPPL.LogDensityFunction) - return LogDensityProblemsAD.ADgradient(getADType(DynamicPPL.getcontext(ℓ)), ℓ) -end - -function LogDensityProblems.logdensity( - f::Turing.LogDensityFunction{<:AbstractVarInfo,<:Model,<:DynamicPPL.DefaultContext}, - x::NamedTuple, -) - return DynamicPPL.logjoint(f.model, DynamicPPL.unflatten(f.varinfo, x)) -end - # TODO: make a nicer `set_namedtuple!` and move these functions to DynamicPPL. function DynamicPPL.unflatten(vi::TypedVarInfo, θ::NamedTuple) set_namedtuple!(deepcopy(vi), θ) @@ -299,7 +275,7 @@ function AbstractMCMC.sample( kwargs..., ) check_model && _check_model(model, alg) - return AbstractMCMC.sample(rng, model, Sampler(alg, model), N; kwargs...) + return AbstractMCMC.sample(rng, model, Sampler(alg), N; kwargs...) end function AbstractMCMC.sample( @@ -326,9 +302,7 @@ function AbstractMCMC.sample( kwargs..., ) check_model && _check_model(model, alg) - return AbstractMCMC.sample( - rng, model, Sampler(alg, model), ensemble, N, n_chains; kwargs... - ) + return AbstractMCMC.sample(rng, model, Sampler(alg), ensemble, N, n_chains; kwargs...) end function AbstractMCMC.sample( @@ -583,11 +557,6 @@ end # Utilities # ############## -# TODO(mhauru) Remove this once DynamicPPL has removed all its Selector stuff. 
-DynamicPPL.getspace(::InferenceAlgorithm) = () -DynamicPPL.getspace(spl::Sampler) = getspace(spl.alg) -DynamicPPL.inspace(vn::VarName, spl::Sampler) = inspace(vn, getspace(spl.alg)) - """ transitions_from_chain( diff --git a/src/mcmc/abstractmcmc.jl b/src/mcmc/abstractmcmc.jl index aec7b153a9..b436f43579 100644 --- a/src/mcmc/abstractmcmc.jl +++ b/src/mcmc/abstractmcmc.jl @@ -1,6 +1,6 @@ -struct TuringState{S,F} +struct TuringState{S,M,V,C} state::S - logdensity::F + ldf::DynamicPPL.LogDensityFunction{M,V,C} end state_to_turing(f::DynamicPPL.LogDensityFunction, state) = TuringState(state, f) @@ -12,20 +12,10 @@ function transition_to_turing(f::DynamicPPL.LogDensityFunction, transition) return Transition(f.model, varinfo, transition) end -state_to_turing(f::LogDensityProblemsAD.ADGradientWrapper, state) = TuringState(state, f) -function transition_to_turing(f::LogDensityProblemsAD.ADGradientWrapper, transition) - return transition_to_turing(parent(f), transition) -end - -function varinfo_from_logdensityfn(f::LogDensityProblemsAD.ADGradientWrapper) - return varinfo_from_logdensityfn(parent(f)) -end -varinfo_from_logdensityfn(f::DynamicPPL.LogDensityFunction) = f.varinfo - function varinfo(state::TuringState) - θ = getparams(DynamicPPL.getmodel(state.logdensity), state.state) + θ = getparams(state.ldf.model, state.state) # TODO: Do we need to link here first? - return DynamicPPL.unflatten(varinfo_from_logdensityfn(state.logdensity), θ) + return DynamicPPL.unflatten(state.ldf.varinfo, θ) end varinfo(state::AbstractVarInfo) = state @@ -40,20 +30,6 @@ getstats(transition::AdvancedHMC.Transition) = transition.stat getparams(::DynamicPPL.Model, transition::AdvancedMH.Transition) = transition.params -getvarinfo(f::DynamicPPL.LogDensityFunction) = f.varinfo -function getvarinfo(f::LogDensityProblemsAD.ADGradientWrapper) - return getvarinfo(LogDensityProblemsAD.parent(f)) -end - -setvarinfo(f::DynamicPPL.LogDensityFunction, varinfo) = Accessors.@set f.varinfo = varinfo -function setvarinfo( - f::LogDensityProblemsAD.ADGradientWrapper, varinfo, adtype::ADTypes.AbstractADType -) - return LogDensityProblemsAD.ADgradient( - adtype, setvarinfo(LogDensityProblemsAD.parent(f), varinfo) - ) -end - # TODO: Do we also support `resume`, etc? function AbstractMCMC.step( rng::Random.AbstractRNG, @@ -66,12 +42,8 @@ function AbstractMCMC.step( alg = sampler_wrapper.alg sampler = alg.sampler - # Create a log-density function with an implementation of the - # gradient so we ensure that we're using the same AD backend as in Turing. - f = LogDensityProblemsAD.ADgradient(alg.adtype, DynamicPPL.LogDensityFunction(model)) - - # Link the varinfo if needed. - varinfo = getvarinfo(f) + # Initialise varinfo with initial params and link the varinfo if needed. + varinfo = DynamicPPL.VarInfo(model) if requires_unconstrained_space(alg) if initial_params !== nothing # If we have initial parameters, we need to set the varinfo before linking. @@ -82,9 +54,11 @@ function AbstractMCMC.step( varinfo = DynamicPPL.link(varinfo, model) end end - f = setvarinfo(f, varinfo, alg.adtype) - # Then just call `AdvancedHMC.step` with the right arguments. + # Construct LogDensityFunction + f = DynamicPPL.LogDensityFunction(model, varinfo; adtype=alg.adtype) + + # Then just call `AbstractMCMC.step` with the right arguments. if initial_state === nothing transition_inner, state_inner = AbstractMCMC.step( rng, AbstractMCMC.LogDensityModel(f), sampler; initial_params, kwargs... 
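The construction used in the step above can be exercised by hand. A minimal sketch, assuming the usual Turing exports; the model, data, and AD backend here are illustrative only.

```julia
using Turing
using DynamicPPL: DynamicPPL
using LogDensityProblems: LogDensityProblems

@model function demo(x)
    s ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s))
    x ~ Normal(m, sqrt(s))
end

model = demo(1.5)

# Build a VarInfo, link it to work in unconstrained space, and wrap it together
# with the model and an AD backend in a LogDensityFunction.
vi = DynamicPPL.link(DynamicPPL.VarInfo(model), model)
f = DynamicPPL.LogDensityFunction(model, vi; adtype=AutoForwardDiff())

# The state now carries the LogDensityFunction directly (state.ldf); log density
# and gradient are obtained through the LogDensityProblems interface instead of
# an ADgradient wrapper.
θ = vi[:]
LogDensityProblems.logdensity(f, θ)
LogDensityProblems.logdensity_and_gradient(f, θ)
```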
@@ -111,7 +85,7 @@ function AbstractMCMC.step( kwargs..., ) sampler = sampler_wrapper.alg.sampler - f = state.logdensity + f = state.ldf # Then just call `AdvancedHMC.step` with the right arguments. transition_inner, state_inner = AbstractMCMC.step( diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 9d272f5a0a..ab68f795e1 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -53,7 +53,7 @@ function AbstractMCMC.step( length(initial_params) == n || throw(ArgumentError("initial parameters have to be specified for each walker")) vis = map(vis, initial_params) do vi, init - vi = DynamicPPL.initialize_parameters!!(vi, init, spl, model) + vi = DynamicPPL.initialize_parameters!!(vi, init, model) # Update log joint probability. last(DynamicPPL.evaluate!!(model, rng, vi, SampleFromPrior())) @@ -67,8 +67,8 @@ function AbstractMCMC.step( state = EmceeState( vis[1], map(vis) do vi - vi = DynamicPPL.link!!(vi, spl, model) - AMH.Transition(vi[spl], getlogp(vi), false) + vi = DynamicPPL.link!!(vi, model) + AMH.Transition(vi[:], getlogp(vi), false) end, ) @@ -89,7 +89,7 @@ function AbstractMCMC.step( # Compute the next transition and state. transition = map(states) do _state - vi = setindex!!(vi, _state.params, spl) + vi = DynamicPPL.unflatten(vi, _state.params) t = Transition(getparams(model, vi), _state.lp) return t end diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index 98d0adabc8..c6e62d8e1c 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -26,16 +26,11 @@ struct ESS <: InferenceAlgorithm end function DynamicPPL.initialstep( rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... ) - # Sanity check - vns = _getvns(vi, spl) - length(vns) == 1 || - error("[ESS] does only support one variable ($(length(vns)) variables specified)") - for vn in only(vns) + for vn in keys(vi) dist = getdist(vi, vn) EllipticalSliceSampling.isgaussian(typeof(dist)) || - error("[ESS] only supports Gaussian prior distributions") + error("ESS only supports Gaussian prior distributions") end - return Transition(model, vi), vi end @@ -54,14 +49,16 @@ function AbstractMCMC.step( rng, EllipticalSliceSampling.ESSModel( ESSPrior(model, spl, vi), - Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()), + Turing.LogDensityFunction( + model, vi, DynamicPPL.SamplingContext(spl, DynamicPPL.DefaultContext()) + ), ), EllipticalSliceSampling.ESS(), oldstate, ) # update sample and log-likelihood - vi = setindex!!(vi, sample, spl) + vi = DynamicPPL.unflatten(vi, sample) vi = setlogp!!(vi, state.loglikelihood) return Transition(model, vi), vi @@ -77,8 +74,8 @@ struct ESSPrior{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo,T} function ESSPrior{M,S,V}( model::M, sampler::S, varinfo::V ) where {M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} - vns = _getvns(varinfo, sampler) - μ = mapreduce(vcat, vns[1]) do vn + vns = keys(varinfo) + μ = mapreduce(vcat, vns) do vn dist = getdist(varinfo, vn) EllipticalSliceSampling.isgaussian(typeof(dist)) || error("[ESS] only supports Gaussian prior distributions") @@ -100,28 +97,22 @@ function Base.rand(rng::Random.AbstractRNG, p::ESSPrior) sampler = p.sampler varinfo = p.varinfo # TODO: Surely there's a better way of doing this now that we have `SamplingContext`? 
- vns = _getvns(varinfo, sampler) - for vn in Iterators.flatten(values(vns)) + vns = keys(varinfo) + for vn in vns set_flag!(varinfo, vn, "del") end p.model(rng, varinfo, sampler) - return varinfo[sampler] + return varinfo[:] end # Mean of prior distribution Distributions.mean(p::ESSPrior) = p.μ # Evaluate log-likelihood of proposals -const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = Turing.LogDensityFunction{ - V,M,<:DynamicPPL.SamplingContext{<:S} -} - -function (ℓ::ESSLogLikelihood)(f::AbstractVector) - sampler = DynamicPPL.getsampler(ℓ) - varinfo = setindex!!(ℓ.varinfo, f, sampler) - varinfo = last(DynamicPPL.evaluate!!(ℓ.model, varinfo, sampler)) - return getlogp(varinfo) -end +const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = + Turing.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} + +(ℓ::ESSLogLikelihood)(f::AbstractVector) = LogDensityProblems.logdensity(ℓ, f) function DynamicPPL.tilde_assume( rng::Random.AbstractRNG, ::DefaultContext, ::Sampler{<:ESS}, right, vn, vi @@ -131,22 +122,6 @@ function DynamicPPL.tilde_assume( ) end -function DynamicPPL.tilde_observe( - ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi -) +function DynamicPPL.tilde_observe(ctx::DefaultContext, ::Sampler{<:ESS}, right, left, vi) return DynamicPPL.tilde_observe(ctx, SampleFromPrior(), right, left, vi) end - -function DynamicPPL.dot_tilde_assume( - rng::Random.AbstractRNG, ::DefaultContext, ::Sampler{<:ESS}, right, left, vns, vi -) - return DynamicPPL.dot_tilde_assume( - rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi - ) -end - -function DynamicPPL.dot_tilde_observe( - ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi -) - return DynamicPPL.dot_tilde_observe(ctx, SampleFromPrior(), right, left, vi) -end diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index c318cabe01..a75779e73e 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -198,58 +198,6 @@ function DynamicPPL.tilde_assume( end end -# Like the above tilde_assume methods, but with dot_tilde_assume and broadcasting of logpdf. -# See comments there for more details. -function DynamicPPL.dot_tilde_assume(context::GibbsContext, right, left, vns, vi) - child_context = DynamicPPL.childcontext(context) - return if is_target_varname(context, vns) - DynamicPPL.dot_tilde_assume(child_context, right, left, vns, vi) - elseif has_conditioned_gibbs(context, vns) - value, lp, _ = DynamicPPL.dot_tilde_assume( - child_context, right, left, vns, get_global_varinfo(context) - ) - value, lp, vi - else - value, lp, new_global_vi = DynamicPPL.dot_tilde_assume( - child_context, - DynamicPPL.SampleFromPrior(), - right, - left, - vns, - get_global_varinfo(context), - ) - set_global_varinfo!(context, new_global_vi) - value, lp, vi - end -end - -# As above but with an RNG. 
-function DynamicPPL.dot_tilde_assume( - rng::Random.AbstractRNG, context::GibbsContext, sampler, right, left, vns, vi -) - child_context = DynamicPPL.childcontext(context) - return if is_target_varname(context, vns) - DynamicPPL.dot_tilde_assume(rng, child_context, sampler, right, left, vns, vi) - elseif has_conditioned_gibbs(context, vns) - value, lp, _ = DynamicPPL.dot_tilde_assume( - child_context, right, left, vns, get_global_varinfo(context) - ) - value, lp, vi - else - value, lp, new_global_vi = DynamicPPL.dot_tilde_assume( - rng, - child_context, - DynamicPPL.SampleFromPrior(), - right, - left, - vns, - get_global_varinfo(context), - ) - set_global_varinfo!(context, new_global_vi) - value, lp, vi - end -end - """ make_conditional(model, target_variables, varinfo) @@ -281,16 +229,8 @@ function make_conditional( return DynamicPPL.contextualize(model, gibbs_context), gibbs_context_inner end -# All samplers are given the same Selector, so that they will sample all variables -# given to them by the Gibbs sampler. This avoids conflicts between the new and the old way -# of choosing which sampler to use. -function set_selector(x::DynamicPPL.Sampler) - return DynamicPPL.Sampler(x.alg, DynamicPPL.Selector(0)) -end -function set_selector(x::RepeatSampler) - return RepeatSampler(set_selector(x.sampler), x.num_repeat) -end -set_selector(x::InferenceAlgorithm) = DynamicPPL.Sampler(x, DynamicPPL.Selector(0)) +wrap_in_sampler(x::AbstractMCMC.AbstractSampler) = x +wrap_in_sampler(x::InferenceAlgorithm) = DynamicPPL.Sampler(x) to_varname_list(x::Union{VarName,Symbol}) = [VarName(x)] # Any other value is assumed to be an iterable of VarNames and Symbols. @@ -343,9 +283,7 @@ struct Gibbs{N,V<:NTuple{N,AbstractVector{<:VarName}},A<:NTuple{N,Any}} <: end end - # Ensure that samplers have the same selector, and that varnames are lists of - # VarNames. - samplers = tuple(map(set_selector, samplers)...) + samplers = tuple(map(wrap_in_sampler, samplers)...) varnames = tuple(map(to_varname_list, varnames)...) return new{length(samplers),typeof(varnames),typeof(samplers)}(varnames, samplers) end @@ -500,7 +438,9 @@ function setparams_varinfo!!( state::TuringState, params::AbstractVarInfo, ) - logdensity = DynamicPPL.setmodel(state.logdensity, model, sampler.alg.adtype) + logdensity = DynamicPPL.LogDensityFunction( + model, state.ldf.varinfo, state.ldf.context; adtype=sampler.alg.adtype + ) new_inner_state = setparams_varinfo!!( AbstractMCMC.LogDensityModel(logdensity), sampler, state.state, params ) diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index 524d02c6af..098e62eb22 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -148,27 +148,27 @@ function DynamicPPL.initialstep( kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. - vi = DynamicPPL.link(vi_original, spl, model) + vi = DynamicPPL.link(vi_original, model) # Extract parameters. - theta = vi[spl] + theta = vi[:] # Create a Hamiltonian. metricT = getmetricT(spl.alg) metric = metricT(length(theta)) - ℓ = LogDensityProblemsAD.ADgradient( - Turing.LogDensityFunction( - vi, - model, - # Use the leaf-context from the `model` in case the user has - # contextualized the model with something like `PriorContext` - # to sample from the prior. - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), - ), + ldf = DynamicPPL.LogDensityFunction( + model, + vi, + # TODO(penelopeysm): Can we just use leafcontext(model.context)? Do we + # need to pass in the sampler? 
(In fact LogDensityFunction defaults to + # using leafcontext(model.context) so could we just remove the argument + # entirely?) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)); + adtype=spl.alg.adtype, ) - logπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) - ∂logπ∂θ(x) = LogDensityProblems.logdensity_and_gradient(ℓ, x) - hamiltonian = AHMC.Hamiltonian(metric, logπ, ∂logπ∂θ) + lp_func = Base.Fix1(LogDensityProblems.logdensity, ldf) + lp_grad_func = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ldf) + hamiltonian = AHMC.Hamiltonian(metric, lp_func, lp_grad_func) # Compute phase point z. z = AHMC.phasepoint(rng, theta, hamiltonian) @@ -189,9 +189,9 @@ function DynamicPPL.initialstep( # NOTE: This will sample in the unconstrained space. vi = last(DynamicPPL.evaluate!!(model, rng, vi, SampleFromUniform())) - theta = vi[spl] + theta = vi[:] - hamiltonian = AHMC.Hamiltonian(metric, logπ, ∂logπ∂θ) + hamiltonian = AHMC.Hamiltonian(metric, lp_func, lp_grad_func) z = AHMC.phasepoint(rng, theta, hamiltonian) init_attempt_count += 1 @@ -226,10 +226,10 @@ function DynamicPPL.initialstep( # Update `vi` based on acceptance if t.stat.is_accept - vi = DynamicPPL.unflatten(vi, spl, t.z.θ) + vi = DynamicPPL.unflatten(vi, t.z.θ) vi = setlogp!!(vi, t.stat.log_density) else - vi = DynamicPPL.unflatten(vi, spl, theta) + vi = DynamicPPL.unflatten(vi, theta) vi = setlogp!!(vi, log_density_old) end @@ -274,7 +274,7 @@ function AbstractMCMC.step( # Update variables vi = state.vi if t.stat.is_accept - vi = DynamicPPL.unflatten(vi, spl, t.z.θ) + vi = DynamicPPL.unflatten(vi, t.z.θ) vi = setlogp!!(vi, t.stat.log_density) end @@ -287,16 +287,19 @@ end function get_hamiltonian(model, spl, vi, state, n) metric = gen_metric(n, spl, state) - ℓ = LogDensityProblemsAD.ADgradient( - Turing.LogDensityFunction( - vi, - model, - DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)), - ), + ldf = DynamicPPL.LogDensityFunction( + model, + vi, + # TODO(penelopeysm): Can we just use leafcontext(model.context)? Do we + # need to pass in the sampler? (In fact LogDensityFunction defaults to + # using leafcontext(model.context) so could we just remove the argument + # entirely?) + DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)); + adtype=spl.alg.adtype, ) - ℓπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) - ∂ℓπ∂θ = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ℓ) - return AHMC.Hamiltonian(metric, ℓπ, ∂ℓπ∂θ) + lp_func = Base.Fix1(LogDensityProblems.logdensity, ldf) + lp_grad_func = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ldf) + return AHMC.Hamiltonian(metric, lp_func, lp_grad_func) end """ @@ -493,45 +496,15 @@ end #### Compiler interface, i.e. tilde operators. 
#### function DynamicPPL.assume( - rng, spl::Sampler{<:Hamiltonian}, dist::Distribution, vn::VarName, vi + rng, ::Sampler{<:Hamiltonian}, dist::Distribution, vn::VarName, vi ) return DynamicPPL.assume(dist, vn, vi) end -function DynamicPPL.dot_assume( - rng, - spl::Sampler{<:Hamiltonian}, - dist::MultivariateDistribution, - vns::AbstractArray{<:VarName}, - var::AbstractMatrix, - vi, -) - return DynamicPPL.dot_assume(dist, var, vns, vi) -end -function DynamicPPL.dot_assume( - rng, - spl::Sampler{<:Hamiltonian}, - dists::Union{Distribution,AbstractArray{<:Distribution}}, - vns::AbstractArray{<:VarName}, - var::AbstractArray, - vi, -) - return DynamicPPL.dot_assume(dists, var, vns, vi) -end - -function DynamicPPL.observe(spl::Sampler{<:Hamiltonian}, d::Distribution, value, vi) +function DynamicPPL.observe(::Sampler{<:Hamiltonian}, d::Distribution, value, vi) return DynamicPPL.observe(d, value, vi) end -function DynamicPPL.dot_observe( - spl::Sampler{<:Hamiltonian}, - ds::Union{Distribution,AbstractArray{<:Distribution}}, - value::AbstractArray, - vi, -) - return DynamicPPL.dot_observe(ds, value, vi) -end - #### #### Default HMC stepsize and mass matrix adaptor #### diff --git a/src/mcmc/is.jl b/src/mcmc/is.jl index 23a7bbef19..d83abd173c 100644 --- a/src/mcmc/is.jl +++ b/src/mcmc/is.jl @@ -46,16 +46,16 @@ function getlogevidence(samples::Vector{<:Transition}, ::Sampler{<:IS}, state) return logsumexp(map(x -> x.lp, samples)) - log(length(samples)) end -function DynamicPPL.assume(rng, spl::Sampler{<:IS}, dist::Distribution, vn::VarName, vi) +function DynamicPPL.assume(rng, ::Sampler{<:IS}, dist::Distribution, vn::VarName, vi) if haskey(vi, vn) r = vi[vn] else r = rand(rng, dist) - vi = push!!(vi, vn, r, dist, spl) + vi = push!!(vi, vn, r, dist) end return r, 0, vi end -function DynamicPPL.observe(spl::Sampler{<:IS}, dist::Distribution, value, vi) +function DynamicPPL.observe(::Sampler{<:IS}, dist::Distribution, value, vi) return logpdf(dist, value), vi end diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 33587f1e47..0af62e2597 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -186,26 +186,16 @@ end A log density function for the MH sampler. -This variant uses the `set_namedtuple!` function to update the `VarInfo`. +This variant uses the `set_namedtuple!` function to update the `VarInfo`. """ -const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = Turing.LogDensityFunction{ - V,M,<:DynamicPPL.SamplingContext{<:S} -} +const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = + Turing.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} function LogDensityProblems.logdensity(f::MHLogDensityFunction, x::NamedTuple) - # TODO: Make this work with immutable `f.varinfo` too. - sampler = DynamicPPL.getsampler(f) - vi = f.varinfo - - x_old, lj_old = vi[sampler], getlogp(vi) + vi = deepcopy(f.varinfo) set_namedtuple!(vi, x) - vi_new = last(DynamicPPL.evaluate!!(f.model, vi, DynamicPPL.getcontext(f))) + vi_new = last(DynamicPPL.evaluate!!(f.model, vi, f.context)) lj = getlogp(vi_new) - - # Reset old `vi`. - setindex!!(vi, x_old, sampler) - setlogp!!(vi, lj_old) - return lj end @@ -237,7 +227,7 @@ The first `NamedTuple` has symbols as keys and distributions as values. The second `NamedTuple` has model symbols as keys and their stored values as values. 
""" function dist_val_tuple(spl::Sampler{<:MH}, vi::DynamicPPL.VarInfoOrThreadSafeVarInfo) - vns = _getvns(vi, spl) + vns = all_varnames_grouped_by_symbol(vi) dt = _dist_tuple(spl.alg.proposals, vi, vns) vt = _val_tuple(vi, vns) return dt, vt @@ -297,7 +287,7 @@ end function maybe_link!!(varinfo, sampler, proposal, model) return if should_link(varinfo, sampler, proposal) - link!!(varinfo, sampler, model) + link!!(varinfo, model) else varinfo end @@ -319,8 +309,8 @@ function propose!!( Base.Fix1( LogDensityProblems.logdensity, Turing.LogDensityFunction( - vi, model, + vi, DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), ), ), @@ -354,15 +344,15 @@ function propose!!( Base.Fix1( LogDensityProblems.logdensity, Turing.LogDensityFunction( - vi, model, + vi, DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), ), ), ) trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) - return setlogp!!(DynamicPPL.unflatten(vi, spl, trans.params), trans.lp) + return setlogp!!(DynamicPPL.unflatten(vi, trans.params), trans.lp) end function DynamicPPL.initialstep( @@ -401,40 +391,6 @@ function DynamicPPL.assume( return retval end -function DynamicPPL.dot_assume( - rng, - spl::Sampler{<:MH}, - dist::MultivariateDistribution, - vns::AbstractVector{<:VarName}, - var::AbstractMatrix, - vi::AbstractVarInfo, -) - # Just defer to `SampleFromPrior`. - retval = DynamicPPL.dot_assume(rng, SampleFromPrior(), dist, vns[1], var, vi) - return retval -end -function DynamicPPL.dot_assume( - rng, - spl::Sampler{<:MH}, - dists::Union{Distribution,AbstractArray{<:Distribution}}, - vns::AbstractArray{<:VarName}, - var::AbstractArray, - vi::AbstractVarInfo, -) - # Just defer to `SampleFromPrior`. - retval = DynamicPPL.dot_assume(rng, SampleFromPrior(), dists, vns, var, vi) - return retval -end - function DynamicPPL.observe(spl::Sampler{<:MH}, d::Distribution, value, vi) return DynamicPPL.observe(SampleFromPrior(), d, value, vi) end - -function DynamicPPL.dot_observe( - spl::Sampler{<:MH}, - ds::Union{Distribution,AbstractArray{<:Distribution}}, - value::AbstractArray, - vi, -) - return DynamicPPL.dot_observe(SampleFromPrior(), ds, value, vi) -end diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 733d572c73..5bb1103876 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -119,7 +119,7 @@ function DynamicPPL.initialstep( ) # Reset the VarInfo. reset_num_produce!(vi) - set_retained_vns_del_by_spl!(vi, spl) + set_retained_vns_del!(vi) resetlogp!!(vi) empty!!(vi) @@ -253,7 +253,7 @@ function DynamicPPL.initialstep( ) # Reset the VarInfo before new sweep reset_num_produce!(vi) - set_retained_vns_del_by_spl!(vi, spl) + set_retained_vns_del!(vi) resetlogp!!(vi) # Create a new set of particles @@ -291,7 +291,7 @@ function AbstractMCMC.step( reference = AdvancedPS.forkr(AdvancedPS.Trace(model, spl, vi, state.rng)) # For all other particles, do not retain the variables but resample them. - set_retained_vns_del_by_spl!(vi, spl) + set_retained_vns_del!(vi) # Create a new set of particles. 
num_particles = spl.alg.nparticles @@ -365,7 +365,7 @@ function DynamicPPL.assume( if ~haskey(vi, vn) r = rand(trng, dist) - push!!(vi, vn, r, dist, spl) + push!!(vi, vn, r, dist) elseif is_flagged(vi, vn, "del") unset_flag!(vi, vn, "del") # Reference particle parent r = rand(trng, dist) diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl index 7cf5cd6e4d..0c322244eb 100644 --- a/src/mcmc/sghmc.jl +++ b/src/mcmc/sghmc.jl @@ -59,17 +59,20 @@ function DynamicPPL.initialstep( kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. - if !DynamicPPL.islinked(vi, spl) - vi = DynamicPPL.link!!(vi, spl, model) + if !DynamicPPL.islinked(vi) + vi = DynamicPPL.link!!(vi, model) vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, spl))) end # Compute initial sample and state. sample = Transition(model, vi) - ℓ = LogDensityProblemsAD.ADgradient( - Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ℓ = DynamicPPL.LogDensityFunction( + model, + vi, + DynamicPPL.SamplingContext(spl, DynamicPPL.DefaultContext()); + adtype=spl.alg.adtype, ) - state = SGHMCState(ℓ, vi, zero(vi[spl])) + state = SGHMCState(ℓ, vi, zero(vi[:])) return sample, state end @@ -84,7 +87,7 @@ function AbstractMCMC.step( # Compute gradient of log density. ℓ = state.logdensity vi = state.vi - θ = vi[spl] + θ = vi[:] grad = last(LogDensityProblems.logdensity_and_gradient(ℓ, θ)) # Update latent variables and velocity according to @@ -96,7 +99,7 @@ function AbstractMCMC.step( newv = (1 - α) .* v .+ η .* grad .+ sqrt(2 * η * α) .* randn(rng, eltype(v), length(v)) # Save new variables and recompute log density. - vi = DynamicPPL.setindex!!(vi, θ, spl) + vi = DynamicPPL.unflatten(vi, θ) vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, spl))) # Compute next sample and state. @@ -219,15 +222,18 @@ function DynamicPPL.initialstep( kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. - if !DynamicPPL.islinked(vi, spl) - vi = DynamicPPL.link!!(vi, spl, model) + if !DynamicPPL.islinked(vi) + vi = DynamicPPL.link!!(vi, model) vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, spl))) end # Create first sample and state. sample = SGLDTransition(model, vi, zero(spl.alg.stepsize(0))) - ℓ = LogDensityProblemsAD.ADgradient( - Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ℓ = DynamicPPL.LogDensityFunction( + model, + vi, + DynamicPPL.SamplingContext(spl, DynamicPPL.DefaultContext()); + adtype=spl.alg.adtype, ) state = SGLDState(ℓ, vi, 1) @@ -240,14 +246,14 @@ function AbstractMCMC.step( # Perform gradient step. ℓ = state.logdensity vi = state.vi - θ = vi[spl] + θ = vi[:] grad = last(LogDensityProblems.logdensity_and_gradient(ℓ, θ)) step = state.step stepsize = spl.alg.stepsize(step) θ .+= (stepsize / 2) .* grad .+ sqrt(stepsize) .* randn(rng, eltype(θ), length(θ)) # Save new variables and recompute log density. - vi = DynamicPPL.setindex!!(vi, θ, spl) + vi = DynamicPPL.unflatten(vi, θ) vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, spl))) # Compute next sample and state. 
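A rough sketch of the update rule the SGLD hunks above implement, written against the new API. This is illustrative only: `ldf` stands for a `DynamicPPL.LogDensityFunction` constructed with an AD backend as above, and `vi` for its linked VarInfo.

```julia
using Random: Random
using DynamicPPL: DynamicPPL
using LogDensityProblems: LogDensityProblems

function sgld_like_step(rng::Random.AbstractRNG, ldf, vi, stepsize)
    # Read the unconstrained parameter vector without a sampler argument.
    θ = vi[:]
    grad = last(LogDensityProblems.logdensity_and_gradient(ldf, θ))
    θ .+= (stepsize / 2) .* grad .+ sqrt(stepsize) .* randn(rng, eltype(θ), length(θ))
    # Write the new values back with `unflatten`; the sampler then re-evaluates
    # the model to refresh the log density.
    return DynamicPPL.unflatten(vi, θ)
end
```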
diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 1a03b9bc42..9da929504b 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -4,7 +4,6 @@ using ..Turing using NamedArrays: NamedArrays using DynamicPPL: DynamicPPL using LogDensityProblems: LogDensityProblems -using LogDensityProblemsAD: LogDensityProblemsAD using Optimization: Optimization using OptimizationOptimJL: OptimizationOptimJL using Random: Random @@ -89,58 +88,78 @@ function DynamicPPL.tilde_assume(ctx::OptimizationContext, dist, vn, vi) return r, lp, vi end -_loglikelihood(dist::Distribution, x) = StatsAPI.loglikelihood(dist, x) - -function _loglikelihood(dists::AbstractArray{<:Distribution}, x) - return StatsAPI.loglikelihood(arraydist(dists), x) -end - -function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, vi) - # Values should be set and we're using `SampleFromPrior`, hence the `rng` argument - # shouldn't affect anything. - # TODO: Stop using `get_and_set_val!`. - r = DynamicPPL.get_and_set_val!( - Random.default_rng(), vi, vns, right, DynamicPPL.SampleFromPrior() - ) - lp = if ctx.context isa Union{DynamicPPL.DefaultContext,DynamicPPL.PriorContext} - # MAP - _loglikelihood(right, r) - else - # MLE - 0 - end - return r, lp, vi -end - function DynamicPPL.tilde_observe( ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... ) return DynamicPPL.tilde_observe(ctx.context, args...) end -function DynamicPPL.dot_tilde_observe( - ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... -) - return DynamicPPL.dot_tilde_observe(ctx.context, args...) -end - """ - OptimLogDensity{M<:DynamicPPL.Model,C<:Context,V<:DynamicPPL.VarInfo} + OptimLogDensity{ + M<:DynamicPPL.Model, + V<:DynamicPPL.VarInfo, + C<:OptimizationContext, + AD<:ADTypes.AbstractADType + } + +A struct that wraps a single LogDensityFunction. Can be invoked either using + +```julia +OptimLogDensity(model, varinfo, ctx; adtype=adtype) +``` + +or + +```julia +OptimLogDensity(model, ctx; adtype=adtype) +``` + +If not specified, `adtype` defaults to `AutoForwardDiff()`. -A struct that stores the negative log density function of a `DynamicPPL` model. +An OptimLogDensity does not, in itself, obey the LogDensityProblems interface. +Thus, if you want to calculate the log density of its contents at the point +`z`, you should manually call + +```julia +LogDensityProblems.logdensity(f.ldf, z) +``` + +However, it is a callable object which returns the *negative* log density of +the underlying LogDensityFunction at the point `z`. This is done to satisfy +the Optim.jl interface. + +```julia +optim_ld = OptimLogDensity(model, varinfo, ctx) +optim_ld(z) # returns -logp +``` """ -const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = Turing.LogDensityFunction{ - V,M,C +struct OptimLogDensity{ + M<:DynamicPPL.Model, + V<:DynamicPPL.VarInfo, + C<:OptimizationContext, + AD<:ADTypes.AbstractADType, } + ldf::Turing.LogDensityFunction{M,V,C,AD} +end -""" - OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) +function OptimLogDensity( + model::DynamicPPL.Model, + vi::DynamicPPL.VarInfo, + ctx::OptimizationContext; + adtype::ADTypes.AbstractADType=AutoForwardDiff(), +) + return OptimLogDensity(Turing.LogDensityFunction(model, vi, ctx; adtype=adtype)) +end -Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. 
-""" -function OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) - init = DynamicPPL.VarInfo(model) - return Turing.LogDensityFunction(init, model, context) +# No varinfo +function OptimLogDensity( + model::DynamicPPL.Model, + ctx::OptimizationContext; + adtype::ADTypes.AbstractADType=AutoForwardDiff(), +) + return OptimLogDensity( + Turing.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx; adtype=adtype) + ) end """ @@ -153,42 +172,30 @@ depends on the context of `f`. Any second argument is ignored. The two-argument method only exists to match interface the required by Optimization.jl. """ -function (f::OptimLogDensity)(z::AbstractVector) - varinfo = DynamicPPL.unflatten(f.varinfo, z) - return -DynamicPPL.getlogp( - last(DynamicPPL.evaluate!!(f.model, varinfo, DynamicPPL.getcontext(f))) - ) -end - +(f::OptimLogDensity)(z::AbstractVector) = -LogDensityProblems.logdensity(f.ldf, z) (f::OptimLogDensity)(z, _) = f(z) -# NOTE: This seems a bit weird IMO since this is the _negative_ log-likelihood. -LogDensityProblems.logdensity(f::OptimLogDensity, z::AbstractVector) = f(z) - # NOTE: The format of this function is dictated by Optim. The first argument sets whether to # compute the function value, the second whether to compute the gradient (and stores the # gradient). The last one is the actual argument of the objective function. function (f::OptimLogDensity)(F, G, z) if G !== nothing - # Calculate negative log joint and its gradient. - # TODO: Make OptimLogDensity already an LogDensityProblems.ADgradient? Allow to - # specify AD? - ℓ = LogDensityProblemsAD.ADgradient(f) - neglogp, ∇neglogp = LogDensityProblems.logdensity_and_gradient(ℓ, z) + # Calculate log joint and its gradient. + logp, ∇logp = LogDensityProblems.logdensity_and_gradient(f.ldf, z) - # Save the gradient to the pre-allocated array. - copyto!(G, ∇neglogp) + # Save the negative gradient to the pre-allocated array. + copyto!(G, -∇logp) # If F is something, the negative log joint is requested as well. # We have already computed it as a by-product above and hence return it directly. if F !== nothing - return neglogp + return -logp end end # Only negative log joint requested but no gradient. if F !== nothing - return LogDensityProblems.logdensity(f, z) + return -LogDensityProblems.logdensity(f.ldf, z) end return nothing @@ -318,9 +325,11 @@ function StatsBase.informationmatrix( # Convert the values to their unconstrained states to make sure the # Hessian is computed with respect to the untransformed parameters. - linked = DynamicPPL.istrans(m.f.varinfo) + linked = DynamicPPL.istrans(m.f.ldf.varinfo) if linked - m = Accessors.@set m.f.varinfo = DynamicPPL.invlink!!(m.f.varinfo, m.f.model) + new_vi = DynamicPPL.invlink!!(m.f.ldf.varinfo, m.f.ldf.model) + new_f = OptimLogDensity(m.f.ldf.model, new_vi, m.f.ldf.context) + m = Accessors.@set m.f = new_f end # Calculate the Hessian, which is the information matrix because the negative of the log @@ -330,7 +339,9 @@ function StatsBase.informationmatrix( # Link it back if we invlinked it. if linked - m = Accessors.@set m.f.varinfo = DynamicPPL.link!!(m.f.varinfo, m.f.model) + new_vi = DynamicPPL.link!!(m.f.ldf.varinfo, m.f.ldf.model) + new_f = OptimLogDensity(m.f.ldf.model, new_vi, m.f.ldf.context) + m = Accessors.@set m.f = new_f end return NamedArrays.NamedArray(info, (varnames, varnames)) @@ -351,7 +362,7 @@ Return the values of all the variables with the symbol(s) `var_symbol` in the mo argument should be either a `Symbol` or a vector of `Symbol`s. 
""" function Base.get(m::ModeResult, var_symbols::AbstractVector{Symbol}) - log_density = m.f + log_density = m.f.ldf # Get all the variable names in the model. This is the same as the list of keys in # m.values, but they are more convenient to filter when they are VarNames rather than # Symbols. @@ -383,9 +394,9 @@ richer format of `ModeResult`. It also takes care of transforming them back to t parameter space in case the optimization was done in a transformed space. """ function ModeResult(log_density::OptimLogDensity, solution::SciMLBase.OptimizationSolution) - varinfo_new = DynamicPPL.unflatten(log_density.varinfo, solution.u) + varinfo_new = DynamicPPL.unflatten(log_density.ldf.varinfo, solution.u) # `getparams` performs invlinking if needed - vns_vals_iter = Turing.Inference.getparams(log_density.model, varinfo_new) + vns_vals_iter = Turing.Inference.getparams(log_density.ldf.model, varinfo_new) syms = map(Symbol ∘ first, vns_vals_iter) vals = map(last, vns_vals_iter) return ModeResult( @@ -469,12 +480,15 @@ end OptimizationProblem(log_density::OptimLogDensity, adtype, constraints) Create an `OptimizationProblem` for the objective function defined by `log_density`. + +Note that the adtype parameter here overrides any adtype parameter the +OptimLogDensity was constructed with. """ function Optimization.OptimizationProblem(log_density::OptimLogDensity, adtype, constraints) # Note that OptimLogDensity is a callable that evaluates the model with given # parameters. Hence we can use it in the objective function as below. f = Optimization.OptimizationFunction(log_density, adtype; cons=constraints.cons) - initial_params = log_density.varinfo[:] + initial_params = log_density.ldf.varinfo[:] prob = if !has_constraints(constraints) Optimization.OptimizationProblem(f, initial_params) else @@ -540,28 +554,34 @@ function estimate_mode( end # Create an OptimLogDensity object that can be used to evaluate the objective function, - # i.e. the negative log density. Set its VarInfo to the initial parameters. - log_density = let - inner_context = if estimator isa MAP - DynamicPPL.DefaultContext() - else - DynamicPPL.LikelihoodContext() - end - ctx = OptimizationContext(inner_context) - ld = OptimLogDensity(model, ctx) - Accessors.@set ld.varinfo = DynamicPPL.unflatten(ld.varinfo, initial_params) + # i.e. the negative log density. + inner_context = if estimator isa MAP + DynamicPPL.DefaultContext() + else + DynamicPPL.LikelihoodContext() end + ctx = OptimizationContext(inner_context) + # Set its VarInfo to the initial parameters. + # TODO(penelopeysm): Unclear if this is really needed? Any time that logp is calculated + # (using `LogDensityProblems.logdensity(ldf, x)`) the parameters in the + # varinfo are completely ignored. The parameters only matter if you are calling evaluate!! + # directly on the fields of the LogDensityFunction + vi = DynamicPPL.VarInfo(model) + vi = DynamicPPL.unflatten(vi, initial_params) + + # Link the varinfo if needed. # TODO(mhauru) We currently couple together the questions of whether the user specified # bounds/constraints and whether we transform the objective function to an # unconstrained space. These should be separate concerns, but for that we need to # implement getting the bounds of the prior distributions. 
optimise_in_unconstrained_space = !has_constraints(constraints) if optimise_in_unconstrained_space - transformed_varinfo = DynamicPPL.link(log_density.varinfo, log_density.model) - log_density = Accessors.@set log_density.varinfo = transformed_varinfo + vi = DynamicPPL.link(vi, model) end + log_density = OptimLogDensity(model, vi, ctx) + prob = Optimization.OptimizationProblem(log_density, adtype, constraints) solution = Optimization.solve(prob, solver; kwargs...) # TODO(mhauru) We return a ModeResult for compatibility with the older Optim.jl diff --git a/test/Project.toml b/test/Project.toml index dea22415d1..96681f0491 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -51,7 +51,7 @@ Combinatorics = "1" Distributions = "0.25" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.33, 0.34" +DynamicPPL = "0.35" FiniteDifferences = "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10" HypothesisTests = "0.11" diff --git a/test/dynamicppl/compiler.jl b/test/dynamicppl/compiler.jl deleted file mode 100644 index 1c50c2d9e9..0000000000 --- a/test/dynamicppl/compiler.jl +++ /dev/null @@ -1,369 +0,0 @@ -module DynamicPPLCompilerTests - -using ..NumericalTests: check_numerical -using LinearAlgebra: I -using Test: @test, @testset, @test_throws -using Turing - -# TODO(penelopeysm): Move this to a DynamicPPL Test Utils module -# We use this a lot! -@model function gdemo_d() - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - 1.5 ~ Normal(m, sqrt(s)) - 2.0 ~ Normal(m, sqrt(s)) - return s, m -end -const gdemo_default = gdemo_d() - -@testset "compiler.jl" begin - @testset "assume" begin - @model function test_assume() - x ~ Bernoulli(1) - y ~ Bernoulli(x / 2) - return x, y - end - - smc = SMC() - pg = PG(10) - - res1 = sample(test_assume(), smc, 1000) - res2 = sample(test_assume(), pg, 1000) - - check_numerical(res1, [:y], [0.5]; atol=0.1) - check_numerical(res2, [:y], [0.5]; atol=0.1) - - # Check that all xs are 1. 
- @test all(isone, res1[:x]) - @test all(isone, res2[:x]) - end - @testset "beta binomial" begin - prior = Beta(2, 2) - obs = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1] - exact = Beta(prior.α + sum(obs), prior.β + length(obs) - sum(obs)) - meanp = exact.α / (exact.α + exact.β) - - @model function testbb(obs) - p ~ Beta(2, 2) - x ~ Bernoulli(p) - for i in eachindex(obs) - obs[i] ~ Bernoulli(p) - end - return p, x - end - - smc = SMC() - pg = PG(10) - gibbs = Gibbs(:p => HMC(0.2, 3), :x => PG(10)) - - chn_s = sample(testbb(obs), smc, 1000) - chn_p = sample(testbb(obs), pg, 2000) - chn_g = sample(testbb(obs), gibbs, 1500) - - check_numerical(chn_s, [:p], [meanp]; atol=0.05) - check_numerical(chn_p, [:x], [meanp]; atol=0.1) - check_numerical(chn_g, [:x], [meanp]; atol=0.1) - end - @testset "model with global variables" begin - xs = [1.5 2.0] - # xx = 1 - - @model function fggibbstest(xs) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - # xx ~ Normal(m, sqrt(s)) # this is illegal - - for i in eachindex(xs) - xs[i] ~ Normal(m, sqrt(s)) - # for xx in xs - # xx ~ Normal(m, sqrt(s)) - end - return s, m - end - - gibbs = Gibbs(:s => PG(10), :m => HMC(0.4, 8)) - chain = sample(fggibbstest(xs), gibbs, 2) - end - @testset "new grammar" begin - x = Float64[1 2] - - @model function gauss(x) - priors = Array{Float64}(undef, 2) - priors[1] ~ InverseGamma(2, 3) # s - priors[2] ~ Normal(0, sqrt(priors[1])) # m - for i in eachindex(x) - x[i] ~ Normal(priors[2], sqrt(priors[1])) - end - return priors - end - - chain = sample(gauss(x), PG(10), 10) - chain = sample(gauss(x), SMC(), 10) - - # Test algorithm that does not support models with keyword arguments. See issue #2007 for more details. - @model function gauss2(::Type{TV}=Vector{Float64}; x) where {TV} - priors = TV(undef, 2) - priors[1] ~ InverseGamma(2, 3) # s - priors[2] ~ Normal(0, sqrt(priors[1])) # m - for i in eachindex(x) - x[i] ~ Normal(priors[2], sqrt(priors[1])) - end - return priors - end - - @test_throws ErrorException chain = sample(gauss2(; x=x), PG(10), 10) - @test_throws ErrorException chain = sample(gauss2(; x=x), SMC(), 10) - - @test_throws ErrorException chain = sample( - gauss2(DynamicPPL.TypeWrap{Vector{Float64}}(); x=x), PG(10), 10 - ) - @test_throws ErrorException chain = sample( - gauss2(DynamicPPL.TypeWrap{Vector{Float64}}(); x=x), SMC(), 10 - ) - end - @testset "new interface" begin - obs = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1] - - @model function newinterface(obs) - p ~ Beta(2, 2) - for i in eachindex(obs) - obs[i] ~ Bernoulli(p) - end - return p - end - - chain = sample( - newinterface(obs), HMC(0.75, 3; adtype=AutoForwardDiff(; chunksize=2)), 100 - ) - end - @testset "no return" begin - @model function noreturn(x) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - for i in eachindex(x) - x[i] ~ Normal(m, sqrt(s)) - end - end - - chain = sample(noreturn([1.5 2.0]), HMC(0.15, 6), 1000) - check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]) - end - @testset "observe with literals" begin - @model function test() - z ~ Normal(0, 1) - x ~ Bernoulli(1) - 1 ~ Bernoulli(x / 2) - 0 ~ Bernoulli(x / 2) - return x - end - - is = IS() - smc = SMC() - pg = PG(10) - - res_is = sample(test(), is, 10000) - res_smc = sample(test(), smc, 1000) - res_pg = sample(test(), pg, 100) - - @test all(isone, res_is[:x]) - @test res_is.logevidence ≈ 2 * log(0.5) - - @test all(isone, res_smc[:x]) - @test res_smc.logevidence ≈ 2 * log(0.5) - - @test all(isone, res_pg[:x]) - end - - @testset "sample" begin - alg = Gibbs(:m => HMC(0.2, 3), :s => PG(10)) - chn = 
sample(gdemo_default, alg, 1000) - end - - @testset "vectorization @." begin - @model function vdemo1(x) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - @. x ~ Normal(m, sqrt(s)) - return s, m - end - - alg = HMC(0.01, 5) - x = randn(100) - res = sample(vdemo1(x), alg, 250) - - @model function vdemo1b(x) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - @. x ~ Normal(m, $(sqrt(s))) - return s, m - end - - res = sample(vdemo1b(x), alg, 250) - - @model function vdemo2(x) - μ ~ MvNormal(zeros(size(x, 1)), I) - @. x ~ $(MvNormal(μ, I)) - end - - D = 2 - alg = HMC(0.01, 5) - res = sample(vdemo2(randn(D, 100)), alg, 250) - - # Vector assumptions - N = 10 - alg = HMC(0.2, 4; adtype=AutoForwardDiff(; chunksize=N)) - - @model function vdemo3() - x = Vector{Real}(undef, N) - for i in 1:N - x[i] ~ Normal(0, sqrt(4)) - end - end - - t_loop = @elapsed res = sample(vdemo3(), alg, 1000) - - # Test for vectorize UnivariateDistribution - @model function vdemo4() - x = Vector{Real}(undef, N) - @. x ~ Normal(0, 2) - end - - t_vec = @elapsed res = sample(vdemo4(), alg, 1000) - - @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I) - - t_mv = @elapsed res = sample(vdemo5(), alg, 1000) - - println("Time for") - println(" Loop : ", t_loop) - println(" Vec : ", t_vec) - println(" Mv : ", t_mv) - - # Transformed test - @model function vdemo6() - x = Vector{Real}(undef, N) - @. x ~ InverseGamma(2, 3) - end - - sample(vdemo6(), alg, 1000) - - N = 3 - @model function vdemo7() - x = Array{Real}(undef, N, N) - @. x ~ [InverseGamma(2, 3) for i in 1:N] - end - - sample(vdemo7(), alg, 1000) - end - @testset "vectorization .~" begin - @model function vdemo1(x) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - x .~ Normal(m, sqrt(s)) - return s, m - end - - alg = HMC(0.01, 5) - x = randn(100) - res = sample(vdemo1(x), alg, 250) - - @model function vdemo2(x) - μ ~ MvNormal(zeros(size(x, 1)), I) - return x .~ MvNormal(μ, I) - end - - D = 2 - alg = HMC(0.01, 5) - res = sample(vdemo2(randn(D, 100)), alg, 250) - - # Vector assumptions - N = 10 - alg = HMC(0.2, 4; adtype=AutoForwardDiff(; chunksize=N)) - - @model function vdemo3() - x = Vector{Real}(undef, N) - for i in 1:N - x[i] ~ Normal(0, sqrt(4)) - end - end - - t_loop = @elapsed res = sample(vdemo3(), alg, 1000) - - # Test for vectorize UnivariateDistribution - @model function vdemo4() - x = Vector{Real}(undef, N) - return x .~ Normal(0, 2) - end - - t_vec = @elapsed res = sample(vdemo4(), alg, 1000) - - @model vdemo5() = x ~ MvNormal(zeros(N), 4 * I) - - t_mv = @elapsed res = sample(vdemo5(), alg, 1000) - - println("Time for") - println(" Loop : ", t_loop) - println(" Vec : ", t_vec) - println(" Mv : ", t_mv) - - # Transformed test - @model function vdemo6() - x = Vector{Real}(undef, N) - return x .~ InverseGamma(2, 3) - end - - sample(vdemo6(), alg, 1000) - - @model function vdemo7() - x = Array{Real}(undef, N, N) - return x .~ [InverseGamma(2, 3) for i in 1:N] - end - - sample(vdemo7(), alg, 1000) - end - @testset "Type parameters" begin - N = 10 - alg = HMC(0.01, 5; adtype=AutoForwardDiff(; chunksize=N)) - x = randn(1000) - @model function vdemo1(::Type{T}=Float64) where {T} - x = Vector{T}(undef, N) - for i in 1:N - x[i] ~ Normal(0, sqrt(4)) - end - end - - t_loop = @elapsed res = sample(vdemo1(), alg, 250) - t_loop = @elapsed res = sample(vdemo1(DynamicPPL.TypeWrap{Float64}()), alg, 250) - - vdemo1kw(; T) = vdemo1(T) - t_loop = @elapsed res = sample( - vdemo1kw(; T=DynamicPPL.TypeWrap{Float64}()), alg, 250 - ) - - @model function vdemo2(::Type{T}=Float64) where 
{T<:Real} - x = Vector{T}(undef, N) - @. x ~ Normal(0, 2) - end - - t_vec = @elapsed res = sample(vdemo2(), alg, 250) - t_vec = @elapsed res = sample(vdemo2(DynamicPPL.TypeWrap{Float64}()), alg, 250) - - vdemo2kw(; T) = vdemo2(T) - t_vec = @elapsed res = sample( - vdemo2kw(; T=DynamicPPL.TypeWrap{Float64}()), alg, 250 - ) - - @model function vdemo3(::Type{TV}=Vector{Float64}) where {TV<:AbstractVector} - x = TV(undef, N) - @. x ~ InverseGamma(2, 3) - end - - sample(vdemo3(), alg, 250) - sample(vdemo3(DynamicPPL.TypeWrap{Vector{Float64}}()), alg, 250) - - vdemo3kw(; T) = vdemo3(T) - sample(vdemo3kw(; T=DynamicPPL.TypeWrap{Vector{Float64}}()), alg, 250) - end -end - -end # module diff --git a/test/essential/ad.jl b/test/essential/ad.jl deleted file mode 100644 index 0c497a2cbd..0000000000 --- a/test/essential/ad.jl +++ /dev/null @@ -1,246 +0,0 @@ -module AdTests - -using ..Models: gdemo_default -using Distributions: logpdf -using DynamicPPL: getlogp, getindex_internal -using ForwardDiff -using LinearAlgebra -using LogDensityProblems: LogDensityProblems -using LogDensityProblemsAD: LogDensityProblemsAD -using ReverseDiff -using Test: @test, @testset -using Turing -using Turing: SampleFromPrior -using Zygote - -function test_model_ad(model, f, syms::Vector{Symbol}) - # Set up VI. - vi = Turing.VarInfo(model) - - # Collect symbols. - vnms = Vector(undef, length(syms)) - vnvals = Vector{Float64}() - for i in 1:length(syms) - s = syms[i] - vnms[i] = getfield(vi.metadata, s).vns[1] - - vals = getindex_internal(vi, vnms[i]) - for i in eachindex(vals) - push!(vnvals, vals[i]) - end - end - - # Compute primal. - x = vec(vnvals) - logp = f(x) - - # Call ForwardDiff's AD directly. - grad_FWAD = sort(ForwardDiff.gradient(f, x)) - - # Compare with `logdensity_and_gradient`. 
- z = vi[SampleFromPrior()] - for chunksize in (0, 1, 10), standardtag in (true, false, 0, 3) - ℓ = LogDensityProblemsAD.ADgradient( - Turing.AutoForwardDiff(; chunksize=chunksize, tag=standardtag), - Turing.LogDensityFunction( - vi, model, SampleFromPrior(), DynamicPPL.DefaultContext() - ), - ) - l, ∇E = LogDensityProblems.logdensity_and_gradient(ℓ, z) - - # Compare result - @test l ≈ logp - @test sort(∇E) ≈ grad_FWAD atol = 1e-9 - end -end - -@testset "ad.jl" begin - @testset "adr" begin - ad_test_f = gdemo_default - vi = Turing.VarInfo(ad_test_f) - ad_test_f(vi, SampleFromPrior()) - svn = vi.metadata.s.vns[1] - mvn = vi.metadata.m.vns[1] - _s = getindex_internal(vi, svn)[1] - _m = getindex_internal(vi, mvn)[1] - - dist_s = InverseGamma(2, 3) - - # Hand-written logp - function logp(x::Vector) - s = x[2] - # s = invlink(dist_s, s) - m = x[1] - lik_dist = Normal(m, sqrt(s)) - lp = logpdf(dist_s, s) + logpdf(Normal(0, sqrt(s)), m) - lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) - return lp - end - - # Call ForwardDiff's AD - g = x -> ForwardDiff.gradient(logp, x) - # _s = link(dist_s, _s) - _x = [_m, _s] - grad_FWAD = sort(g(_x)) - - ℓ = Turing.LogDensityFunction( - vi, ad_test_f, SampleFromPrior(), DynamicPPL.DefaultContext() - ) - x = map(x -> Float64(x), vi[SampleFromPrior()]) - - zygoteℓ = LogDensityProblemsAD.ADgradient(Turing.AutoZygote(), ℓ) - if isdefined(Base, :get_extension) - @test zygoteℓ isa - Base.get_extension( - LogDensityProblemsAD, :LogDensityProblemsADZygoteExt - ).ZygoteGradientLogDensity - else - @test zygoteℓ isa - LogDensityProblemsAD.LogDensityProblemsADZygoteExt.ZygoteGradientLogDensity - end - @test zygoteℓ.ℓ === ℓ - ∇E2 = LogDensityProblems.logdensity_and_gradient(zygoteℓ, x)[2] - @test sort(∇E2) ≈ grad_FWAD atol = 1e-9 - end - - @testset "general AD tests" begin - # Tests gdemo gradient. - function logp1(x::Vector) - dist_s = InverseGamma(2, 3) - s = x[2] - m = x[1] - lik_dist = Normal(m, sqrt(s)) - lp = - Turing.logpdf_with_trans(dist_s, s, false) + - Turing.logpdf_with_trans(Normal(0, sqrt(s)), m, false) - lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) - return lp - end - - test_model_ad(gdemo_default, logp1, [:m, :s]) - - # Test Wishart AD. - @model function wishart_ad() - v ~ Wishart(7, [1 0.5; 0.5 1]) - return v - end - - # Hand-written logp - function logp3(x) - dist_v = Wishart(7, [1 0.5; 0.5 1]) - v = [x[1] x[3]; x[2] x[4]] - lp = Turing.logpdf_with_trans(dist_v, v, false) - return lp - end - - test_model_ad(wishart_ad(), logp3, [:v]) - end - @testset "Simplex Zygote and ReverseDiff (with and without caching) AD" begin - @model function dir() - return theta ~ Dirichlet(1 ./ fill(4, 4)) - end - sample(dir(), HMC(0.01, 1; adtype=AutoZygote()), 1000) - sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(; compile=false)), 1000) - sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(; compile=true)), 1000) - end - @testset "PDMatDistribution AD" begin - @model function wishart() - return theta ~ Wishart(4, Matrix{Float64}(I, 4, 4)) - end - - sample(wishart(), HMC(0.01, 1; adtype=AutoReverseDiff(; compile=false)), 1000) - sample(wishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000) - - @model function invwishart() - return theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4)) - end - - sample(invwishart(), HMC(0.01, 1; adtype=AutoReverseDiff(; compile=false)), 1000) - sample(invwishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000) - end - @testset "Hessian test" begin - @model function tst(x, ::Type{TV}=Vector{Float64}) where {TV} - params = TV(undef, 2) - @. 
params ~ Normal(0, 1) - - return x ~ MvNormal(params, I) - end - - function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) - # setup - varinfo_init = Turing.VarInfo(model) - spl = DynamicPPL.SampleFromPrior() - varinfo_init = DynamicPPL.link!!(varinfo_init, spl, model) - - function logπ(z; unlinked=false) - varinfo = DynamicPPL.unflatten(varinfo_init, spl, z) - - # TODO(torfjelde): Pretty sure this is a mistake. - # Why are we not linking `varinfo` rather than `varinfo_init`? - if unlinked - varinfo_init = DynamicPPL.invlink!!(varinfo_init, spl, model) - end - varinfo = last( - DynamicPPL.evaluate!!( - model, varinfo, DynamicPPL.SamplingContext(spl, ctx) - ), - ) - if unlinked - varinfo_init = DynamicPPL.link!!(varinfo_init, spl, model) - end - - return -DynamicPPL.getlogp(varinfo) - end - - return logπ - end - - data = [0.5, -0.5] - model = tst(data) - - likelihood = make_logjoint(model, DynamicPPL.LikelihoodContext()) - target(x) = likelihood(x; unlinked=true) - - H_f = ForwardDiff.hessian(target, zeros(2)) - H_r = ReverseDiff.hessian(target, zeros(2)) - @test H_f == [1.0 0.0; 0.0 1.0] - @test H_f == H_r - end - - @testset "memoization: issue #1393" begin - @model function demo(data) - sigma ~ Uniform(0.0, 20.0) - return data ~ Normal(0, sigma) - end - - N = 1000 - for i in 1:5 - d = Normal(0.0, i) - data = rand(d, N) - chn = sample( - demo(data), NUTS(0.65; adtype=AutoReverseDiff(; compile=true)), 1000 - ) - @test mean(Array(chn[:sigma])) ≈ std(data) atol = 0.5 - end - end - - @testset "ReverseDiff compiled without linking" begin - f = DynamicPPL.LogDensityFunction(gdemo_default) - θ = DynamicPPL.getparams(f) - - f_rd = LogDensityProblemsAD.ADgradient(Turing.AutoReverseDiff(; compile=false), f) - f_rd_compiled = LogDensityProblemsAD.ADgradient( - Turing.AutoReverseDiff(; compile=true), f - ) - - ℓ, ℓ_grad = LogDensityProblems.logdensity_and_gradient(f_rd, θ) - ℓ_compiled, ℓ_grad_compiled = LogDensityProblems.logdensity_and_gradient( - f_rd_compiled, θ - ) - - @test ℓ == ℓ_compiled - @test ℓ_grad == ℓ_grad_compiled - end -end - -end diff --git a/test/ext/OptimInterface.jl b/test/ext/OptimInterface.jl index d1e77c215b..163cf36c53 100644 --- a/test/ext/OptimInterface.jl +++ b/test/ext/OptimInterface.jl @@ -137,17 +137,16 @@ using Turing # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to # something larger than the default. 
allowed_incorrect_mle = [ - DynamicPPL.TestUtils.demo_dot_assume_dot_observe, + DynamicPPL.TestUtils.demo_dot_assume_observe, DynamicPPL.TestUtils.demo_assume_index_observe, DynamicPPL.TestUtils.demo_assume_multivariate_observe, DynamicPPL.TestUtils.demo_assume_multivariate_observe_literal, DynamicPPL.TestUtils.demo_dot_assume_observe_submodel, - DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix, - DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix, + DynamicPPL.TestUtils.demo_dot_assume_observe_matrix_index, DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal, DynamicPPL.TestUtils.demo_dot_assume_observe_index, DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal, - DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix, + DynamicPPL.TestUtils.demo_assume_matrix_observe_matrix_index, ] @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS result_true = DynamicPPL.TestUtils.likelihood_optima(model) diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index 62963b4c45..d528d7fc47 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -72,7 +72,7 @@ using Turing # run sampler: progress logging should be disabled and # it should return a Chains object - sampler = Sampler(HMC(0.1, 7; adtype=adbackend), gdemo_default) + sampler = Sampler(HMC(0.1, 7; adtype=adbackend)) chains = sample(StableRNG(seed), gdemo_default, sampler, MCMCThreads(), 10, 4) @test chains isa MCMCChains.Chains end @@ -440,15 +440,6 @@ using Turing res = sample(StableRNG(seed), vdemo1b(x), alg, 10) - @model function vdemo2(x) - μ ~ MvNormal(zeros(size(x, 1)), I) - @. x ~ $(MvNormal(μ, I)) - end - - D = 2 - alg = HMC(0.01, 5; adtype=adbackend) - res = sample(StableRNG(seed), vdemo2(randn(D, 100)), alg, 10) - # Vector assumptions N = 10 alg = HMC(0.2, 4; adtype=adbackend) @@ -492,7 +483,7 @@ using Turing N = 3 @model function vdemo7() x = Array{Real}(undef, N, N) - @. x ~ [InverseGamma(2, 3) for i in 1:N] + return x ~ filldist(InverseGamma(2, 3), N, N) end sample(StableRNG(seed), vdemo7(), alg, 10) @@ -512,7 +503,7 @@ using Turing @model function vdemo2(x) μ ~ MvNormal(zeros(size(x, 1)), I) - return x .~ MvNormal(μ, I) + return x ~ filldist(MvNormal(μ, I), size(x, 2)) end D = 2 @@ -560,7 +551,7 @@ using Turing @model function vdemo7() x = Array{Real}(undef, N, N) - return x .~ [InverseGamma(2, 3) for i in 1:N] + return x ~ filldist(InverseGamma(2, 3), N, N) end sample(StableRNG(seed), vdemo7(), alg, 10) diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 6486a86285..82baa41197 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -8,7 +8,6 @@ using DynamicPPL: DynamicPPL using ForwardDiff: ForwardDiff using LinearAlgebra: I using LogDensityProblems: LogDensityProblems -using LogDensityProblemsAD: LogDensityProblemsAD using Random: Random using ReverseDiff: ReverseDiff using StableRNGs: StableRNG @@ -18,16 +17,12 @@ using Turing using Turing.Inference: AdvancedHMC function initialize_nuts(model::Turing.Model) - # Create a log-density function with an implementation of the - # gradient so we ensure that we're using the same AD backend as in Turing. - f = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(model)) - - # Link the varinfo. 
- f = Turing.Inference.setvarinfo( - f, - DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model), - Turing.Inference.getADType(DynamicPPL.getcontext(LogDensityProblemsAD.parent(f))), - ) + # Create a linked varinfo + vi = DynamicPPL.VarInfo(model) + linked_vi = DynamicPPL.link!!(vi, model) + + # Create a LogDensityFunction + f = DynamicPPL.LogDensityFunction(model, linked_vi; adtype=Turing.DEFAULT_ADTYPE) # Choose parameter dimensionality and initial parameter value D = LogDensityProblems.dimension(f) @@ -122,7 +117,7 @@ end # TODO: Remove this once the constructors in the respective packages become "lazy". sampler = initialize_nuts(model) sampler_ext = DynamicPPL.Sampler( - externalsampler(sampler; adtype, unconstrained=true), model + externalsampler(sampler; adtype, unconstrained=true) ) # FIXME: Once https://github.com/TuringLang/AdvancedHMC.jl/pull/366 goes through, uncomment. # @testset "initial_params" begin @@ -148,27 +143,6 @@ end end end end - - @testset "don't drop `ADgradient` (PR: #2223)" begin - rng = Random.default_rng() - model = DynamicPPL.TestUtils.DEMO_MODELS[1] - sampler = initialize_nuts(model) - sampler_ext = externalsampler( - sampler; unconstrained=true, adtype=AutoForwardDiff() - ) - # Initial step. - state = last( - AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext); n_adapts=0) - ) - @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper - # Subsequent step. - state = last( - AbstractMCMC.step( - rng, model, DynamicPPL.Sampler(sampler_ext), state; n_adapts=0 - ), - ) - @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper - end end @testset "AdvancedMH.jl" begin @@ -178,7 +152,7 @@ end # TODO: Remove this once the constructors in the respective packages become "lazy". sampler = initialize_mh_rw(model) sampler_ext = DynamicPPL.Sampler( - externalsampler(sampler; unconstrained=true), model + externalsampler(sampler; unconstrained=true) ) @testset "initial_params" begin test_initial_params(model, sampler_ext) @@ -201,7 +175,7 @@ end # @testset "MH with prior proposal" begin # @testset "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS # sampler = initialize_mh_with_prior_proposal(model); - # sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; unconstrained=false), model) + # sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; unconstrained=false)) # @testset "initial_params" begin # test_initial_params(model, sampler_ext) # end diff --git a/test/mcmc/emcee.jl b/test/mcmc/emcee.jl index 08ea16d2a8..49ce75ac97 100644 --- a/test/mcmc/emcee.jl +++ b/test/mcmc/emcee.jl @@ -17,7 +17,7 @@ using Turing n_walkers = 250 spl = Emcee(n_walkers, 2.0) - @test DynamicPPL.alg_str(Sampler(spl, gdemo_default)) == "Emcee" + @test DynamicPPL.alg_str(Sampler(spl)) == "Emcee" chain = sample(gdemo_default, spl, n_samples) check_gdemo(chain) diff --git a/test/mcmc/ess.jl b/test/mcmc/ess.jl index 9798457af6..f6c97e0e29 100644 --- a/test/mcmc/ess.jl +++ b/test/mcmc/ess.jl @@ -30,7 +30,7 @@ using Turing N = 10 s1 = ESS() - @test DynamicPPL.alg_str(Sampler(s1, demo_default)) == "ESS" + @test DynamicPPL.alg_str(Sampler(s1)) == "ESS" c1 = sample(demo_default, s1, N) c2 = sample(demodot_default, s1, N) @@ -88,6 +88,30 @@ using Turing ) end end + + # Test that ESS can sample multiple variables regardless of whether they are under the + # same symbol or not. 
+ @testset "Multiple variables" begin + @model function xy() + z ~ Beta(2.0, 2.0) + x ~ Normal(z, 2.0) + return y ~ Normal(-3.0, 3.0) + end + + @model function x12() + z ~ Beta(2.0, 2.0) + x = Vector{Float64}(undef, 2) + x[1] ~ Normal(z, 2.0) + return x[2] ~ Normal(-3.0, 3.0) + end + + num_samples = 10_000 + spl_x = Gibbs(@varname(z) => NUTS(), @varname(x) => ESS()) + spl_xy = Gibbs(@varname(z) => NUTS(), (@varname(x), @varname(y)) => ESS()) + + @test sample(StableRNG(23), xy(), spl_xy, num_samples).value ≈ + sample(StableRNG(23), x12(), spl_x, num_samples).value + end end end diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index 48c3f72c71..1f472bf4cb 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -40,7 +40,7 @@ const DEMO_MODELS_WITHOUT_DOT_ASSUME = Union{ DynamicPPL.Model{typeof(DynamicPPL.TestUtils.demo_assume_multivariate_observe_literal)}, DynamicPPL.Model{typeof(DynamicPPL.TestUtils.demo_assume_observe_literal)}, DynamicPPL.Model{typeof(DynamicPPL.TestUtils.demo_assume_dot_observe_literal)}, - DynamicPPL.Model{typeof(DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix)}, + DynamicPPL.Model{typeof(DynamicPPL.TestUtils.demo_assume_matrix_observe_matrix_index)}, } has_dot_assume(::DEMO_MODELS_WITHOUT_DOT_ASSUME) = false has_dot_assume(::DynamicPPL.Model) = true @@ -90,7 +90,7 @@ has_dot_assume(::DynamicPPL.Model) = true end end - # Check the type stability also in the dot_tilde pipeline. + # Check the type stability also when using .~. for k in all_varnames # The map(identity, ...) part is there to concretise the eltype. subkeys = map( @@ -145,7 +145,7 @@ end end unwrap_sampler(sampler::DynamicPPL.Sampler{<:AlgWrapper}) = - DynamicPPL.Sampler(sampler.alg.inner, sampler.selector) + DynamicPPL.Sampler(sampler.alg.inner) # Methods we need to define to be able to use AlgWrapper instead of an actual algorithm. # They all just propagate the call to the inner algorithm. 
@@ -295,7 +295,7 @@ end @varname(m) => RepeatSampler(PG(10), 2), ) for s in (s1, s2, s3, s4, s5, s6) - @test DynamicPPL.alg_str(Turing.Sampler(s, gdemo_default)) == "Gibbs" + @test DynamicPPL.alg_str(Turing.Sampler(s)) == "Gibbs" end @test sample(gdemo_default, s1, N) isa MCMCChains.Chains @@ -305,7 +305,7 @@ end @test sample(gdemo_default, s5, N) isa MCMCChains.Chains @test sample(gdemo_default, s6, N) isa MCMCChains.Chains - g = Turing.Sampler(s3, gdemo_default) + g = Turing.Sampler(s3) @test sample(gdemo_default, g, N) isa MCMCChains.Chains end diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl index 3afe1332c7..0a5910e450 100644 --- a/test/mcmc/hmc.jl +++ b/test/mcmc/hmc.jl @@ -155,11 +155,11 @@ using Turing @testset "hmcda constructor" begin alg = HMCDA(0.8, 0.75; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) + sampler = Sampler(alg) @test DynamicPPL.alg_str(sampler) == "HMCDA" alg = HMCDA(200, 0.8, 0.75; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) + sampler = Sampler(alg) @test DynamicPPL.alg_str(sampler) == "HMCDA" @test isa(alg, HMCDA) @@ -174,11 +174,11 @@ using Turing @testset "nuts constructor" begin alg = NUTS(200, 0.65; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) + sampler = Sampler(alg) @test DynamicPPL.alg_str(sampler) == "NUTS" alg = NUTS(0.65; adtype=adbackend) - sampler = Sampler(alg, gdemo_default) + sampler = Sampler(alg) @test DynamicPPL.alg_str(sampler) == "NUTS" end @@ -201,28 +201,6 @@ using Turing @test sample(StableRNG(seed), gdemo_default, alg3, 10) isa Chains end - @testset "Regression tests" begin - # https://github.com/TuringLang/DynamicPPL.jl/issues/27 - @model function mwe1(::Type{T}=Float64) where {T<:Real} - m = Matrix{T}(undef, 2, 3) - return m .~ MvNormal(zeros(2), I) - end - @test sample(StableRNG(seed), mwe1(), HMC(0.2, 4; adtype=adbackend), 100) isa Chains - - @model function mwe2(::Type{T}=Matrix{Float64}) where {T} - m = T(undef, 2, 3) - return m .~ MvNormal(zeros(2), I) - end - @test sample(StableRNG(seed), mwe2(), HMC(0.2, 4; adtype=adbackend), 100) isa Chains - - # https://github.com/TuringLang/Turing.jl/issues/1308 - @model function mwe3(::Type{T}=Array{Float64}) where {T} - m = T(undef, 2, 3) - return m .~ MvNormal(zeros(2), I) - end - @test sample(StableRNG(seed), mwe3(), HMC(0.2, 4; adtype=adbackend), 100) isa Chains - end - # issue #1923 @testset "reproducibility" begin alg = NUTS(1000, 0.8; adtype=adbackend) @@ -327,7 +305,7 @@ using Turing algs = [HMC(0.1, 10), HMCDA(0.8, 0.75), NUTS(0.5), NUTS(0, 0.5)] @testset "$(alg)" for alg in algs # Construct a HMC state by taking a single step - spl = Sampler(alg, gdemo_default) + spl = Sampler(alg) hmc_state = DynamicPPL.initialstep( Random.default_rng(), gdemo_default, spl, DynamicPPL.VarInfo(gdemo_default) )[2] diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 69cf5a849b..d190e589a5 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -28,7 +28,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) s3 = MH() s4 = MH([1.0 0.1; 0.1 1.0]) for s in (s1, s2, s3, s4) - @test DynamicPPL.alg_str(Sampler(s, gdemo_default)) == "MH" + @test DynamicPPL.alg_str(Sampler(s)) == "MH" end c1 = sample(gdemo_default, s1, N) @@ -116,7 +116,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) end model = M(zeros(2), I, 1) - sampler = Inference.Sampler(MH(), model) + sampler = Inference.Sampler(MH()) dt, vt = Inference.dist_val_tuple(sampler, Turing.VarInfo(model)) @@ -234,22 +234,22 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) alg = MH() spl = DynamicPPL.Sampler(alg) vi = 
Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) - @test !DynamicPPL.islinked(vi, spl) + @test !DynamicPPL.islinked(vi) # Link if proposal is `AdvancedHM.RandomWalkProposal` vi = deepcopy(vi_base) - d = length(vi_base[DynamicPPL.SampleFromPrior()]) + d = length(vi_base[:]) alg = MH(AdvancedMH.RandomWalkProposal(MvNormal(zeros(d), I))) spl = DynamicPPL.Sampler(alg) vi = Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) - @test DynamicPPL.islinked(vi, spl) + @test DynamicPPL.islinked(vi) # Link if ALL proposals are `AdvancedHM.RandomWalkProposal`. vi = deepcopy(vi_base) alg = MH(:s => AdvancedMH.RandomWalkProposal(Normal())) spl = DynamicPPL.Sampler(alg) vi = Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) - @test DynamicPPL.islinked(vi, spl) + @test DynamicPPL.islinked(vi) # Don't link if at least one proposal is NOT `RandomWalkProposal`. # TODO: make it so that only those that are using `RandomWalkProposal` @@ -262,7 +262,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) ) spl = DynamicPPL.Sampler(alg) vi = Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) - @test !DynamicPPL.islinked(vi, spl) + @test !DynamicPPL.islinked(vi) end @testset "prior" begin diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index 74f7f3dfec..e80d74e0c8 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -539,17 +539,16 @@ using Turing # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to # something larger than the default. allowed_incorrect_mle = [ - DynamicPPL.TestUtils.demo_dot_assume_dot_observe, + DynamicPPL.TestUtils.demo_dot_assume_observe, DynamicPPL.TestUtils.demo_assume_index_observe, DynamicPPL.TestUtils.demo_assume_multivariate_observe, DynamicPPL.TestUtils.demo_assume_multivariate_observe_literal, DynamicPPL.TestUtils.demo_dot_assume_observe_submodel, - DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix, - DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix, + DynamicPPL.TestUtils.demo_dot_assume_observe_matrix_index, DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal, DynamicPPL.TestUtils.demo_dot_assume_observe_index, DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal, - DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix, + DynamicPPL.TestUtils.demo_assume_matrix_observe_matrix_index, ] @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS Random.seed!(23) @@ -624,16 +623,9 @@ using Turing m = DynamicPPL.contextualize( gdemo_default, ADUtils.ADTypeCheckContext(adbackend, gdemo_default.context) ) - if adbackend isa AutoForwardDiff - # TODO: Figure out why this is happening. - # https://github.com/TuringLang/Turing.jl/issues/2369 - @test_throws DivideError maximum_likelihood(m; adtype=adbackend) - @test_throws DivideError maximum_a_posteriori(m; adtype=adbackend) - else - # These will error if the adbackend being used is not the one set. - maximum_likelihood(m; adtype=adbackend) - maximum_a_posteriori(m; adtype=adbackend) - end + # These will error if the adbackend being used is not the one set. 
+ maximum_likelihood(m; adtype=adbackend) + maximum_a_posteriori(m; adtype=adbackend) end @testset "Collinear coeftable" begin diff --git a/test/runtests.jl b/test/runtests.jl index 093615e540..47b714188e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -39,7 +39,6 @@ end end @testset "essential" verbose = true begin - @timeit_include("essential/ad.jl") @timeit_include("essential/container.jl") end @@ -81,10 +80,6 @@ end @timeit_include("stdlib/RandomMeasures.jl") end - @testset "DynamicPPL integration" begin - @timeit_include("dynamicppl/compiler.jl") - end - @testset "utilities" begin @timeit_include("mcmc/utilities.jl") end diff --git a/test/skipped/unit_test_helper.jl b/test/skipped/unit_test_helper.jl index 174234480e..aa513e428e 100644 --- a/test/skipped/unit_test_helper.jl +++ b/test/skipped/unit_test_helper.jl @@ -11,7 +11,9 @@ function test_grad(turing_model, grad_f; trans=Dict()) ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoTracker(), Turing.LogDensityFunction( - vi, model_f, SampleFromPrior(), DynamicPPL.DefaultContext() + model_f, + vi, + DynamicPPL.SamplingContext(SampleFromPrior(), DynamicPPL.DefaultContext()), ), ) for _ in 1:10000 diff --git a/test/test_utils/ad_utils.jl b/test/test_utils/ad_utils.jl index 2c01dc524e..ee544e7ce3 100644 --- a/test/test_utils/ad_utils.jl +++ b/test/test_utils/ad_utils.jl @@ -195,40 +195,6 @@ function DynamicPPL.tilde_observe(context::ADTypeCheckContext, sampler, right, l return logp, vi end -function DynamicPPL.dot_tilde_assume(context::ADTypeCheckContext, right, left, vn, vi) - value, logp, vi = DynamicPPL.dot_tilde_assume( - DynamicPPL.childcontext(context), right, left, vn, vi - ) - check_adtype(context, vi) - return value, logp, vi -end - -function DynamicPPL.dot_tilde_assume( - rng::Random.AbstractRNG, context::ADTypeCheckContext, sampler, right, left, vn, vi -) - value, logp, vi = DynamicPPL.dot_tilde_assume( - rng, DynamicPPL.childcontext(context), sampler, right, left, vn, vi - ) - check_adtype(context, vi) - return value, logp, vi -end - -function DynamicPPL.dot_tilde_observe(context::ADTypeCheckContext, right, left, vi) - logp, vi = DynamicPPL.dot_tilde_observe( - DynamicPPL.childcontext(context), right, left, vi - ) - check_adtype(context, vi) - return logp, vi -end - -function DynamicPPL.dot_tilde_observe(context::ADTypeCheckContext, sampler, right, left, vi) - logp, vi = DynamicPPL.dot_tilde_observe( - DynamicPPL.childcontext(context), sampler, right, left, vi - ) - check_adtype(context, vi) - return logp, vi -end - # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # List of AD backends to test. @@ -236,7 +202,7 @@ end All the ADTypes on which we want to run the tests. """ adbackends = [ - Turing.AutoForwardDiff(; chunksize=0), + Turing.AutoForwardDiff(), Turing.AutoReverseDiff(; compile=false), Turing.AutoMooncake(; config=nothing), ] From f904f9981dc687d04dfa6a13d94d45013a093729 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Fri, 14 Mar 2025 16:05:46 +0000 Subject: [PATCH 5/8] Fix call to DynamicPPL.initialize_parameters!! --- src/mcmc/gibbs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index fce829901c..db133abc85 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -312,7 +312,7 @@ function initial_varinfo(rng, model, spl, initial_params) # Update the parameters if provided. 
if initial_params !== nothing - vi = DynamicPPL.initialize_parameters!!(vi, initial_params, spl, model) + vi = DynamicPPL.initialize_parameters!!(vi, initial_params, model) # Update joint log probability. # This is a quick fix for https://github.com/TuringLang/Turing.jl/issues/1588 From 789f49899712827015bbfd30d954055e41046ef5 Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Mon, 17 Mar 2025 13:47:41 +0000 Subject: [PATCH 6/8] Remove `Zygote` (#2505) * Remove `Zygote`; fix https://github.com/TuringLang/Turing.jl/issues/2504 * Update test/test_utils/ad_utils.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Add HISTORY.md entry about removing support for Zygote --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Markus Hauru Co-authored-by: Penelope Yong Co-authored-by: Markus Hauru --- HISTORY.md | 6 ++++++ docs/src/api.md | 1 - src/Turing.jl | 1 - src/essential/Essential.jl | 3 +-- test/Project.toml | 2 -- test/test_utils/ad_utils.jl | 19 +------------------ test/test_utils/test_utils.jl | 9 --------- 7 files changed, 8 insertions(+), 33 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6251974075..62ca1d350c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,12 @@ 0.37 removes the old Gibbs constructors deprecated in 0.36. +### Remove Zygote support + +Zygote is no longer officially supported as an automatic differentiation backend, and `AutoZygote` is no longer exported. You can continue to use Zygote by importing `AutoZygote` from ADTypes and it may well continue to work, but it is no longer tested and no effort will be expended to fix it if something breaks. + +[Mooncake](https://github.com/compintell/Mooncake.jl/) is the recommended replacement for Zygote. 
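+
+As a rough, untested sketch of the migration (where `my_model` stands in for whatever model you are sampling), the change amounts to swapping the `adtype`:
+
+```julia
+using Turing
+
+# Recommended: Mooncake as the AD backend.
+chain = sample(my_model, NUTS(0.65; adtype=AutoMooncake(; config=nothing)), 1000)
+
+# Unsupported but still possible: Zygote via ADTypes.
+# Zygote itself must be installed and loaded for this to work.
+using ADTypes: AutoZygote
+chain_zygote = sample(my_model, NUTS(0.65; adtype=AutoZygote()), 1000)
+```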
+ ### DynamicPPL 0.35 Turing.jl v0.37 uses DynamicPPL v0.35, which brings with it several breaking changes: diff --git a/docs/src/api.md b/docs/src/api.md index afedf59bb7..3066a7fad9 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -88,7 +88,6 @@ See the [AD guide](https://turinglang.org/docs/tutorials/docs-10-using-turing-au |:----------------- |:------------------------------------ |:---------------------- | | `AutoForwardDiff` | [`ADTypes.AutoForwardDiff`](@extref) | ForwardDiff.jl backend | | `AutoReverseDiff` | [`ADTypes.AutoReverseDiff`](@extref) | ReverseDiff.jl backend | -| `AutoZygote` | [`ADTypes.AutoZygote`](@extref) | Zygote.jl backend | | `AutoMooncake` | [`ADTypes.AutoMooncake`](@extref) | Mooncake.jl backend | ### Debugging diff --git a/src/Turing.jl b/src/Turing.jl index abba580a27..d8fc09fb9d 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -106,7 +106,6 @@ export @model, # modelling externalsampler, AutoForwardDiff, # ADTypes AutoReverseDiff, - AutoZygote, AutoMooncake, setprogress!, # debugging Flat, diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl index c04c7e862b..cfa064c651 100644 --- a/src/essential/Essential.jl +++ b/src/essential/Essential.jl @@ -11,7 +11,7 @@ using Bijectors: PDMatDistribution using AdvancedVI using StatsFuns: logsumexp, softmax @reexport using DynamicPPL -using ADTypes: ADTypes, AutoForwardDiff, AutoReverseDiff, AutoZygote, AutoMooncake +using ADTypes: ADTypes, AutoForwardDiff, AutoReverseDiff, AutoMooncake using AdvancedPS: AdvancedPS @@ -20,7 +20,6 @@ include("container.jl") export @model, @varname, AutoForwardDiff, - AutoZygote, AutoReverseDiff, AutoMooncake, @logprob_str, diff --git a/test/Project.toml b/test/Project.toml index 96681f0491..489923767a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -36,7 +36,6 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] AbstractMCMC = "5" @@ -75,5 +74,4 @@ StableRNGs = "1" StatsBase = "0.33, 0.34" StatsFuns = "0.9.5, 1" TimerOutputs = "0.5" -Zygote = "0.5.4, 0.6" julia = "1.10" diff --git a/test/test_utils/ad_utils.jl b/test/test_utils/ad_utils.jl index ee544e7ce3..309276407a 100644 --- a/test/test_utils/ad_utils.jl +++ b/test/test_utils/ad_utils.jl @@ -8,7 +8,6 @@ using Mooncake: Mooncake using Test: Test using Turing: Turing using Turing: DynamicPPL -using Zygote: Zygote export ADTypeCheckContext, adbackends @@ -31,9 +30,6 @@ const eltypes_by_adtype = Dict( ReverseDiff.TrackedVector, ), Turing.AutoMooncake => (Mooncake.CoDual,), - # Zygote.Dual is actually the same as ForwardDiff.Dual, so can't distinguish between the - # two by element type. However, we have other checks for Zygote, see check_adtype. - Turing.AutoZygote => (Zygote.Dual,), ) """ @@ -90,7 +86,6 @@ For instance, evaluating a model with would throw an error if within the model a type associated with e.g. ReverseDiff was encountered. -As a current short-coming, this context can not distinguish between ForwardDiff and Zygote. """ struct ADTypeCheckContext{ADType,ChildContext<:DynamicPPL.AbstractContext} <: DynamicPPL.AbstractContext @@ -134,21 +129,9 @@ end Check that the element types in `vi` are compatible with the ADType of `context`. -When Zygote is being used, we also more explicitly check that `adtype(context)` is -`AutoZygote`. 
This is because Zygote uses the same element type as ForwardDiff, so we can't -discriminate between the two based on element type alone. This function will still fail to -catch cases where Zygote is supposed to be used, but ForwardDiff is used instead. - -Throw an `IncompatibleADTypeError` if an incompatible element type is encountered, or -`WrongADBackendError` if Zygote is used unexpectedly. +Throw an `IncompatibleADTypeError` if an incompatible element type is encountered. """ function check_adtype(context::ADTypeCheckContext, vi::DynamicPPL.AbstractVarInfo) - Zygote.hook(vi) do _ - if !(adtype(context) <: Turing.AutoZygote) - throw(WrongADBackendError(Turing.AutoZygote, adtype(context))) - end - end - valids = valid_eltypes(context) for val in vi[:] valtype = typeof(val) diff --git a/test/test_utils/test_utils.jl b/test/test_utils/test_utils.jl index bf9f2b9b8d..243a80b881 100644 --- a/test/test_utils/test_utils.jl +++ b/test/test_utils/test_utils.jl @@ -7,7 +7,6 @@ using ReverseDiff: ReverseDiff using Test: @test, @testset, @test_throws using Turing: Turing using Turing: DynamicPPL -using Zygote: Zygote # Check that the ADTypeCheckContext works as expected. @testset "ADTypeCheckContext" begin @@ -16,20 +15,12 @@ using Zygote: Zygote adtypes = ( Turing.AutoForwardDiff(), Turing.AutoReverseDiff(), - Turing.AutoZygote(), # TODO: Mooncake # Turing.AutoMooncake(config=nothing), ) for actual_adtype in adtypes sampler = Turing.HMC(0.1, 5; adtype=actual_adtype) for expected_adtype in adtypes - if ( - actual_adtype == Turing.AutoForwardDiff() && - expected_adtype == Turing.AutoZygote() - ) - # TODO(mhauru) We are currently unable to check this case. - continue - end contextualised_tm = DynamicPPL.contextualize( tm, ADTypeCheckContext(expected_adtype, tm.context) ) From e842e30119658bdd73f626bbc939dbcac976e957 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Mon, 17 Mar 2025 14:39:12 +0000 Subject: [PATCH 7/8] Fix a Gibbs test --- test/mcmc/gibbs.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index 3b93ac89cf..b5993078ed 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -279,8 +279,6 @@ end WarmupCounter() = new(0, 0, 0, 0) end - Turing.Inference.drop_space(wuc::WarmupCounter) = wuc - Turing.Inference.getspace(::WarmupCounter) = () Turing.Inference.isgibbscomponent(::WarmupCounter) = true # A trivial state that holds nothing but a VarInfo, to be used with WarmupCounter. 
From 625a9c81a0c82b1dba8a3f5d8027c1a9c910b2bc Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 18 Mar 2025 17:33:28 +0000 Subject: [PATCH 8/8] Clean up exports (#2474) * Regroup exports by package * Export DynamicPPL.returned and DynamicPPL.prefix * Stop exporting @logprob_str and @prob_str * Remove DynamicPPL module export * Remove DynamicPPL.LogDensityFunction re-export * Remove BernoulliLogit, drop support for Distributions < 0.25.77 * Stop blanket re-exporting Libtask and Bijectors * Manually specify AbstractMCMC exports * Format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Remove Bijectors.ordered export * Re-export LinearAlgebra.I * Replace Turing.Model -> DynamicPPL.Model * Format * Keep exporting LogDensityFunction * Add note in docs * Align Turing exports with docs API page * Fix things like `predict` on docs API page * Fix a Gibbs test * Format * Fix missing Bijectors import * Update docs/src/api.md Co-authored-by: Markus Hauru * Update docs/src/api.md Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Clean up broken docs links, remove unneeded deps * Format * Add changelog entry for exports * Clean up exports in essential/Essential * Apply suggestions from code review Co-authored-by: Markus Hauru --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Markus Hauru Co-authored-by: Markus Hauru --- HISTORY.md | 16 +++++++ Project.toml | 2 +- docs/Project.toml | 3 -- docs/make.jl | 13 ++---- docs/src/api.md | 75 +++++++++++++----------------- src/Turing.jl | 78 ++++++++++++++++++++------------ src/essential/Essential.jl | 10 ++-- src/mcmc/Inference.jl | 6 ++- src/mcmc/emcee.jl | 2 +- src/mcmc/ess.jl | 4 +- src/mcmc/mh.jl | 6 +-- src/optimisation/Optimisation.jl | 6 +-- src/stdlib/distributions.jl | 40 ++-------------- test/Project.toml | 2 + test/essential/container.jl | 2 +- test/ext/OptimInterface.jl | 1 + test/mcmc/abstractmcmc.jl | 1 + test/mcmc/gibbs.jl | 7 ++- test/mcmc/hmc.jl | 8 ++-- test/skipped/unit_test_helper.jl | 2 +- test/variational/advi.jl | 5 +- 21 files changed, 139 insertions(+), 150 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 62ca1d350c..f6c162d2d6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -23,6 +23,22 @@ Turing.jl v0.37 uses DynamicPPL v0.35, which brings with it several breaking cha For more details about all of the above, see the changelog of DynamicPPL [here](https://github.com/TuringLang/DynamicPPL.jl/releases/tag/v0.35.0). +### Export list + +Turing.jl's export list has been cleaned up a fair bit. This affects what is imported into your namespace when you do an unqualified `using Turing`. You may need to import things more explicitly than before. + + - The `DynamicPPL` and `AbstractMCMC` modules are no longer exported. You will need to `import DynamicPPL` or `using DynamicPPL: DynamicPPL` (likewise `AbstractMCMC`) yourself, which in turn means that they have to be made available in your project environment. + + - `@logprob_str` and `@prob_str` have been removed following a long deprecation period. + - We no longer re-export everything from `Bijectors` and `Libtask`. To get around this, add `using Bijectors` or `using Libtask` at the top of your script (but we recommend using more selective imports). + + + We no longer export `Bijectors.ordered`. If you were using `ordered`, even Bijectors does not (currently) export this. 
You will have to manually import it with `using Bijectors: ordered`. + +On the other hand, we have added a few more exports: + + - `DynamicPPL.returned` and `DynamicPPL.prefix` are exported (for use with submodels). + - `LinearAlgebra.I` is exported for convenience. + # Release 0.36.0 ## Breaking changes diff --git a/Project.toml b/Project.toml index d72f323373..c5e62a675f 100644 --- a/Project.toml +++ b/Project.toml @@ -58,7 +58,7 @@ BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" DataStructures = "0.18" -Distributions = "0.23.3, 0.24, 0.25" +Distributions = "0.25.77" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" diff --git a/docs/Project.toml b/docs/Project.toml index 025ada5de1..fd26d25df9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,7 +1,4 @@ [deps] -Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656" -DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/docs/make.jl b/docs/make.jl index b553109ea7..978e5881b3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,19 +1,17 @@ using Documenter using Turing -# Need to import Distributions and Bijectors to generate docs for functions -# from those packages. -using Distributions -using Bijectors -using DynamicPPL using DocumenterInterLinks links = InterLinks( "DynamicPPL" => "https://turinglang.org/DynamicPPL.jl/stable/objects.inv", - "AbstractPPL" => "https://turinglang.org/AbstractPPL.jl/dev/objects.inv", + "AbstractPPL" => "https://turinglang.org/AbstractPPL.jl/stable/objects.inv", + "LinearAlgebra" => "https://docs.julialang.org/en/v1/objects.inv", + "AbstractMCMC" => "https://turinglang.org/AbstractMCMC.jl/stable/objects.inv", "ADTypes" => "https://sciml.github.io/ADTypes.jl/stable/objects.inv", "AdvancedVI" => "https://turinglang.org/AdvancedVI.jl/v0.2.8/objects.inv", "DistributionsAD" => "https://turinglang.org/DistributionsAD.jl/stable/objects.inv", + "OrderedCollections" => "https://juliacollections.github.io/OrderedCollections.jl/stable/objects.inv", ) # Doctest setup @@ -21,7 +19,7 @@ DocMeta.setdocmeta!(Turing, :DocTestSetup, :(using Turing); recursive=true) makedocs(; sitename="Turing", - modules=[Turing, Distributions, Bijectors], + modules=[Turing], pages=[ "Home" => "index.md", "API" => "api.md", @@ -29,7 +27,6 @@ makedocs(; ["Inference" => "api/Inference.md", "Optimisation" => "api/Optimisation.md"], ], checkdocs=:exports, - # checkdocs_ignored_modules=[Turing, Distributions, DynamicPPL, AbstractPPL, Bijectors], doctest=false, warnonly=true, plugins=[links], diff --git a/docs/src/api.md b/docs/src/api.md index 3066a7fad9..55b09a9d10 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -6,15 +6,13 @@ Turing.jl directly re-exports the entire public API of the following packages: - [Distributions.jl](https://juliastats.org/Distributions.jl) - [MCMCChains.jl](https://turinglang.org/MCMCChains.jl) - - [AbstractMCMC.jl](https://turinglang.org/AbstractMCMC.jl) - - [Bijectors.jl](https://turinglang.org/Bijectors.jl) - - [Libtask.jl](https://github.com/TuringLang/Libtask.jl) Please see the individual packages for their documentation. ## Individual exports and re-exports -**All** of the following symbols are exported unqualified by Turing, even though the documentation suggests that many of them are qualified. 
+In this API documentation, for the sake of clarity, we have listed the module that actually defines each of the exported symbols. +Note, however, that **all** of the following symbols are exported unqualified by Turing. That means, for example, you can just write ```julia @@ -37,17 +35,22 @@ even though [`Prior()`](@ref) is actually defined in the `Turing.Inference` modu ### Modelling -| Exported symbol | Documentation | Description | -|:--------------- |:----------------------------------- |:-------------------------------------------- | -| `@model` | [`DynamicPPL.@model`](@extref) | Define a probabilistic model | -| `@varname` | [`AbstractPPL.@varname`](@extref) | Generate a `VarName` from a Julia expression | -| `to_submodel` | [`DynamicPPL.to_submodel`](@extref) | Define a submodel | +| Exported symbol | Documentation | Description | +|:-------------------- |:------------------------------------------ |:-------------------------------------------------------------------------------------------- | +| `@model` | [`DynamicPPL.@model`](@extref) | Define a probabilistic model | +| `@varname` | [`AbstractPPL.@varname`](@extref) | Generate a `VarName` from a Julia expression | +| `to_submodel` | [`DynamicPPL.to_submodel`](@extref) | Define a submodel | +| `prefix` | [`DynamicPPL.prefix`](@extref) | Prefix all variable names in a model with a given symbol | +| `LogDensityFunction` | [`DynamicPPL.LogDensityFunction`](@extref) | A struct containing all information about how to evaluate a model. Mostly for advanced users | ### Inference -| Exported symbol | Documentation | Description | -|:--------------- |:------------------------------------------------------------------------------------------------ |:------------------- | -| `sample` | [`StatsBase.sample`](https://turinglang.org/AbstractMCMC.jl/stable/api/#Sampling-a-single-chain) | Sample from a model | +| Exported symbol | Documentation | Description | +|:----------------- |:------------------------------------------------------------------------------------------------ |:---------------------------------- | +| `sample` | [`StatsBase.sample`](https://turinglang.org/AbstractMCMC.jl/stable/api/#Sampling-a-single-chain) | Sample from a model | +| `MCMCThreads` | [`AbstractMCMC.MCMCThreads`](@extref) | Run MCMC using multiple threads | +| `MCMCDistributed` | [`AbstractMCMC.MCMCDistributed`](@extref) | Run MCMC using multiple processes | +| `MCMCSerial` | [`AbstractMCMC.MCMCSerial`](@extref) | Run MCMC using without parallelism | ### Samplers @@ -68,6 +71,7 @@ even though [`Prior()`](@ref) is actually defined in the `Turing.Inference` modu | `SMC` | [`Turing.Inference.SMC`](@ref) | Sequential Monte Carlo | | `PG` | [`Turing.Inference.PG`](@ref) | Particle Gibbs | | `CSMC` | [`Turing.Inference.CSMC`](@ref) | The same as PG | +| `RepeatSampler` | [`Turing.Inference.RepeatSampler`](@ref) | A sampler that runs multiple times on the same variable | | `externalsampler` | [`Turing.Inference.externalsampler`](@ref) | Wrap an external sampler for use in Turing | ### Variational inference @@ -108,52 +112,37 @@ OrderedLogistic LogPoisson ``` -`BernoulliLogit` is part of Distributions.jl since version 0.25.77. -If you are using an older version of Distributions where this isn't defined, Turing will export the same distribution. 
- -```@docs -Distributions.BernoulliLogit -``` - ### Tools to work with distributions | Exported symbol | Documentation | Description | |:--------------- |:-------------------------------------- |:-------------------------------------------------------------- | +| `I` | [`LinearAlgebra.I`](@extref) | Identity matrix | | `filldist` | [`DistributionsAD.filldist`](@extref) | Create a product distribution from a distribution and integers | | `arraydist` | [`DistributionsAD.arraydist`](@extref) | Create a product distribution from an array of distributions | | `NamedDist` | [`DynamicPPL.NamedDist`](@extref) | A distribution that carries the name of the variable | ### Predictions -```@docs -DynamicPPL.predict -``` +| Exported symbol | Documentation | Description | +|:--------------- |:--------------------------------------------------------------------------------- |:------------------------------------------------------- | +| `predict` | [`StatsAPI.predict`](https://turinglang.org/DynamicPPL.jl/stable/api/#Predicting) | Generate samples from posterior predictive distribution | ### Querying model probabilities and quantities Please see the [generated quantities](https://turinglang.org/docs/tutorials/usage-generated-quantities/) and [probability interface](https://turinglang.org/docs/tutorials/usage-probability-interface/) guides for more information. -| Exported symbol | Documentation | Description | -|:-------------------------- |:--------------------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------------- | -| `generated_quantities` | [`DynamicPPL.generated_quantities`](@extref) | Calculate additional quantities defined in a model | -| `pointwise_loglikelihoods` | [`DynamicPPL.pointwise_loglikelihoods`](@extref) | Compute log likelihoods for each sample in a chain | -| `logprior` | [`DynamicPPL.logprior`](@extref) | Compute log prior probability | -| `logjoint` | [`DynamicPPL.logjoint`](@extref) | Compute log joint probability | -| `LogDensityFunction` | [`DynamicPPL.LogDensityFunction`](@extref) | Wrap a Turing model to satisfy LogDensityFunctions.jl interface | -| `condition` | [`AbstractPPL.condition`](@extref) | Condition a model on data | -| `decondition` | [`AbstractPPL.decondition`](@extref) | Remove conditioning on data | -| `conditioned` | [`DynamicPPL.conditioned`](@extref) | Return the conditioned values of a model | -| `fix` | [`DynamicPPL.fix`](@extref) | Fix the value of a variable | -| `unfix` | [`DynamicPPL.unfix`](@extref) | Unfix the value of a variable | -| `OrderedDict` | [`OrderedCollections.OrderedDict`](https://juliacollections.github.io/OrderedCollections.jl/dev/ordered_containers/#OrderedDicts) | An ordered dictionary | - -### Extra re-exports from Bijectors - -Note that Bijectors itself does not export `ordered`. 
- -```@docs -Bijectors.ordered -``` +| Exported symbol | Documentation | Description | +|:-------------------------- |:---------------------------------------------------------------------------------------------------------------------------- |:-------------------------------------------------- | +| `returned` | [`DynamicPPL.returned`](https://turinglang.org/DynamicPPL.jl/stable/api/#DynamicPPL.returned-Tuple%7BModel,%20NamedTuple%7D) | Calculate additional quantities defined in a model | +| `pointwise_loglikelihoods` | [`DynamicPPL.pointwise_loglikelihoods`](@extref) | Compute log likelihoods for each sample in a chain | +| `logprior` | [`DynamicPPL.logprior`](@extref) | Compute log prior probability | +| `logjoint` | [`DynamicPPL.logjoint`](@extref) | Compute log joint probability | +| `condition` | [`AbstractPPL.condition`](@extref) | Condition a model on data | +| `decondition` | [`AbstractPPL.decondition`](@extref) | Remove conditioning on data | +| `conditioned` | [`DynamicPPL.conditioned`](@extref) | Return the conditioned values of a model | +| `fix` | [`DynamicPPL.fix`](@extref) | Fix the value of a variable | +| `unfix` | [`DynamicPPL.unfix`](@extref) | Unfix the value of a variable | +| `OrderedDict` | [`OrderedCollections.OrderedDict`](@extref) | An ordered dictionary | ### Point estimates diff --git a/src/Turing.jl b/src/Turing.jl index d8fc09fb9d..aa5fbe8500 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -4,22 +4,24 @@ using Reexport, ForwardDiff using DistributionsAD, Bijectors, StatsFuns, SpecialFunctions using Statistics, LinearAlgebra using Libtask -@reexport using Distributions, MCMCChains, Libtask, AbstractMCMC, Bijectors +@reexport using Distributions, MCMCChains using Compat: pkgversion using AdvancedVI: AdvancedVI -using DynamicPPL: DynamicPPL, LogDensityFunction +using DynamicPPL: DynamicPPL import DynamicPPL: NoDist, NamedDist using LogDensityProblems: LogDensityProblems using NamedArrays: NamedArrays using Accessors: Accessors using StatsAPI: StatsAPI using StatsBase: StatsBase +using AbstractMCMC using Accessors: Accessors using Printf: Printf using Random: Random +using LinearAlgebra: I using ADTypes: ADTypes @@ -64,6 +66,7 @@ include("deprecated.jl") # to be removed in the next minor version release using DynamicPPL: pointwise_loglikelihoods, generated_quantities, + returned, logprior, logjoint, condition, @@ -71,68 +74,83 @@ using DynamicPPL: fix, unfix, conditioned, - to_submodel + to_submodel, + LogDensityFunction using StatsBase: predict -using Bijectors: ordered using OrderedCollections: OrderedDict # Turing essentials - modelling macros and inference algorithms -export @model, # modelling +export + # DEPRECATED + @submodel, + generated_quantities, + # Modelling - AbstractPPL and DynamicPPL + @model, @varname, - @submodel, # Deprecated to_submodel, - DynamicPPL, - Prior, # Sampling from the prior - MH, # classic sampling + prefix, + LogDensityFunction, + # Sampling - AbstractMCMC + sample, + MCMCThreads, + MCMCDistributed, + MCMCSerial, + # Samplers - Turing.Inference + Prior, + MH, Emcee, ESS, Gibbs, - HMC, # Hamiltonian-like sampling + HMC, SGLD, SGHMC, + PolynomialStepsize, HMCDA, NUTS, - PolynomialStepsize, - IS, # particle-based sampling + IS, SMC, - CSMC, PG, + CSMC, RepeatSampler, - vi, # variational inference - ADVI, - sample, # inference - @logprob_str, # TODO: Remove, see https://github.com/TuringLang/DynamicPPL.jl/issues/356 - @prob_str, # TODO: Remove, see https://github.com/TuringLang/DynamicPPL.jl/issues/356 externalsampler, - 
AutoForwardDiff, # ADTypes + # Variational inference - AdvancedVI + vi, + ADVI, + # ADTypes + AutoForwardDiff, AutoReverseDiff, AutoMooncake, - setprogress!, # debugging + # Debugging - Turing + setprogress!, + # Distributions Flat, FlatPos, BinomialLogit, - BernoulliLogit, # Part of Distributions >= 0.25.77 OrderedLogistic, LogPoisson, - filldist, - arraydist, - NamedDist, # Exports from DynamicPPL + # Tools to work with Distributions + I, # LinearAlgebra + filldist, # DistributionsAD + arraydist, # DistributionsAD + NamedDist, # DynamicPPL + # Predictions - DynamicPPL predict, + # Querying model probabilities - DynamicPPL + returned, pointwise_loglikelihoods, - generated_quantities, logprior, + loglikelihood, logjoint, - LogDensityFunction, condition, decondition, + conditioned, fix, unfix, - conditioned, - OrderedDict, - ordered, # Exports from Bijectors + OrderedDict, # OrderedCollections + # Point estimates - Turing.Optimisation + # The MAP and MLE exports are only needed for the Optim.jl interface. maximum_a_posteriori, maximum_likelihood, - # The MAP and MLE exports are only needed for the Optim.jl interface. MAP, MLE diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl index cfa064c651..b045e4a857 100644 --- a/src/essential/Essential.jl +++ b/src/essential/Essential.jl @@ -17,12 +17,8 @@ using AdvancedPS: AdvancedPS include("container.jl") -export @model, - @varname, - AutoForwardDiff, - AutoReverseDiff, - AutoMooncake, - @logprob_str, - @prob_str +export @model +export @varname +export AutoForwardDiff, AutoReverseDiff, AutoMooncake end # module diff --git a/src/mcmc/Inference.jl b/src/mcmc/Inference.jl index 60aa087cdb..de87a5b391 100644 --- a/src/mcmc/Inference.jl +++ b/src/mcmc/Inference.jl @@ -606,13 +606,15 @@ julia> [first(t.θ.x) for t in transitions] # extract samples for `x` [-1.704630494695469] ``` """ -function transitions_from_chain(model::Turing.Model, chain::MCMCChains.Chains; kwargs...) +function transitions_from_chain( + model::DynamicPPL.Model, chain::MCMCChains.Chains; kwargs... +) return transitions_from_chain(Random.default_rng(), model, chain; kwargs...) end function transitions_from_chain( rng::Random.AbstractRNG, - model::Turing.Model, + model::DynamicPPL.Model, chain::MCMCChains.Chains; sampler=DynamicPPL.SampleFromPrior(), ) diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index ab68f795e1..dfd1fc0d30 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -81,7 +81,7 @@ function AbstractMCMC.step( # Generate a log joint function. vi = state.vi densitymodel = AMH.DensityModel( - Base.Fix1(LogDensityProblems.logdensity, Turing.LogDensityFunction(model, vi)) + Base.Fix1(LogDensityProblems.logdensity, DynamicPPL.LogDensityFunction(model, vi)) ) # Compute the next states. 
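The hunks in this patch that swap `Turing.LogDensityFunction` for `DynamicPPL.LogDensityFunction` all follow the same pattern: bundle the model and varinfo into a `LogDensityFunction` and evaluate it through the LogDensityProblems interface. A minimal sketch of that pattern, assuming a throwaway model `demo_ld` defined inline purely for illustration:

```julia
using Turing, DynamicPPL, LogDensityProblems

@model function demo_ld()
    s ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s))
    return 1.5 ~ Normal(m, sqrt(s))
end

model = demo_ld()
vi = DynamicPPL.VarInfo(model)

# Bundle model and varinfo into a log-density callable.
f = DynamicPPL.LogDensityFunction(model, vi)

# Evaluate the log joint at the current parameter values; the samplers above do the
# same thing via `Base.Fix1(LogDensityProblems.logdensity, f)`.
logp = LogDensityProblems.logdensity(f, vi[:])
```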
diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index c6e62d8e1c..5448173486 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -49,7 +49,7 @@ function AbstractMCMC.step( rng, EllipticalSliceSampling.ESSModel( ESSPrior(model, spl, vi), - Turing.LogDensityFunction( + DynamicPPL.LogDensityFunction( model, vi, DynamicPPL.SamplingContext(spl, DynamicPPL.DefaultContext()) ), ), @@ -110,7 +110,7 @@ Distributions.mean(p::ESSPrior) = p.μ # Evaluate log-likelihood of proposals const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = - Turing.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} + DynamicPPL.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} (ℓ::ESSLogLikelihood)(f::AbstractVector) = LogDensityProblems.logdensity(ℓ, f) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 0af62e2597..6a03f5359f 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -189,7 +189,7 @@ A log density function for the MH sampler. This variant uses the `set_namedtuple!` function to update the `VarInfo`. """ const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = - Turing.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} + DynamicPPL.LogDensityFunction{M,V,<:DynamicPPL.SamplingContext{<:S},AD} where {AD} function LogDensityProblems.logdensity(f::MHLogDensityFunction, x::NamedTuple) vi = deepcopy(f.varinfo) @@ -308,7 +308,7 @@ function propose!!( densitymodel = AMH.DensityModel( Base.Fix1( LogDensityProblems.logdensity, - Turing.LogDensityFunction( + DynamicPPL.LogDensityFunction( model, vi, DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), @@ -343,7 +343,7 @@ function propose!!( densitymodel = AMH.DensityModel( Base.Fix1( LogDensityProblems.logdensity, - Turing.LogDensityFunction( + DynamicPPL.LogDensityFunction( model, vi, DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 9da929504b..0d380033c2 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -139,7 +139,7 @@ struct OptimLogDensity{ C<:OptimizationContext, AD<:ADTypes.AbstractADType, } - ldf::Turing.LogDensityFunction{M,V,C,AD} + ldf::DynamicPPL.LogDensityFunction{M,V,C,AD} end function OptimLogDensity( @@ -148,7 +148,7 @@ function OptimLogDensity( ctx::OptimizationContext; adtype::ADTypes.AbstractADType=AutoForwardDiff(), ) - return OptimLogDensity(Turing.LogDensityFunction(model, vi, ctx; adtype=adtype)) + return OptimLogDensity(DynamicPPL.LogDensityFunction(model, vi, ctx; adtype=adtype)) end # No varinfo @@ -158,7 +158,7 @@ function OptimLogDensity( adtype::ADTypes.AbstractADType=AutoForwardDiff(), ) return OptimLogDensity( - Turing.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx; adtype=adtype) + DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx; adtype=adtype) ) end diff --git a/src/stdlib/distributions.jl b/src/stdlib/distributions.jl index 568ab3ae3b..ebcefe72ac 100644 --- a/src/stdlib/distributions.jl +++ b/src/stdlib/distributions.jl @@ -16,9 +16,6 @@ Base.maximum(::Flat) = Inf Base.rand(rng::Random.AbstractRNG, d::Flat) = rand(rng) Distributions.logpdf(::Flat, x::Real) = zero(x) -# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped -Distributions.pdf(d::Flat, x::Real) = exp(logpdf(d, x)) - # For vec support Distributions.logpdf(::Flat, x::AbstractVector{<:Real}) = zero(x) Distributions.loglikelihood(::Flat, 
x::AbstractVector{<:Real}) = zero(eltype(x)) @@ -41,17 +38,13 @@ struct FlatPos{T<:Real} <: ContinuousUnivariateDistribution end Base.minimum(d::FlatPos) = d.l -Base.maximum(d::FlatPos) = Inf +Base.maximum(::FlatPos) = Inf Base.rand(rng::Random.AbstractRNG, d::FlatPos) = rand(rng) + d.l function Distributions.logpdf(d::FlatPos, x::Real) z = float(zero(x)) return x <= d.l ? oftype(z, -Inf) : z end - -# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped -Distributions.pdf(d::FlatPos, x::Real) = exp(logpdf(d, x)) - # For vec support function Distributions.loglikelihood(d::FlatPos, x::AbstractVector{<:Real}) lower = d.l @@ -91,12 +84,7 @@ BinomialLogit(n::Int, logitp::Real) = BinomialLogit{typeof(logitp)}(n, logitp) Base.minimum(::BinomialLogit) = 0 Base.maximum(d::BinomialLogit) = d.n -# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped -Distributions.pdf(d::BinomialLogit, k::Real) = exp(logpdf(d, k)) -Distributions.logpdf(d::BinomialLogit, k::Real) = _logpdf(d, k) -Distributions.logpdf(d::BinomialLogit, k::Integer) = _logpdf(d, k) - -function _logpdf(d::BinomialLogit, k::Real) +function Distributions.logpdf(d::BinomialLogit, k::Real) n, logitp, logconstant = d.n, d.logitp, d.logconstant _insupport = insupport(d, k) _k = _insupport ? round(Int, k) : 0 @@ -109,16 +97,6 @@ function Base.rand(rng::Random.AbstractRNG, d::BinomialLogit) end Distributions.sampler(d::BinomialLogit) = sampler(Binomial(d.n, logistic(d.logitp))) -# Part of Distributions >= 0.25.77 -if !isdefined(Distributions, :BernoulliLogit) - """ - BernoulliLogit(logitp::Real) - - Create a univariate logit-parameterised Bernoulli distribution. - """ - BernoulliLogit(logitp::Real) = BinomialLogit(1, logitp) -end - """ OrderedLogistic(η, c::AbstractVector) @@ -151,12 +129,7 @@ end Base.minimum(d::OrderedLogistic) = 0 Base.maximum(d::OrderedLogistic) = length(d.cutpoints) + 1 -# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped -Distributions.pdf(d::OrderedLogistic, k::Real) = exp(logpdf(d, k)) -Distributions.logpdf(d::OrderedLogistic, k::Real) = _logpdf(d, k) -Distributions.logpdf(d::OrderedLogistic, k::Integer) = _logpdf(d, k) - -function _logpdf(d::OrderedLogistic, k::Real) +function Distributions.logpdf(d::OrderedLogistic, k::Real) η, cutpoints = d.η, d.cutpoints K = length(cutpoints) + 1 @@ -232,7 +205,7 @@ LogPoisson(logλ::Real) = LogPoisson{typeof(logλ)}(logλ) Base.minimum(d::LogPoisson) = 0 Base.maximum(d::LogPoisson) = Inf -function _logpdf(d::LogPoisson, k::Real) +function Distributions.logpdf(d::LogPoisson, k::Real) _insupport = insupport(d, k) _k = _insupport ? round(Int, k) : 0 logp = _k * d.logλ - d.λ - SpecialFunctions.loggamma(_k + 1) @@ -240,10 +213,5 @@ function _logpdf(d::LogPoisson, k::Real) return _insupport ? 
logp : oftype(logp, -Inf) end -# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped -Distributions.pdf(d::LogPoisson, k::Real) = exp(logpdf(d, k)) -Distributions.logpdf(d::LogPoisson, k::Integer) = _logpdf(d, k) -Distributions.logpdf(d::LogPoisson, k::Real) = _logpdf(d, k) - Base.rand(rng::Random.AbstractRNG, d::LogPoisson) = rand(rng, Poisson(d.λ)) Distributions.sampler(d::LogPoisson) = sampler(Poisson(d.λ)) diff --git a/test/Project.toml b/test/Project.toml index 489923767a..36b7ebdec9 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,6 +6,7 @@ AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" AdvancedVI = "b5ca4192-6429-45e5-a2d9-87aec30a685c" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -45,6 +46,7 @@ AdvancedPS = "=0.6.0" AdvancedVI = "0.2" Aqua = "0.8" BangBang = "0.4" +Bijectors = "0.14, 0.15" Clustering = "0.14, 0.15" Combinatorics = "1" Distributions = "0.25" diff --git a/test/essential/container.jl b/test/essential/container.jl index d1e1b21bd6..1cb790d5ae 100644 --- a/test/essential/container.jl +++ b/test/essential/container.jl @@ -2,7 +2,7 @@ module ContainerTests using AdvancedPS: AdvancedPS using Distributions: Bernoulli, Beta, Gamma, Normal -using DynamicPPL: @model, Sampler +using DynamicPPL: DynamicPPL, @model, Sampler using Test: @test, @testset using Turing diff --git a/test/ext/OptimInterface.jl b/test/ext/OptimInterface.jl index 163cf36c53..a93206e7e0 100644 --- a/test/ext/OptimInterface.jl +++ b/test/ext/OptimInterface.jl @@ -2,6 +2,7 @@ module OptimInterfaceTests using ..Models: gdemo_default using Distributions.FillArrays: Zeros +using DynamicPPL: DynamicPPL using LinearAlgebra: I using Optim: Optim using Random: Random diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 82baa41197..f909df8ef1 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -1,6 +1,7 @@ module AbstractMCMCTests import ..ADUtils +using AbstractMCMC: AbstractMCMC using AdvancedMH: AdvancedMH using Distributions: sample using Distributions.FillArrays: Zeros diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index b5993078ed..69b2b10326 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -9,6 +9,7 @@ using ..NumericalTests: two_sample_test import ..ADUtils import Combinatorics +using AbstractMCMC: AbstractMCMC using Distributions: InverseGamma, Normal using Distributions: sample using DynamicPPL: DynamicPPL @@ -179,7 +180,7 @@ end end # The methods that capture testing information for us. - function Turing.AbstractMCMC.step( + function AbstractMCMC.step( rng::Random.AbstractRNG, model::DynamicPPL.Model, sampler::DynamicPPL.Sampler{<:AlgWrapper}, @@ -187,9 +188,7 @@ end kwargs..., ) capture_targets_and_algs(sampler.alg.inner, model.context) - return Turing.AbstractMCMC.step( - rng, model, unwrap_sampler(sampler), args...; kwargs... - ) + return AbstractMCMC.step(rng, model, unwrap_sampler(sampler), args...; kwargs...) 
end function Turing.DynamicPPL.initialstep( diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl index 0a5910e450..67a46ec00a 100644 --- a/test/mcmc/hmc.jl +++ b/test/mcmc/hmc.jl @@ -4,9 +4,9 @@ using ..Models: gdemo_default using ..ADUtils: ADTypeCheckContext using ..NumericalTests: check_gdemo, check_numerical import ..ADUtils +using Bijectors: Bijectors using Distributions: Bernoulli, Beta, Categorical, Dirichlet, Normal, Wishart, sample -import DynamicPPL -using DynamicPPL: Sampler +using DynamicPPL: DynamicPPL, Sampler import ForwardDiff using HypothesisTests: ApproximateTwoSampleKSTest, pvalue import ReverseDiff @@ -271,7 +271,9 @@ using Turing # HACK: Necessary to avoid NUTS failing during adaptation. try - x ~ transformed(Normal(0, 1), inverse(Bijectors.Logit(lb, ub))) + x ~ Bijectors.transformed( + Normal(0, 1), Bijectors.inverse(Bijectors.Logit(lb, ub)) + ) catch e if e isa DomainError Turing.@addlogprob! -Inf diff --git a/test/skipped/unit_test_helper.jl b/test/skipped/unit_test_helper.jl index aa513e428e..7dec7757bb 100644 --- a/test/skipped/unit_test_helper.jl +++ b/test/skipped/unit_test_helper.jl @@ -10,7 +10,7 @@ function test_grad(turing_model, grad_f; trans=Dict()) @testset "Gradient using random inputs" begin ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoTracker(), - Turing.LogDensityFunction( + DynamicPPL.LogDensityFunction( model_f, vi, DynamicPPL.SamplingContext(SampleFromPrior(), DynamicPPL.DefaultContext()), diff --git a/test/variational/advi.jl b/test/variational/advi.jl index 639df018cc..c2abacb675 100644 --- a/test/variational/advi.jl +++ b/test/variational/advi.jl @@ -4,6 +4,7 @@ using ..Models: gdemo_default using ..NumericalTests: check_gdemo import AdvancedVI using AdvancedVI: TruncatedADAGrad, DecayedADAGrad +using Bijectors: Bijectors using Distributions: Dirichlet, Normal using LinearAlgebra: I using MCMCChains: Chains @@ -71,11 +72,11 @@ using Turing.Essential: TuringDiagMvNormal end m = dirichlet() - b = bijector(m) + b = Bijectors.bijector(m) x0 = m() z0 = b(x0) @test size(z0) == (1,) - x0_inv = inverse(b)(z0) + x0_inv = Bijectors.inverse(b)(z0) @test size(x0_inv) == size(x0) @test all(x0 .≈ x0_inv)