Allow usage of AbstractSampler (#2008)

torfjelde · JaimeRZP · yebai · web-flow · commit d89dae34de81 · 2023-07-05T09:25:30.000+01:00
* initial work on allowing AdvancedHMC samplers * simplify the hacky initialize_nuts method * slight generalization * remove unnecessary type constraint * rever changes to sample overloads * use a subtype of InferenceAlgorithm to wrap any sampler * improve usage of SamplerWrapper * renamed hmc_new.jl to something a bit more indicative * added support for AdvancedMH * forgot to change include * renamed SamplerWrapper to ExternalSampler and provided a function externalsampler * added tests for Advanced{HMC,MH} * fixed external tests * change target acceptance rate * fixed optim tests * remove NelderMead from tests * allow models with one variance parameter per observation to fail MLE test * no tests (#2028) * no tests * more tol --------- Co-authored-by: Jaime RZ <jaimerz011235813@gmail.com> Co-authored-by: Hong Ge <3279477+yebai@users.noreply.github.com>
diff --git a/src/Turing.jl b/src/Turing.jl
@@ -112,6 +112,7 @@ export  @model,                 # modelling
         resume,
         @logprob_str,
         @prob_str,
+        externalsampler,
 
         setchunksize,           # helper
         setadbackend,
diff --git a/src/contrib/inference/abstractmcmc.jl b/src/contrib/inference/abstractmcmc.jl
@@ -0,0 +1,87 @@
+struct TuringState{S,F}
+    state::S
+    logdensity::F
+end
+
+struct TuringTransition{T,NT<:NamedTuple,F<:AbstractFloat}
+    θ::T
+    lp::F
+    stat::NT
+end
+
+function TuringTransition(vi::AbstractVarInfo, t)
+    theta = tonamedtuple(vi)
+    lp = getlogp(vi)
+    return TuringTransition(theta, lp, getstats(t))
+end
+
+metadata(t::TuringTransition) = merge((lp = t.lp,), t.stat)
+DynamicPPL.getlogp(t::TuringTransition) = t.lp
+
+state_to_turing(f::DynamicPPL.LogDensityFunction, state) = TuringState(state, f)
+function transition_to_turing(f::DynamicPPL.LogDensityFunction, transition)
+    θ = getparams(transition)
+    varinfo = DynamicPPL.unflatten(f.varinfo, θ)
+    # TODO: `deepcopy` is overkill; make more efficient.
+    varinfo = DynamicPPL.invlink!!(deepcopy(varinfo), f.model)
+    return TuringTransition(varinfo, transition)
+end
+
+# NOTE: Only thing that depends on the underlying sampler.
+# Something similar should be part of AbstractMCMC at some point:
+# https://github.com/TuringLang/AbstractMCMC.jl/pull/86
+getparams(transition::AdvancedHMC.Transition) = transition.z.θ
+getstats(transition::AdvancedHMC.Transition) = transition.stat
+
+getparams(transition::AdvancedMH.Transition) = transition.params
+getstats(transition) = NamedTuple()
+
+getvarinfo(f::DynamicPPL.LogDensityFunction) = f.varinfo
+getvarinfo(f::LogDensityProblemsAD.ADGradientWrapper) = getvarinfo(parent(f))
+
+setvarinfo(f::DynamicPPL.LogDensityFunction, varinfo) = Setfield.@set f.varinfo = varinfo
+setvarinfo(f::LogDensityProblemsAD.ADGradientWrapper, varinfo) = setvarinfo(parent(f), varinfo)
+
+# TODO: Do we also support `resume`, etc?
+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    sampler_wrapper::Sampler{<:ExternalSampler};
+    kwargs...
+)
+    sampler = sampler_wrapper.alg.sampler
+
+    # Create a log-density function with an implementation of the
+    # gradient so we ensure that we're using the same AD backend as in Turing.
+    f = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(model))
+
+    # Link the varinfo.
+    f = setvarinfo(f, DynamicPPL.link!!(getvarinfo(f), model))
+
+    # Then just call `AdvancedHMC.step` with the right arguments.
+    transition_inner, state_inner = AbstractMCMC.step(
+        rng, AbstractMCMC.LogDensityModel(f), sampler; kwargs...
+    )
+
+    # Update the `state`
+    return transition_to_turing(f, transition_inner), state_to_turing(f, state_inner)
+end
+
+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    sampler_wrapper::Sampler{<:ExternalSampler},
+    state::TuringState;
+    kwargs...
+)
+    sampler = sampler_wrapper.alg.sampler
+    f = state.logdensity
+
+    # Then just call `AdvancedHMC.step` with the right arguments.
+    transition_inner, state_inner = AbstractMCMC.step(
+        rng, AbstractMCMC.LogDensityModel(f), sampler, state.state; kwargs...
+    )
+
+    # Update the `state`
+    return transition_to_turing(f, transition_inner), state_to_turing(f, state_inner)
+end
diff --git a/src/inference/Inference.jl b/src/inference/Inference.jl
@@ -22,6 +22,7 @@ using DynamicPPL
 using AbstractMCMC: AbstractModel, AbstractSampler
 using DocStringExtensions: TYPEDEF, TYPEDFIELDS
 using DataStructures: OrderedSet
+using Setfield: Setfield
 
 import AbstractMCMC
 import AdvancedHMC; const AHMC = AdvancedHMC
@@ -66,7 +67,8 @@ export  InferenceAlgorithm,
         dot_observe,
         resume,
         predict,
-        isgibbscomponent
+        isgibbscomponent,
+        externalsampler
 
 #######################
 # Sampler abstraction #
@@ -77,9 +79,26 @@ abstract type ParticleInference <: InferenceAlgorithm end
 abstract type Hamiltonian{AD} <: InferenceAlgorithm end
 abstract type StaticHamiltonian{AD} <: Hamiltonian{AD} end
 abstract type AdaptiveHamiltonian{AD} <: Hamiltonian{AD} end
-
 getADbackend(::Hamiltonian{AD}) where AD = AD()
 
+"""
+    ExternalSampler{S<:AbstractSampler}
+
+# Fields
+$(TYPEDFIELDS)
+"""
+struct ExternalSampler{S<:AbstractSampler} <: InferenceAlgorithm
+    "the sampler to wrap"
+    sampler::S
+end
+
+"""
+    externalsampler(sampler::AbstractSampler)
+
+Wrap a sampler so it can be used as an inference algorithm.
+"""
+externalsampler(sampler::AbstractSampler) = ExternalSampler(sampler)
+
 # Algorithm for sampling from the prior
 struct Prior <: InferenceAlgorithm end
 
@@ -246,7 +265,6 @@ function AbstractMCMC.sample(
     return AbstractMCMC.sample(rng, model, SampleFromPrior(), ensemble, N, n_chains;
                                chain_type=chain_type, progress=progress, kwargs...)
 end
-
 ##########################
 # Chain making utilities #
 ##########################
@@ -442,6 +460,7 @@ include("gibbs_conditional.jl")
 include("gibbs.jl")
 include("../contrib/inference/sghmc.jl")
 include("emcee.jl")
+include("../contrib/inference/abstractmcmc.jl")
 
 ################
 # Typing tools #
diff --git a/test/contrib/inference/abstractmcmc.jl b/test/contrib/inference/abstractmcmc.jl
@@ -0,0 +1,75 @@
+using Turing.Inference: AdvancedHMC
+
+function initialize_nuts(model::Turing.Model)
+    # Create a log-density function with an implementation of the
+    # gradient so we ensure that we're using the same AD backend as in Turing.
+    f = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(model))
+
+    # Link the varinfo.
+    f = Turing.Inference.setvarinfo(f, DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model))
+
+    # Choose parameter dimensionality and initial parameter value
+    D = LogDensityProblems.dimension(f)
+    initial_θ = rand(D) .- 0.5
+
+    # Define a Hamiltonian system
+    metric = AdvancedHMC.DiagEuclideanMetric(D)
+    hamiltonian = AdvancedHMC.Hamiltonian(metric, f)
+
+    # Define a leapfrog solver, with initial step size chosen heuristically
+    initial_ϵ = AdvancedHMC.find_good_stepsize(hamiltonian, initial_θ)
+    integrator = AdvancedHMC.Leapfrog(initial_ϵ)
+
+    # Define an HMC sampler, with the following components
+    #   - multinomial sampling scheme,
+    #   - generalised No-U-Turn criteria, and
+    #   - windowed adaption for step-size and diagonal mass matrix
+    proposal = AdvancedHMC.NUTS{AdvancedHMC.MultinomialTS,AdvancedHMC.GeneralisedNoUTurn}(integrator)
+    adaptor = AdvancedHMC.StanHMCAdaptor(
+        AdvancedHMC.MassMatrixAdaptor(metric),
+        AdvancedHMC.StepSizeAdaptor(0.65, integrator)
+    )
+
+    return AdvancedHMC.HMCSampler(proposal, metric, adaptor)
+end
+
+
+function initialize_mh(model)
+    f = DynamicPPL.LogDensityFunction(model)
+    d = LogDensityProblems.dimension(f)
+    return AdvancedMH.RWMH(MvNormal(Zeros(d), 0.1 * I))
+end
+
+@testset "External samplers" begin
+    @testset "AdvancedHMC.jl" begin
+        for model in DynamicPPL.TestUtils.DEMO_MODELS
+            # Need some functionality to initialize the sampler.
+            # TODO: Remove this once the constructors in the respective packages become "lazy".
+            sampler = initialize_nuts(model);
+            DynamicPPL.TestUtils.test_sampler(
+                [model],
+                DynamicPPL.Sampler(externalsampler(sampler), model),
+                5_000;
+                nadapts=1_000,
+                discard_initial=1_000,
+                rtol=0.2
+            )
+        end
+    end
+
+    @testset "AdvancedMH.jl" begin
+        for model in DynamicPPL.TestUtils.DEMO_MODELS
+            # Need some functionality to initialize the sampler.
+            # TODO: Remove this once the constructors in the respective packages become "lazy".
+            sampler = initialize_mh(model);
+            DynamicPPL.TestUtils.test_sampler(
+                [model],
+                DynamicPPL.Sampler(externalsampler(sampler), model),
+                10_000;
+                discard_initial=1_000,
+                thinning=10,
+                rtol=0.2
+            )
+        end
+    end
+end
diff --git a/test/modes/OptimInterface.jl b/test/modes/OptimInterface.jl
@@ -159,7 +159,7 @@ end
         @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
             result_true = posterior_optima(model)
 
-            @testset "$(optimizer)" for optimizer in [LBFGS(), NelderMead()]
+            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()]
                 result = optimize(model, MAP(), optimizer)
                 vals = result.values
 
@@ -170,21 +170,42 @@ end
                 end
             end
         end
+
+
+        # Some of the models have one variance parameter per observation, and so
+        # the MLE should have the variances set to 0. Since we're working in
+        # transformed space, this corresponds to `-Inf`, which is of course not achievable.
+        # In particular, it can result in "early termniation" of the optimization process
+        # because we hit NaNs, etc. To avoid this, we set the `g_tol` and the `f_tol` to
+        # something larger than the default.
+        allowed_incorrect_mle = [
+            DynamicPPL.TestUtils.demo_dot_assume_dot_observe,
+            DynamicPPL.TestUtils.demo_assume_index_observe,
+            DynamicPPL.TestUtils.demo_assume_multivariate_observe,
+            DynamicPPL.TestUtils.demo_assume_observe_literal,
+            DynamicPPL.TestUtils.demo_dot_assume_observe_submodel,
+            DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix,
+            DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix,
+        ]
         @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
             result_true = likelihood_optima(model)
 
             # `NelderMead` seems to struggle with convergence here, so we exclude it.
-            @testset "$(optimizer)" for optimizer in [LBFGS(),]
-                result = optimize(model, MLE(), optimizer)
+            @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),]
+                result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3))
                 vals = result.values
 
                 for vn in DynamicPPL.TestUtils.varnames(model)
                     for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
-                        @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
+                        if model.f in allowed_incorrect_mle
+                            @test isfinite(get(result_true, vn_leaf))
+                        else
+                            @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05
+                        end
                     end
                 end
             end
-       end
+        end
     end
 
     # Issue: https://discourse.julialang.org/t/two-equivalent-conditioning-syntaxes-giving-different-likelihood-values/100320
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -77,6 +77,7 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($
                     @timeit_include("inference/Inference.jl")
                     @timeit_include("contrib/inference/dynamichmc.jl")
                     @timeit_include("contrib/inference/sghmc.jl")
+                    @timeit_include("contrib/inference/abstractmcmc.jl")
                     @timeit_include("inference/mh.jl")
                 end
             end