Skip to content

Commit 7124864

Browse files
penelopeysm and mhauru authored
"Fixes" for PG-in-Gibbs (#2629)
* WIP PMCMC work * Fixes to ProduceLogLikelihoodAccumulator * inline definition of `set_retained_vns_del!` * Fix ProduceLogLikelihoodAcc * Remove all uses of `set_retained_vns_del!` * Use nice functions * Remove PG tests with dynamic number of Gibbs-conditioned-observations * Fix essential/container tests * Update pMCMC implementation as per discussion * remove extra printing statements * revert unneeded changes * Add back (some kind of) dynamic model test * fix rebase * Add a todo comment for dynamic model tests --------- Co-authored-by: Markus Hauru <[email protected]>
1 parent c062867 commit 7124864

File tree

3 files changed

+144
-52
lines changed

3 files changed

+144
-52
lines changed

src/mcmc/particle_mcmc.jl

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,38 @@
44

55
### AdvancedPS models and interface
66

7+
"""
8+
set_all_del!(vi::AbstractVarInfo)
9+
10+
Set the "del" flag for all variables in the VarInfo `vi`, thus marking them for
11+
resampling.
12+
"""
13+
function set_all_del!(vi::AbstractVarInfo)
14+
# TODO(penelopeysm): Instead of being a 'del' flag on the VarInfo, we
15+
# could either:
16+
# - keep a boolean 'resample' flag on the trace, or
17+
# - modify the model context appropriately.
18+
# However, this refactoring will have to wait until InitContext is
19+
# merged into DPPL.
20+
for vn in keys(vi)
21+
DynamicPPL.set_flag!(vi, vn, "del")
22+
end
23+
return nothing
24+
end
25+
26+
"""
27+
unset_all_del!(vi::AbstractVarInfo)
28+
29+
Unset the "del" flag for all variables in the VarInfo `vi`, thus preventing
30+
them from being resampled.
31+
"""
32+
function unset_all_del!(vi::AbstractVarInfo)
33+
for vn in keys(vi)
34+
DynamicPPL.unset_flag!(vi, vn, "del")
35+
end
36+
return nothing
37+
end
38+
739
struct TracedModel{S<:AbstractSampler,V<:AbstractVarInfo,M<:Model,E<:Tuple} <:
840
AdvancedPS.AbstractGenericModel
941
model::M
@@ -33,26 +65,30 @@ end
3365
function AdvancedPS.advance!(
3466
trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}, isref::Bool=false
3567
)
36-
# We want to increment num produce for the VarInfo stored in the trace. The trace is
37-
# mutable, so we create a new model with the incremented VarInfo and set it in the trace
38-
model = trace.model
39-
model = Accessors.@set model.f.varinfo = DynamicPPL.increment_num_produce!!(
40-
model.f.varinfo
41-
)
42-
trace.model = model
4368
# Make sure we load/reset the rng in the new replaying mechanism
4469
isref ? AdvancedPS.load_state!(trace.rng) : AdvancedPS.save_state!(trace.rng)
4570
score = consume(trace.model.ctask)
4671
return score
4772
end
4873

4974
function AdvancedPS.delete_retained!(trace::TracedModel)
50-
DynamicPPL.set_retained_vns_del!(trace.varinfo)
75+
# This method is called if, during a CSMC update, we perform a resampling
76+
# and choose the reference particle as the trajectory to carry on from.
77+
# In such a case, we need to ensure that when we continue sampling (i.e.
78+
# the next time we hit tilde_assume), we don't use the values in the
79+
# reference particle but rather sample new values.
80+
#
81+
# Here, we indiscriminately set the 'del' flag for all variables in the
82+
# VarInfo. This is slightly overkill: it is not necessary to set the 'del'
83+
# flag for variables that were already sampled. However, it allows us to
84+
# avoid keeping track of which variables were sampled, which leads to many
85+
# simplifications in the VarInfo data structure.
86+
set_all_del!(trace.varinfo)
5187
return trace
5288
end
5389

5490
function AdvancedPS.reset_model(trace::TracedModel)
55-
return Accessors.@set trace.varinfo = DynamicPPL.reset_num_produce!!(trace.varinfo)
91+
return trace
5692
end
5793

5894
function Libtask.TapedTask(taped_globals, model::TracedModel; kwargs...)
@@ -176,8 +212,7 @@ function DynamicPPL.initialstep(
176212
)
177213
# Reset the VarInfo.
178214
vi = DynamicPPL.setacc!!(vi, ProduceLogLikelihoodAccumulator())
179-
vi = DynamicPPL.reset_num_produce!!(vi)
180-
DynamicPPL.set_retained_vns_del!(vi)
215+
set_all_del!(vi)
181216
vi = DynamicPPL.resetlogp!!(vi)
182217
vi = DynamicPPL.empty!!(vi)
183218

@@ -307,8 +342,7 @@ function DynamicPPL.initialstep(
307342
)
308343
vi = DynamicPPL.setacc!!(vi, ProduceLogLikelihoodAccumulator())
309344
# Reset the VarInfo before new sweep
310-
vi = DynamicPPL.reset_num_produce!!(vi)
311-
DynamicPPL.set_retained_vns_del!(vi)
345+
set_all_del!(vi)
312346
vi = DynamicPPL.resetlogp!!(vi)
313347

314348
# Create a new set of particles
@@ -339,14 +373,15 @@ function AbstractMCMC.step(
339373
)
340374
# Reset the VarInfo before new sweep.
341375
vi = state.vi
342-
vi = DynamicPPL.reset_num_produce!!(vi)
376+
vi = DynamicPPL.setacc!!(vi, ProduceLogLikelihoodAccumulator())
343377
vi = DynamicPPL.resetlogp!!(vi)
344378

345379
# Create reference particle for which the samples will be retained.
380+
unset_all_del!(vi)
346381
reference = AdvancedPS.forkr(AdvancedPS.Trace(model, spl, vi, state.rng))
347382

348383
# For all other particles, do not retain the variables but resample them.
349-
DynamicPPL.set_retained_vns_del!(vi)
384+
set_all_del!(vi)
350385

351386
# Create a new set of particles.
352387
num_particles = spl.alg.nparticles
@@ -451,12 +486,11 @@ function DynamicPPL.assume(
451486
vi = push!!(vi, vn, r, dist)
452487
elseif DynamicPPL.is_flagged(vi, vn, "del")
453488
DynamicPPL.unset_flag!(vi, vn, "del") # Reference particle parent
454-
r = rand(trng, dist)
455-
vi[vn] = DynamicPPL.tovec(r)
456489
# TODO(mhauru):
457490
# The below is the only line that differs from assume called on SampleFromPrior.
458-
# Could we just call assume on SampleFromPrior and then `setorder!!` after that?
459-
vi = DynamicPPL.setorder!!(vi, vn, DynamicPPL.get_num_produce(vi))
491+
# Could we just call assume on SampleFromPrior with a specific rng?
492+
r = rand(trng, dist)
493+
vi[vn] = DynamicPPL.tovec(r)
460494
else
461495
r = vi[vn]
462496
end
@@ -498,8 +532,6 @@ function AdvancedPS.Trace(
498532
rng::AdvancedPS.TracedRNG,
499533
)
500534
newvarinfo = deepcopy(varinfo)
501-
newvarinfo = DynamicPPL.reset_num_produce!!(newvarinfo)
502-
503535
tmodel = TracedModel(model, sampler, newvarinfo, rng)
504536
newtrace = AdvancedPS.Trace(tmodel, rng)
505537
return newtrace

test/essential/container.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ using Turing
1919

2020
@testset "constructor" begin
2121
vi = DynamicPPL.VarInfo()
22+
vi = DynamicPPL.setacc!!(vi, Turing.Inference.ProduceLogLikelihoodAccumulator())
2223
sampler = Sampler(PG(10))
2324
model = test()
2425
trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG())
@@ -46,6 +47,7 @@ using Turing
4647
return a, b
4748
end
4849
vi = DynamicPPL.VarInfo()
50+
vi = DynamicPPL.setacc!!(vi, Turing.Inference.ProduceLogLikelihoodAccumulator())
4951
sampler = Sampler(PG(10))
5052
model = normal()
5153

test/mcmc/gibbs.jl

Lines changed: 89 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ end
207207
val ~ Normal(s, 1)
208208
1.0 ~ Normal(s + m, 1)
209209

210-
n := m + 1
211-
xs = M(undef, n)
210+
n := m
211+
xs = M(undef, 5)
212212
for i in eachindex(xs)
213213
xs[i] ~ Beta(0.5, 0.5)
214214
end
@@ -565,40 +565,98 @@ end
565565
end
566566
end
567567

568-
# The below test used to sample incorrectly before
569-
# https://github.com/TuringLang/Turing.jl/pull/2328
570-
@testset "dynamic model with ESS" begin
571-
@model function dynamic_model_for_ess()
572-
b ~ Bernoulli()
573-
x_length = b ? 1 : 2
574-
x = Vector{Float64}(undef, x_length)
575-
for i in 1:x_length
576-
x[i] ~ Normal(i, 1.0)
568+
@testset "PG with variable number of observations" begin
569+
# When sampling from a model with Particle Gibbs, it is mandatory for
570+
# the number of observations to be the same in all particles, since the
571+
# observations trigger particle resampling.
572+
#
573+
# Up until Turing v0.39, `x ~ dist` statements where `x` was the
574+
# responsibility of a different (non-PG) Gibbs subsampler used to not
575+
# count as an observation. Instead, the log-probability `logpdf(dist, x)`
576+
# would be manually added to the VarInfo's `logp` field and included in the
577+
# weighting for the _following_ observation.
578+
#
579+
# In Turing v0.40, this is now changed: `x ~ dist` uses tilde_observe!!
580+
# which thus triggers resampling. Thus, for example, the following model
581+
# does not work any more:
582+
#
583+
# @model function f()
584+
# a ~ Poisson(2.0)
585+
# x = Vector{Float64}(undef, a)
586+
# for i in eachindex(x)
587+
# x[i] ~ Normal()
588+
# end
589+
# end
590+
# sample(f(), Gibbs(:a => PG(10), :x => MH()), 1000)
591+
#
592+
# because the number of observations in each particle depends on the value
593+
# of `a`.
594+
#
595+
# This testset checks ways of working around such a situation.
596+
597+
function test_dynamic_bernoulli(chain)
598+
means = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 2.0)
599+
stds = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 1.0)
600+
for vn in keys(means)
601+
@test isapprox(mean(skipmissing(chain[:, vn, 1])), means[vn]; atol=0.1)
602+
@test isapprox(std(skipmissing(chain[:, vn, 1])), stds[vn]; atol=0.1)
577603
end
578604
end
579605

580-
m = dynamic_model_for_ess()
581-
chain = sample(m, Gibbs(:b => PG(10), :x => ESS()), 2000; discard_initial=100)
582-
means = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 2.0)
583-
stds = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 1.0)
584-
for vn in keys(means)
585-
@test isapprox(mean(skipmissing(chain[:, vn, 1])), means[vn]; atol=0.1)
586-
@test isapprox(std(skipmissing(chain[:, vn, 1])), stds[vn]; atol=0.1)
606+
# TODO(DPPL0.37/penelopeysm): decide what to do with these tests
607+
@testset "Coalescing multiple observations into one" begin
608+
# Instead of observing x[1] and x[2] separately, we lump them into a
609+
# single distribution.
610+
@model function dynamic_bernoulli()
611+
b ~ Bernoulli()
612+
if b
613+
dists = [Normal(1.0)]
614+
else
615+
dists = [Normal(1.0), Normal(2.0)]
616+
end
617+
return x ~ product_distribution(dists)
618+
end
619+
model = dynamic_bernoulli()
620+
# This currently fails because if the global varinfo has `x` with length 2,
621+
# and the particle sampler has `b = true`, it attempts to calculate the
622+
# log-likelihood of a length-2 vector with respect to a length-1
623+
# distribution.
624+
@test_throws DimensionMismatch chain = sample(
625+
StableRNG(468),
626+
model,
627+
Gibbs(:b => PG(10), :x => ESS()),
628+
2000;
629+
discard_initial=100,
630+
)
631+
# test_dynamic_bernoulli(chain)
587632
end
588-
end
589633

590-
@testset "dynamic model with dot tilde" begin
591-
@model function dynamic_model_with_dot_tilde(
592-
num_zs=10, (::Type{M})=Vector{Float64}
593-
) where {M}
594-
z = Vector{Int}(undef, num_zs)
595-
z .~ Poisson(1.0)
596-
num_ms = sum(z)
597-
m = M(undef, num_ms)
598-
return m .~ Normal(1.0, 1.0)
599-
end
600-
model = dynamic_model_with_dot_tilde()
601-
sample(model, Gibbs(:z => PG(10), :m => HMC(0.01, 4)), 100)
634+
@testset "Inserting @addlogprob!" begin
635+
# On top of observing x[i], we also add in extra 'observations'
636+
@model function dynamic_bernoulli_2()
637+
b ~ Bernoulli()
638+
x_length = b ? 1 : 2
639+
x = Vector{Float64}(undef, x_length)
640+
for i in 1:x_length
641+
x[i] ~ Normal(i, 1.0)
642+
end
643+
if length(x) == 1
644+
# This value is the expectation value of logpdf(Normal(), x) where x ~ Normal().
645+
# See discussion in
646+
# https://github.com/TuringLang/Turing.jl/pull/2629#discussion_r2237323817
647+
@addlogprob!(-1.418849)
648+
end
649+
end
650+
model = dynamic_bernoulli_2()
651+
chain = sample(
652+
StableRNG(468),
653+
model,
654+
Gibbs(:b => PG(10), :x => ESS()),
655+
2000;
656+
discard_initial=100,
657+
)
658+
test_dynamic_bernoulli(chain)
659+
end
602660
end
603661

604662
@testset "Demo model" begin

0 commit comments

Comments (0)