
Commit 64357e1

Fix for pointwise_loglikelihoods (#281)

Currently, statements such as `x .~ Normal()` result in only a single entry in the result of `pointwise_loglikelihoods`, i.e. `x` is treated as a single multivariate random variable rather than as a collection of independent random variables. This is unfortunate for a couple of reasons:

a) It is counter-intuitive, as indicated by users finding it confusing: TuringLang/Turing.jl#1666. I fully agree with them, in particular because of (b).

b) It differs from how `x` is treated in `dot_tilde_assume`, due to the use of `DynamicPPL.unwrap_right_left_vns` in the assume-branch but _not_ in the observe-branch: https://github.com/TuringLang/DynamicPPL.jl/blob/b82459a081c4b8925da3c0d97a6dc61687648ed3/src/compiler.jl#L369-L387

We _could_ simply add `unwrap_right_left_vns` to the observe-branch too, _but_ that would add some unnecessary overhead due to https://github.com/TuringLang/DynamicPPL.jl/blob/b82459a081c4b8925da3c0d97a6dc61687648ed3/src/compiler.jl#L106-L115. On the bright side, it would make the inputs to `dot_tilde_assume!` and `dot_tilde_observe!` more consistent, so I'm a bit uncertain what the "right" choice is here.

For now I've decided to simply call `unwrap_right_left_vns` from within `dot_tilde_observe!` for `PointwiseLikelihoodContext`, as it only adds overhead to the `pointwise_loglikelihoods` computation and nothing else. IMO this is the way to go for this PR, but the above should be given more thought later, e.g. by introducing a multi-index `VarName`.

1 parent 9d4a8f2 · commit 64357e1
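To make the change concrete: the following is a minimal Python sketch (illustrative only, not DynamicPPL code; `normal_logpdf` is a made-up helper) contrasting the old bookkeeping, one joint entry for `x`, with the new one, one entry per element:

```python
import math

def normal_logpdf(x, mu=0.0, sigma=1.0):
    # log density of a univariate Normal(mu, sigma)
    return -0.5 * math.log(2 * math.pi * sigma**2) - (x - mu) ** 2 / (2 * sigma**2)

x = [1.0, 1.0]

# Old behaviour: `x .~ Normal()` produced a SINGLE entry, as if `x` were
# one multivariate observation.
joint = {"x": sum(normal_logpdf(xi) for xi in x)}

# New behaviour: one entry PER element, treating the elements of `x` as
# independent observations.
pointwise = {f"x[{i + 1}]": normal_logpdf(xi) for i, xi in enumerate(x)}

print(joint)      # {'x': -2.8378770664093453}
print(pointwise)  # one value of about -1.41894 per element
```

The per-element value matches the `-1.4189385332046727` that appears in the updated doctest below.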

File tree

3 files changed: +94 −27 lines changed


Project.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 name = "DynamicPPL"
 uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8"
-version = "0.12.3"
+version = "0.12.4"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
```

src/loglikelihoods.jl

Lines changed: 60 additions & 11 deletions

````diff
@@ -92,15 +92,35 @@ end
 function dot_tilde_observe!(context::PointwiseLikelihoodContext, right, left, vn, inds, vi)
     # Need the `logp` value, so we cannot defer `acclogp!` to child-context, i.e.
     # we have to intercept the call to `dot_tilde_observe!`.
-    logp = dot_tilde_observe(context.context, right, left, vi)
-    acclogp!(vi, logp)
 
-    # Track loglikelihood value.
-    push!(context, vn, logp)
+    # We want to treat `.~` as a collection of independent observations,
+    # hence we need the `logp` for each of them. Broadcasting the univariate
+    # `tilde_observe` does exactly this.
+    logps = _pointwise_tilde_observe(context.context, right, left, vi)
+    acclogp!(vi, sum(logps))
+
+    # Need to unwrap the `vn`, i.e. get one `VarName` for each entry in `left`.
+    _, _, vns = unwrap_right_left_vns(right, left, vn)
+    for (vn, logp) in zip(vns, logps)
+        # Track loglikelihood value.
+        push!(context, vn, logp)
+    end
 
     return left
 end
 
+# FIXME: This is really not a good approach since it needs to stay in sync with
+# the `dot_assume` implementations, but as things are _right now_ this is the best we can do.
+function _pointwise_tilde_observe(context, right, left, vi)
+    return tilde_observe.(Ref(context), right, left, Ref(vi))
+end
+
+function _pointwise_tilde_observe(
+    context, right::MultivariateDistribution, left::AbstractMatrix, vi
+)
+    return tilde_observe.(Ref(context), Ref(right), eachcol(left), Ref(vi))
+end
+
 """
     pointwise_loglikelihoods(model::Model, chain::Chains, keytype = String)
 
@@ -114,22 +134,30 @@ Currently, only `String` and `VarName` are supported.
 # Notes
 Say `y` is a `Vector` of `n` i.i.d. `Normal(μ, σ)` variables, with `μ` and `σ`
 both being `<:Real`. Then the *observe* (i.e. when the left-hand side is an
-*observation*) statements can be implemented in two ways:
+*observation*) statements can be implemented in three ways:
+1. using a `for` loop:
 ```julia
 for i in eachindex(y)
     y[i] ~ Normal(μ, σ)
 end
 ```
-or
+2. using `.~`:
+```julia
+y .~ Normal(μ, σ)
+```
+3. using `MvNormal`:
 ```julia
-y ~ MvNormal(fill(μ, n), fill(σ, n))
+y ~ MvNormal(fill(μ, n), Diagonal(fill(σ, n)))
 ```
-Unfortunately, just by looking at the latter statement, it's impossible to tell
-whether or not this is one *single* observation which is `n` dimensional OR if we
-have *multiple* 1-dimensional observations. Therefore, `loglikelihoods` will only
-work with the first example.
+
+In (1) and (2), `y` will be treated as a collection of `n` i.i.d. 1-dimensional variables,
+while in (3) `y` will be treated as a _single_ n-dimensional observation.
+
+This is important to keep in mind, in particular if the computation is used
+for downstream computations.
 
 # Examples
+## From chain
 ```julia-repl
 julia> using DynamicPPL, Turing
@@ -169,6 +197,27 @@ Dict{VarName,Array{Float64,2}} with 4 entries:
   xs[1] => [-1.42932; -2.68123; … ; -1.66333; -1.66333]
   xs[3] => [-1.42862; -2.67573; … ; -1.66251; -1.66251]
 ```
+
+## Broadcasting
+Note that `x .~ Dist()` will treat `x` as a collection of
+_independent_ observations rather than as a single observation.
+
+```jldoctest; setup = :(using Distributions)
+julia> @model function demo(x)
+           x .~ Normal()
+       end;
+
+julia> m = demo([1.0, ]);
+
+julia> ℓ = pointwise_loglikelihoods(m, VarInfo(m)); first(ℓ[@varname(x[1])])
+-1.4189385332046727
+
+julia> m = demo([1.0; 1.0]);
+
+julia> ℓ = pointwise_loglikelihoods(m, VarInfo(m)); first.((ℓ[@varname(x[1])], ℓ[@varname(x[2])]))
+(-1.4189385332046727, -1.4189385332046727)
+```
+
 """
 function pointwise_loglikelihoods(model::Model, chain, keytype::Type{T}=String) where {T}
     # Get the data by executing the model once
````

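As a numeric sanity check on the docstring's three formulations, here is a standalone Python sketch (an illustration with a hypothetical `normal_logpdf` helper, not the package's code) showing that the per-element terms of (1) and (2) sum to the single joint term of (3):

```python
import math

def normal_logpdf(x, mu=0.0, sigma=1.0):
    # log density of a univariate Normal(mu, sigma)
    return -0.5 * math.log(2 * math.pi * sigma**2) - (x - mu) ** 2 / (2 * sigma**2)

y, mu, sigma = [10.0, 10.0], 1.0, 0.5

# (1) for-loop: one term per element.
loop_total = sum(normal_logpdf(yi, mu, sigma) for yi in y)

# (2) broadcasting `.~`: the same per-element terms, kept separately.
broadcast_terms = [normal_logpdf(yi, mu, sigma) for yi in y]

# (3) diagonal-covariance MvNormal: a SINGLE joint term, which for a
# diagonal covariance factorises into the same sum.
mv_total = sum(
    normal_logpdf(yi, m, s) for yi, m, s in zip(y, [mu] * len(y), [sigma] * len(y))
)

# The totals agree; only the bookkeeping (n entries vs one entry) differs.
```

This is exactly why `pointwise_loglikelihoods` can report per-element values for (1) and (2) but only a single value for (3).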
test/loglikelihoods.jl

Lines changed: 33 additions & 15 deletions

```diff
@@ -4,7 +4,7 @@
     # `dot_assume` and `observe`
     m = TV(undef, length(x))
     m .~ Normal()
-    return x ~ MvNormal(m, 0.5 * ones(length(x)))
+    return x ~ MvNormal(m, 0.5)
 end
 
 @model function gdemo2(x=10 * ones(2), ::Type{TV}=Vector{Float64}) where {TV}
@@ -13,13 +13,13 @@ end
     for i in eachindex(m)
         m[i] ~ Normal()
     end
-    return x ~ MvNormal(m, 0.5 * ones(length(x)))
+    return x ~ MvNormal(m, 0.5)
 end
 
 @model function gdemo3(x=10 * ones(2))
     # Multivariate `assume` and `observe`
     m ~ MvNormal(length(x), 1.0)
-    return x ~ MvNormal(m, 0.5 * ones(length(x)))
+    return x ~ MvNormal(m, 0.5)
 end
 
 @model function gdemo4(x=10 * ones(2), ::Type{TV}=Vector{Float64}) where {TV}
@@ -39,11 +39,11 @@ end
     return x .~ Normal(m, 0.5)
 end
 
-# @model function gdemo6(::Type{TV} = Vector{Float64}) where {TV}
-#     # `assume` and literal `observe`
-#     m ~ MvNormal(length(x), 1.0)
-#     [10.0, 10.0] ~ MvNormal(m, 0.5 * ones(2))
-# end
+@model function gdemo6(::Type{TV}=Vector{Float64}) where {TV}
+    # `assume` and literal `observe`
+    m ~ MvNormal(2, 1.0)
+    return [10.0, 10.0] ~ MvNormal(m, 0.5)
+end
 
 @model function gdemo7(::Type{TV}=Vector{Float64}) where {TV}
     # `dot_assume` and literal `observe` with indexing
@@ -54,11 +54,11 @@ end
     end
 end
 
-# @model function gdemo8(::Type{TV} = Vector{Float64}) where {TV}
-#     # `assume` and literal `dot_observe`
-#     m ~ Normal()
-#     [10.0, ] .~ Normal(m, 0.5)
-# end
+@model function gdemo8(::Type{TV}=Vector{Float64}) where {TV}
+    # `assume` and literal `dot_observe`
+    m ~ Normal()
+    return [10.0] .~ Normal(m, 0.5)
+end
 
 @model function _prior_dot_assume(::Type{TV}=Vector{Float64}) where {TV}
     m = TV(undef, 2)
@@ -76,7 +76,7 @@ end
 end
 
 @model function _likelihood_dot_observe(m, x)
-    return x ~ MvNormal(m, 0.5 * ones(length(m)))
+    return x ~ MvNormal(m, 0.5)
 end
 
 @model function gdemo10(x=10 * ones(2), ::Type{TV}=Vector{Float64}) where {TV}
@@ -87,8 +87,26 @@ end
     @submodel _likelihood_dot_observe(m, x)
 end
 
+@model function gdemo11(x=10 * ones(2, 1), ::Type{TV}=Vector{Float64}) where {TV}
+    m = TV(undef, length(x))
+    m .~ Normal()
+
+    # Dotted observe for `Matrix`.
+    return x .~ MvNormal(m, 0.5)
+end
+
 const gdemo_models = (
-    gdemo1(), gdemo2(), gdemo3(), gdemo4(), gdemo5(), gdemo7(), gdemo9(), gdemo10()
+    gdemo1(),
+    gdemo2(),
+    gdemo3(),
+    gdemo4(),
+    gdemo5(),
+    gdemo6(),
+    gdemo7(),
+    gdemo8(),
+    gdemo9(),
+    gdemo10(),
+    gdemo11(),
 )
 
 @testset "loglikelihoods.jl" begin
```

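The new `gdemo11` exercises the `AbstractMatrix` method of `_pointwise_tilde_observe`, which treats each column of the matrix as one draw from the multivariate distribution (mirroring `eachcol(left)`). A rough Python sketch of that column-wise accounting (illustrative only; `mvnormal_iso_logpdf` is a made-up helper for a diagonal-covariance Normal):

```python
import math

def normal_logpdf(x, mu=0.0, sigma=1.0):
    # log density of a univariate Normal(mu, sigma)
    return -0.5 * math.log(2 * math.pi * sigma**2) - (x - mu) ** 2 / (2 * sigma**2)

def mvnormal_iso_logpdf(col, mu, sigma):
    # a diagonal-covariance MvNormal factorises into a sum of univariate terms
    return sum(normal_logpdf(x, m, sigma) for x, m in zip(col, mu))

# `left` plays the role of the observed AbstractMatrix: each COLUMN is one
# observation of the multivariate distribution.
left = [[1.0, 2.0],   # row 1
        [1.0, 2.0]]   # row 2  -> two columns = two observations
mu = [0.0, 0.0]

# One log-likelihood per column; their sum is what `acclogp!` receives.
logps = [mvnormal_iso_logpdf(col, mu, 1.0) for col in zip(*left)]
total = sum(logps)
```

So a `2×1` matrix (as in `gdemo11`) yields a single pointwise entry, while a `2×n` matrix yields `n` of them.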