Merge pull request #23 from TuringLang/py/dppl-models

penelopeysm · web-flow · commit 0db60277dfd8 · 2025-08-14T17:11:22.000+01:00
Add DPPL models
diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml
@@ -23,6 +23,11 @@ permissions:
   actions: write
   contents: write
 
+# Cancel any in-progress run of this workflow for the same PR when a new commit is pushed
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   setup-keys:
     runs-on: ubuntu-latest
@@ -53,6 +58,8 @@ jobs:
       - name: Setup keys
         id: keys
         run: uv run ad.py setup
+        env:
+          DATADEPS_ALWAYS_ACCEPT: "true"
 
   run-models:
     runs-on: ubuntu-latest
@@ -88,6 +95,8 @@ jobs:
         env:
           ADTYPE_KEYS: ${{ needs.setup-keys.outputs.adtype_keys }}
           ADTESTS_MODELS_TO_LOAD: ${{ matrix.model }}
+          DATADEPS_ALWAYS_ACCEPT: "true"
+          PYTHONUNBUFFERED: "1"
 
       - name: Output matrix values
         id: output-matrix
diff --git a/Project.toml b/Project.toml
@@ -1,18 +1,24 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
diff --git a/main.jl b/main.jl
@@ -96,6 +96,14 @@ end
 @include_model "Distributions" "observe_bernoulli"
 @include_model "Distributions" "observe_categorical"
 @include_model "Distributions" "observe_von_mises"
+@include_model "DynamicPPL arXiV paper" "dppl_gauss_unknown"
+@include_model "DynamicPPL arXiV paper" "dppl_hier_poisson"
+@include_model "DynamicPPL arXiV paper" "dppl_high_dim_gauss"
+@include_model "DynamicPPL arXiV paper" "dppl_hmm_semisup"
+@include_model "DynamicPPL arXiV paper" "dppl_lda"
+@include_model "DynamicPPL arXiV paper" "dppl_logistic_regression"
+@include_model "DynamicPPL arXiV paper" "dppl_naive_bayes"
+@include_model "DynamicPPL arXiV paper" "dppl_sto_volatility"
 @include_model "DynamicPPL demo models" "demo_assume_dot_observe"
 @include_model "DynamicPPL demo models" "demo_assume_dot_observe_literal"
 @include_model "DynamicPPL demo models" "demo_assume_index_observe"
diff --git a/models/dppl_gauss_unknown.jl b/models/dppl_gauss_unknown.jl
@@ -0,0 +1,15 @@
+# Synthetic data: `n` Gaussian draws that share one random offset, scaled by a
+# random factor drawn from [0.5, 1.5).
+n = 10_000
+s = rand() + 0.5
+y = randn() .+ s .* randn(n)
+
+# Gaussian with unknown mean `m` and unknown scale `s`; every element of `y` is
+# observed from Normal(m, s).
+@model function dppl_gauss_unknown(y)
+    m ~ Normal(0, 1)
+    s ~ truncated(Cauchy(0, 5); lower=0)
+    y ~ filldist(Normal(m, s), length(y))
+end
+
+model = dppl_gauss_unknown(y)
diff --git a/models/dppl_hier_poisson.jl b/models/dppl_hier_poisson.jl
@@ -0,0 +1,32 @@
+using LazyArrays
+using Turing: LogPoisson
+
+# Simulation settings: `nd` observations for each of `ns` groups, plus the values of
+# the intercept, slope and group-effect scale used to generate the data.
+nd, ns = 5, 10
+a0, a1, a0_sig = 1.0, 0.5, 0.3
+n = nd * ns
+# Group-level quantities: random intercept offsets and the per-group covariate.
+a0s = rand(Normal(0, a0_sig), ns)
+logpop = rand(Normal(9, 1.5), ns)
+λ = exp.((a0 .+ a0s) .+ a1 .* logpop)
+# Individual-level data: `nd` Poisson counts per group, concatenated group by group.
+y = reduce(vcat, [rand(Poisson(λi), nd) for λi in λ])
+x = repeat(logpop; inner=nd)
+idx = repeat(1:ns; inner=nd)
+
+# Wrap the broadcast of `f` over `x` in a LazyArray.
+lazyarray(f, x) = LazyArray(Base.broadcasted(f, x))
+
+# Hierarchical Poisson regression: observation i belongs to group `idx[i]`, which has
+# its own intercept offset `a0s[idx[i]]`; the slope `a1` on `x` is shared.
+@model function dppl_hier_poisson(y, x, idx, ns)
+    a0 ~ Normal(0, 10)
+    a1 ~ Normal(0, 1)
+    a0_sig ~ truncated(Cauchy(0, 1); lower=0)
+    a0s ~ filldist(Normal(0, a0_sig), ns)
+    log_rate = a0 .+ a0s[idx] .+ a1 * x
+    y ~ arraydist(lazyarray(LogPoisson, log_rate))
+end
+
+model = dppl_hier_poisson(y, x, idx, ns)
diff --git a/models/dppl_high_dim_gauss.jl b/models/dppl_high_dim_gauss.jl
@@ -0,0 +1,7 @@
+# `D` independent standard-normal latent variables and no observations.
+@model function dppl_high_dim_gauss(D)
+    m ~ filldist(Normal(), D)
+end
+
+# Instantiate the model with 10_000 dimensions.
+model = dppl_high_dim_gauss(10_000)
diff --git a/models/dppl_hmm_semisup.jl b/models/dppl_hmm_semisup.jl
@@ -0,0 +1,69 @@
+using StatsFuns: logsumexp
+
+# Hyperparameters: K hidden states, v output symbols, T labelled time steps and
+# T_unsup unlabelled time steps.
+K, v, T, T_unsup = 5, 20, 100, 200
+alpha = fill(1.0, K)
+beta = fill(0.1, v)
+# Column j of `theta`: transition probabilities out of state j.
+# Column k of `phi`: emission probabilities of state k.
+theta = rand(Dirichlet(alpha), K)
+phi = rand(Dirichlet(beta), K)
+
+# Supervised sequence: both the hidden states `z` and the emitted symbols `w` are kept.
+w = Vector{Int}(undef, T)
+z = Vector{Int}(undef, T)
+z[1] = rand(1:K)
+w[1] = rand(Categorical(phi[:, z[1]]))
+for t in 2:T
+    z[t] = rand(Categorical(theta[:, z[t - 1]]))
+    w[t] = rand(Categorical(phi[:, z[t]]))
+end
+
+# Unsupervised sequence: only the symbols `u` are passed to the model; the hidden
+# states `y` are simulated here but are not handed to it.
+u = Vector{Int}(undef, T_unsup)
+y = Vector{Int}(undef, T_unsup)
+y[1] = rand(1:K)
+u[1] = rand(Categorical(phi[:, y[1]]))
+for t in 2:T_unsup
+    y[t] = rand(Categorical(theta[:, y[t - 1]]))
+    u[t] = rand(Categorical(phi[:, y[t]]))
+end
+
+# Semi-supervised HMM: the labelled sequence (w, z) enters through ordinary `~`
+# statements, the unlabelled sequence `u` through a manually accumulated log-likelihood.
+@model function dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta)
+    theta ~ filldist(Dirichlet(alpha), K)
+    phi ~ filldist(Dirichlet(beta), K)
+
+    # Labelled sequence: emissions given states, then transitions between states.
+    for t in 1:T
+        w[t] ~ Categorical(phi[:, z[t]])
+    end
+    for t in 2:T
+        z[t] ~ Categorical(theta[:, z[t - 1]])
+    end
+
+    # Unlabelled sequence: recursion over time in log space, summing over the
+    # previous state j for every current state k.
+    TF = eltype(theta)
+    terms = similar(alpha, TF, K)
+    gamma = similar(alpha, TF, K)
+    next_gamma = similar(alpha, TF, K)
+    for k in 1:K
+        gamma[k] = log(phi[u[1], k])
+    end
+    for t in 2:T_unsup
+        for k in 1:K
+            for j in 1:K
+                terms[j] = gamma[j] + log(theta[k, j]) + log(phi[u[t], k])
+            end
+            next_gamma[k] = logsumexp(terms)
+        end
+        gamma .= next_gamma
+    end
+    @addlogprob! logsumexp(gamma)
+end
+
+model = dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta)
diff --git a/models/dppl_lda.jl b/models/dppl_lda.jl
@@ -0,0 +1,37 @@
+v = 100      # words
+k = 5        # topics
+m = 10       # number of docs
+alpha = ones(k)
+beta = ones(v)
+
+# Ground truth used for simulation: column t of `phi` is the word distribution of
+# topic t, and column d of `theta` is the topic distribution of document d.
+phi = rand(Dirichlet(beta), k)
+theta = rand(Dirichlet(alpha), m)
+doc_lengths = rand(Poisson(1_000), m)
+n = sum(doc_lengths)
+
+# Simulate the corpus: `w[i]` is the i-th word token, `doc[i]` the document it is in.
+w = Vector{Int}(undef, n)
+doc = Vector{Int}(undef, n)
+for i in 1:m
+    local idx = sum(doc_lengths[1:i-1]) # tokens in earlier documents (write offset)
+    for j in 1:doc_lengths[i]
+        # `local` (as for `idx`) keeps this topic draw from being ambiguous with a
+        # global `z` that may already exist when this file is included.
+        local z = rand(Categorical(theta[:, i]))
+        w[idx + j] = rand(Categorical(phi[:, z]))
+        doc[idx + j] = i
+    end
+end
+
+# LDA with the per-token topic summed out: token i contributes
+# log((phi * theta)[w[i], doc[i]]) to the log-density.
+@model function dppl_lda(k, m, w, doc, alpha, beta)
+    theta ~ filldist(Dirichlet(alpha), m)
+    phi ~ filldist(Dirichlet(beta), k)
+    log_phi_dot_theta = log.(phi * theta)
+    @addlogprob! sum(log_phi_dot_theta[CartesianIndex.(w, doc)])
+end
+
+model = dppl_lda(k, m, w, doc, alpha, beta)
diff --git a/models/dppl_logistic_regression.jl b/models/dppl_logistic_regression.jl
@@ -0,0 +1,26 @@
+using StatsFuns: logistic
+using LazyArrays
+
+# Synthetic data: `n` feature vectors of dimension `d` (the columns of `X`), labelled
+# 1 when the logistic of the linear predictor exceeds 0.5 and 0 otherwise.
+d, n = 100, 10_000
+X = randn(d, n)
+w = randn(d)
+y = Int.(logistic.(X' * w) .> 0.5)
+
+# Logistic function rescaled into [eps(T), 1 - eps(T)], so the result is never
+# exactly 0 or 1.
+safelogistic(x::T) where {T} = logistic(x) * (1 - 2 * eps(T)) + eps(T)
+
+# Wrap the broadcast of `f` over `x` in a LazyArray.
+lazyarray(f, x) = LazyArray(Base.broadcasted(f, x))
+
+# Bayesian logistic regression with standard-normal weights. `Xt` has one row per
+# observation and one column per feature.
+@model function dppl_logistic_regression(Xt, y)
+    D = size(Xt, 2)
+    w ~ filldist(Normal(), D)
+    y ~ arraydist(lazyarray(x -> Bernoulli(safelogistic(x)), Xt * w))
+end
+
+model = dppl_logistic_regression(X', y)
diff --git a/models/dppl_naive_bayes.jl b/models/dppl_naive_bayes.jl
@@ -0,0 +1,32 @@
+using MLDatasets: MNIST
+using MultivariateStats: fit, PCA, transform
+
+# Load the MNIST training split once and reuse it for both images and labels
+trainset = MNIST(split=:train)
+features = trainset.features
+nrows, ncols, nimages = size(features)
+image_raw = Float64.(reshape(features, (nrows * ncols, nimages)))
+# Shift the targets by one so they can be used as row indices into `m` below
+labels = trainset.targets .+ 1
+C = 10 # Number of labels
+
+# Preprocess the images by reducing dimensionality
+D = 40
+pca = fit(PCA, image_raw; maxoutdim=D)
+image = transform(pca, image_raw)
+
+# Take only the first 1000 images and vectorise. `image_subset[:, :]` materialises
+# the transposed (image x feature) matrix so that `vec` flattens it column by column.
+N = 1000
+image_subset = image[:, 1:N]'
+image_vec = vec(image_subset[:, :])
+labels = labels[1:N]
+
+# Gaussian naive Bayes: `m[c, :]` is the mean feature vector of class c, and every
+# image is observed with identity covariance around the mean of its own class.
+@model function dppl_naive_bayes(image_vec, labels, C, D)
+    m ~ filldist(Normal(0, 10), C, D)
+    image_vec ~ MvNormal(vec(m[labels, :]), I)
+end
+
+model = dppl_naive_bayes(image_vec, labels, C, D)
diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl
@@ -0,0 +1,29 @@
+using DelimitedFiles: readdlm
+
+# Read the CSV (skipping its header row) and keep column 2 of the first 500 rows.
+path = "$(@__DIR__)/../data/dppl_sto_volatility.csv"
+data = first(readdlm(path, ','; header=true))
+# Numeric cells are kept as they are; anything else is replaced by 0.1.
+to_num(x::Number) = x
+to_num(x) = 0.1
+y = map(to_num, data[1:500, 2])
+
+# Stochastic volatility: the latent log-volatility `h` follows an AR(1) process with
+# mean `μ`, coefficient `ϕ` and innovation scale `σ`. `Tv` is the container type of `h`.
+@model function dppl_sto_volatility(y, ::Type{Tv}=Vector{Float64}) where {Tv}
+    T = length(y)
+    μ ~ Cauchy(0, 10)
+    ϕ ~ Uniform(-1, 1)
+    σ ~ truncated(Cauchy(0, 5); lower=0)
+
+    h = Tv(undef, T)
+    # The first state is drawn from the stationary distribution of the AR(1) process.
+    h[1] ~ Normal(μ, σ / sqrt(1 - ϕ^2))
+    y[1] ~ Normal(0, exp(h[1] / 2))
+    for t in 2:T
+        h[t] ~ Normal(μ + ϕ * (h[t-1] - μ), σ)
+        y[t] ~ Normal(0, exp(h[t] / 2))
+    end
+end
+
+model = dppl_sto_volatility(y)