From 33cad3a8db12647dca3aa09bbeb3588c5d282e82 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 21 May 2025 18:27:23 +0100 Subject: [PATCH 01/15] Add three of the DPPL models from https://arxiv.org/pdf/2002.02702 --- .github/workflows/generate_website.yml | 1 + Project.toml | 3 +++ main.jl | 3 +++ models/dppl_gauss_unknown.jl | 12 ++++++++++++ models/dppl_high_dim_gauss.jl | 5 +++++ models/dppl_naive_bayes.jl | 27 ++++++++++++++++++++++++++ 6 files changed, 51 insertions(+) create mode 100644 models/dppl_gauss_unknown.jl create mode 100644 models/dppl_high_dim_gauss.jl create mode 100644 models/dppl_naive_bayes.jl diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml index 2218856..420bf1f 100644 --- a/.github/workflows/generate_website.yml +++ b/.github/workflows/generate_website.yml @@ -88,6 +88,7 @@ jobs: env: ADTYPE_KEYS: ${{ needs.setup-keys.outputs.adtype_keys }} ADTESTS_MODELS_TO_LOAD: ${{ matrix.model }} + DATADEPS_ALWAYS_ACCEPT: "true" - name: Output matrix values id: output-matrix diff --git a/Project.toml b/Project.toml index af96777..00f9471 100644 --- a/Project.toml +++ b/Project.toml @@ -3,13 +3,16 @@ ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/main.jl b/main.jl index 7faffc9..61ebdd9 100644 --- a/main.jl +++ b/main.jl @@ -100,6 +100,9 @@ end @include_model "demo_dot_assume_observe_submodel" @include_model "dot_assume" @include_model "dot_observe" +@include_model "dppl_gauss_unknown.jl" +@include_model "dppl_high_dim_gauss.jl" +@include_model "dppl_naive_bayes.jl" @include_model "dynamic_constraint" @include_model "multiple_constraints_same_var" @include_model "multithreaded" diff --git a/models/dppl_gauss_unknown.jl b/models/dppl_gauss_unknown.jl new file mode 100644 index 0000000..a79a212 --- /dev/null +++ b/models/dppl_gauss_unknown.jl @@ -0,0 +1,12 @@ +n = 10_000 +s = abs(rand()) + 0.5 +y = randn() .+ s * randn(n) + +@model function dppl_gauss_unknown(y) + N = length(y) + m ~ Normal(0, 1) + s ~ truncated(Cauchy(0, 5); lower=0) + y ~ filldist(Normal(m, s), N) +end + +@register dppl_gauss_unknown(y) diff --git a/models/dppl_high_dim_gauss.jl b/models/dppl_high_dim_gauss.jl new file mode 100644 index 0000000..2275b6a --- /dev/null +++ b/models/dppl_high_dim_gauss.jl @@ -0,0 +1,5 @@ +@model function dppl_high_dim_gauss(D) + m ~ filldist(Normal(0, 1), D) +end + +@register dppl_high_dim_gauss(10_000) diff --git a/models/dppl_naive_bayes.jl b/models/dppl_naive_bayes.jl new file mode 100644 index 0000000..3ed7f4f --- /dev/null +++ b/models/dppl_naive_bayes.jl @@ -0,0 +1,27 @@ +using MLDatasets: MNIST +using MultivariateStats: fit, PCA, transform + +# Load MNIST images and labels +features = MNIST(split=:train).features +nrows, ncols, nimages = size(features) +image_raw = Float64.(reshape(features, (nrows * ncols, nimages))) +labels = MNIST(split=:train).targets .+ 1 +C = 10 # Number of labels + +# Preprocess the images by reducing dimensionality +D = 40 +pca = fit(PCA, image_raw; maxoutdim=D) +image = transform(pca, image_raw) + +# Take only the first 1000 images and vectorise +N = 1000 +image_subset = image[:, 1:N]' +image_vec = vec(image_subset[:, :]) +labels = labels[1:N] + +@model dppl_naive_bayes(image_vec, labels, C, D) = begin + m ~ filldist(Normal(0, 10), C, D) + image_vec ~ MvNormal(vec(m[labels, :]), I) +end + +@register dppl_naive_bayes(image_vec, labels, C, D) From 6af413e6b31b18cbefca1efb591aaf4f1a70763b Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 21 May 2025 18:41:04 +0100 Subject: [PATCH 02/15] Add datadeps envvar --- .github/workflows/generate_website.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml index 420bf1f..f9fda06 100644 --- a/.github/workflows/generate_website.yml +++ b/.github/workflows/generate_website.yml @@ -53,6 +53,8 @@ jobs: - name: Setup keys id: keys run: uv run ad.py setup + env: + DATADEPS_ALWAYS_ACCEPT: "true" run-models: runs-on: ubuntu-latest From 86bb6a9513bdb80c5218e40d11751a8389b51e86 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 19 Jun 2025 23:08:46 +0100 Subject: [PATCH 03/15] add three more models --- Project.toml | 2 ++ main.jl | 5 ++++- models/dppl_hier_poisson.jl | 27 +++++++++++++++++++++++++++ models/dppl_logistic_regression.jl | 21 +++++++++++++++++++++ models/dppl_sto_volatility.jl | 5 +++++ 5 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 models/dppl_hier_poisson.jl create mode 100644 models/dppl_logistic_regression.jl create mode 100644 models/dppl_sto_volatility.jl diff --git a/Project.toml b/Project.toml index 00f9471..0783a3a 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" @@ -16,6 +17,7 @@ MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/main.jl b/main.jl index 61ebdd9..92bd483 100644 --- a/main.jl +++ b/main.jl @@ -102,7 +102,10 @@ end @include_model "dot_observe" @include_model "dppl_gauss_unknown.jl" @include_model "dppl_high_dim_gauss.jl" -@include_model "dppl_naive_bayes.jl" +# Disabled because of https://github.com/TuringLang/ADTests/issues/24 +# @include_model "dppl_naive_bayes" +@include_model "dppl_logistic_regression" +@include_model "dppl_hier_poisson" @include_model "dynamic_constraint" @include_model "multiple_constraints_same_var" @include_model "multithreaded" diff --git a/models/dppl_hier_poisson.jl b/models/dppl_hier_poisson.jl new file mode 100644 index 0000000..aa1c591 --- /dev/null +++ b/models/dppl_hier_poisson.jl @@ -0,0 +1,27 @@ +using LazyArrays +using Turing: LogPoisson + +nd, ns = 5, 10 +a0, a1, a0_sig = 1.0, 0.5, 0.3 +n = nd * ns +# simulate group level parameters +a0s = rand(Normal(0, a0_sig), ns) +logpop = rand(Normal(9, 1.5), ns) +λ = exp.(a0 .+ a0s + (a1 * logpop)) +# and individual data +y = mapreduce(λi -> rand(Poisson(λi), nd), vcat, λ) +x = repeat(logpop, inner=nd) +idx = repeat(collect(1:ns), inner=nd) + +lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) + +@model dppl_hier_poisson(y, x, idx, ns) = begin + a0 ~ Normal(0, 10) + a1 ~ Normal(0, 1) + a0_sig ~ truncated(Cauchy(0, 1); lower=0) + a0s ~ filldist(Normal(0, a0_sig), ns) + alpha = a0 .+ a0s[idx] .+ a1 * x + y ~ arraydist(lazyarray(LogPoisson, alpha)) +end + +@register dppl_hier_poisson(y, x, idx, ns) diff --git a/models/dppl_logistic_regression.jl b/models/dppl_logistic_regression.jl new file mode 100644 index 0000000..0a3ada9 --- /dev/null +++ b/models/dppl_logistic_regression.jl @@ -0,0 +1,21 @@ +using StatsFuns: logistic +using LazyArrays + +d, n = 100, 10_000 +X = randn(d, n) +w = randn(d) +y = Int.(logistic.(X' * w) .> 0.5) + +function safelogistic(x::T) where {T} + logistic(x) * (1 - 2 * eps(T)) + eps(T) +end + +lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) + +@model dppl_logistic_regression(Xt, y) = begin + N, D = size(Xt) + w ~ filldist(Normal(), D) + y ~ arraydist(lazyarray(x -> Bernoulli(safelogistic(x)), Xt * w)) +end + +@register dppl_logistic_regression(X', y) diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl new file mode 100644 index 0000000..a849767 --- /dev/null +++ b/models/dppl_sto_volatility.jl @@ -0,0 +1,5 @@ +using Downloads, CSV + +URL = +http_response = Downloads.download(url) +file = CSV.File(http_response) From c9318903d38b655d32fbb57a37fc524756eb6371 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:11:49 +0100 Subject: [PATCH 04/15] Add the last two DPPL models --- main.jl | 7 +++-- models/dppl_hier_poisson.jl | 2 +- models/dppl_hmm_semisup.jl | 59 +++++++++++++++++++++++++++++++++++++ models/dppl_lda.jl | 34 +++++++++++++++++++++ 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 models/dppl_hmm_semisup.jl create mode 100644 models/dppl_lda.jl diff --git a/main.jl b/main.jl index 92bd483..97d9407 100644 --- a/main.jl +++ b/main.jl @@ -101,11 +101,14 @@ end @include_model "dot_assume" @include_model "dot_observe" @include_model "dppl_gauss_unknown.jl" +@include_model "dppl_hier_poisson" @include_model "dppl_high_dim_gauss.jl" +@include_model "dppl_hmm_semisup" +@include_model "dppl_lda" +@include_model "dppl_logistic_regression" # Disabled because of https://github.com/TuringLang/ADTests/issues/24 # @include_model "dppl_naive_bayes" -@include_model "dppl_logistic_regression" -@include_model "dppl_hier_poisson" +@include_model "dppl_sto_volatility" @include_model "dynamic_constraint" @include_model "multiple_constraints_same_var" @include_model "multithreaded" diff --git a/models/dppl_hier_poisson.jl b/models/dppl_hier_poisson.jl index aa1c591..f432839 100644 --- a/models/dppl_hier_poisson.jl +++ b/models/dppl_hier_poisson.jl @@ -15,7 +15,7 @@ idx = repeat(collect(1:ns), inner=nd) lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) -@model dppl_hier_poisson(y, x, idx, ns) = begin +@model function dppl_hier_poisson(y, x, idx, ns) a0 ~ Normal(0, 10) a1 ~ Normal(0, 1) a0_sig ~ truncated(Cauchy(0, 1); lower=0) diff --git a/models/dppl_hmm_semisup.jl b/models/dppl_hmm_semisup.jl new file mode 100644 index 0000000..de23a75 --- /dev/null +++ b/models/dppl_hmm_semisup.jl @@ -0,0 +1,59 @@ +using StatsFuns: logsumexp + +# Set up hyperparameters +K, v, T, T_unsup = 5, 20, 100, 200 +alpha = fill(1.0, K) +beta = fill(0.1, v) +theta = rand(Dirichlet(alpha), K) +phi = rand(Dirichlet(beta), K) + +# Simulate data (supervised) +w = Vector{Int}(undef, T) +z = Vector{Int}(undef, T) +z[1] = rand(1:K) +w[1] = rand(Categorical(phi[:, z[1]])) +for t in 2:T + z[t] = rand(Categorical(theta[:, z[t - 1]])) + w[t] = rand(Categorical(phi[:, z[t]])) +end + +# Unsupervised +u = Vector{Int}(undef, T_unsup) +y = Vector{Int}(undef, T_unsup) +y[1] = rand(1:K) +u[1] = rand(Categorical(phi[:, y[1]])) +for t in 2:T_unsup + y[t] = rand(Categorical(theta[:, y[t - 1]])) + u[t] = rand(Categorical(phi[:, y[t]])) +end + +@model function dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta) + theta ~ filldist(Dirichlet(alpha), K) + phi ~ filldist(Dirichlet(beta), K) + for t in 1:T + w[t] ~ Categorical(phi[:, z[t]]); + end + for t in 2:T + z[t] ~ Categorical(theta[:, z[t - 1]]); + end + + TF = eltype(theta) + acc = similar(alpha, TF, K) + gamma = similar(alpha, TF, K) + temp_gamma = similar(alpha, TF, K) + for k in 1:K + gamma[k] = log(phi[u[1],k]) + end + for t in 2:T_unsup + for k in 1:K + for j in 1:K + acc[j] = gamma[j] + log(theta[k, j]) + log(phi[u[t], k]) + end + temp_gamma[k] = logsumexp(acc) + end + gamma .= temp_gamma + end + DynamicPPL.@addlogprob! logsumexp(gamma) +end + +@register dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta) diff --git a/models/dppl_lda.jl b/models/dppl_lda.jl new file mode 100644 index 0000000..a28ff6f --- /dev/null +++ b/models/dppl_lda.jl @@ -0,0 +1,34 @@ +v = 100 # words +k = 5 # topics +m = 10 # number of docs +alpha = ones(k) +beta = ones(v) + +phi = rand(Dirichlet(beta), k) +theta = rand(Dirichlet(alpha), m) +doc_lengths = rand(Poisson(1_000), m) +n = sum(doc_lengths) + +w_lda = Vector{Int}(undef, n) +doc_lda = Vector{Int}(undef, n) +for i in 1:m + # Because all the models exist in the same scope, we need + # to add some variable suffixes to avoid local/global + # scope warnings. This is quite ugly and should be solved + # properly, using e.g. modules or functions. + local idx_lda = sum(doc_lengths[1:i-1]) # starting index for inner loop + for j in 1:doc_lengths[i] + z_lda = rand(Categorical(theta[:, i])) + w_lda[idx_lda + j] = rand(Categorical(phi[:, z_lda])) + doc_lda[idx_lda + j] = i + end +end + +@model function dppl_lda(k, m, w, doc, alpha, beta) + theta ~ filldist(Dirichlet(alpha), m) + phi ~ filldist(Dirichlet(beta), k) + log_phi_dot_theta = log.(phi * theta) + DynamicPPL.@addlogprob! sum(log_phi_dot_theta[CartesianIndex.(w, doc)]) +end + +@register dppl_lda(k, m, w_lda, doc_lda, alpha, beta) From 46f46857acddcd56d3907a9e25f0f56611f6e1ba Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:19:15 +0100 Subject: [PATCH 05/15] fix function names --- models/dppl_logistic_regression.jl | 2 +- models/dppl_naive_bayes.jl | 2 +- models/dppl_sto_volatility.jl | 24 +++++++++++++++++++++--- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/models/dppl_logistic_regression.jl b/models/dppl_logistic_regression.jl index 0a3ada9..e09f606 100644 --- a/models/dppl_logistic_regression.jl +++ b/models/dppl_logistic_regression.jl @@ -12,7 +12,7 @@ end lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) -@model dppl_logistic_regression(Xt, y) = begin +@model function dppl_logistic_regression(Xt, y) N, D = size(Xt) w ~ filldist(Normal(), D) y ~ arraydist(lazyarray(x -> Bernoulli(safelogistic(x)), Xt * w)) diff --git a/models/dppl_naive_bayes.jl b/models/dppl_naive_bayes.jl index 3ed7f4f..5b23add 100644 --- a/models/dppl_naive_bayes.jl +++ b/models/dppl_naive_bayes.jl @@ -19,7 +19,7 @@ image_subset = image[:, 1:N]' image_vec = vec(image_subset[:, :]) labels = labels[1:N] -@model dppl_naive_bayes(image_vec, labels, C, D) = begin +@model function dppl_naive_bayes(image_vec, labels, C, D) m ~ filldist(Normal(0, 10), C, D) image_vec ~ MvNormal(vec(m[labels, :]), I) end diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl index a849767..26a318c 100644 --- a/models/dppl_sto_volatility.jl +++ b/models/dppl_sto_volatility.jl @@ -1,5 +1,23 @@ using Downloads, CSV -URL = -http_response = Downloads.download(url) -file = CSV.File(http_response) +path = "$(@__DIR__)/../data/dppl_sto_volatility.csv" +data, _ = readdlm(path, ',', header=true) +to_num(x) = x isa Number ? x : 0.1 +y = map(to_num, data[1:500, 2]) + +@model function dppl_sto_volatility(y, ::Type{Tv}=Vector{Float64}) where {Tv} + T = length(y) + μ ~ Cauchy(0, 10) + ϕ ~ Uniform(-1, 1) + σ ~ truncated(Cauchy(0, 5); lower=0) + + h = Tv(undef, T) + h[1] ~ Normal(μ, σ / sqrt(1 - ϕ^2)) + y[1] ~ Normal(0, exp(h[1] / 2)) + for t in 2:T + h[t] ~ Normal(μ + ϕ * (h[t-1] - μ), σ) + y[t] ~ Normal(0, exp(h[t] / 2)) + end +end + +@register dppl_sto_volatility(y) From c41459d99e818b46673637b83891f5e5d0586cc4 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:21:44 +0100 Subject: [PATCH 06/15] fix refresh website workflow for PRs --- .github/workflows/refresh_website.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/refresh_website.yml b/.github/workflows/refresh_website.yml index 4465450..6fce188 100644 --- a/.github/workflows/refresh_website.yml +++ b/.github/workflows/refresh_website.yml @@ -32,9 +32,9 @@ jobs: - name: Download results to web/src/data run: | - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/adtests.json - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/manifest.json - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/model_definitions.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}adtests.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}manifest.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}model_definition.json working-directory: web/src/data # This isn't needed to build the website, it's just there so that the From 131071169f062d6157f04f2095073b6c31788db4 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:22:37 +0100 Subject: [PATCH 07/15] actually fix it --- .github/workflows/refresh_website.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/refresh_website.yml b/.github/workflows/refresh_website.yml index 6fce188..cf73fa2 100644 --- a/.github/workflows/refresh_website.yml +++ b/.github/workflows/refresh_website.yml @@ -32,9 +32,9 @@ jobs: - name: Download results to web/src/data run: | - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}adtests.json - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}manifest.json - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && '/pr' || '' }}model_definition.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}adtests.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}manifest.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}model_definition.json working-directory: web/src/data # This isn't needed to build the website, it's just there so that the From 0d7db2ee9063ae4e2456af96e27b06de7a1b0f89 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:23:53 +0100 Subject: [PATCH 08/15] i lied --- .github/workflows/refresh_website.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/refresh_website.yml b/.github/workflows/refresh_website.yml index cf73fa2..d849733 100644 --- a/.github/workflows/refresh_website.yml +++ b/.github/workflows/refresh_website.yml @@ -34,7 +34,7 @@ jobs: run: | curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}adtests.json curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}manifest.json - curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}model_definition.json + curl -O https://raw.githubusercontent.com/TuringLang/ADTests/refs/heads/gh-pages/${{ github.event_name == 'pull_request' && 'pr/' || '' }}model_definitions.json working-directory: web/src/data # This isn't needed to build the website, it's just there so that the From e3d97e79da0bc234e8dc005f9240d41fb80f6bc8 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 20 Jun 2025 00:27:15 +0100 Subject: [PATCH 09/15] cancel old workflows --- .github/workflows/generate_website.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml index f9fda06..86807f8 100644 --- a/.github/workflows/generate_website.yml +++ b/.github/workflows/generate_website.yml @@ -23,6 +23,11 @@ permissions: actions: write contents: write +# Cancel existing tests on the same PR if a new commit is added to a pull request +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + jobs: setup-keys: runs-on: ubuntu-latest From c1b0cee016570c01357d97ca0ee350941f808824 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 17:57:09 +0100 Subject: [PATCH 10/15] use `model = ` instead of `@register` --- models/dppl_gauss_unknown.jl | 2 +- models/dppl_hier_poisson.jl | 2 +- models/dppl_high_dim_gauss.jl | 2 +- models/dppl_hmm_semisup.jl | 2 +- models/dppl_lda.jl | 2 +- models/dppl_logistic_regression.jl | 2 +- models/dppl_naive_bayes.jl | 2 +- models/dppl_sto_volatility.jl | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/models/dppl_gauss_unknown.jl b/models/dppl_gauss_unknown.jl index a79a212..b70bacd 100644 --- a/models/dppl_gauss_unknown.jl +++ b/models/dppl_gauss_unknown.jl @@ -9,4 +9,4 @@ y = randn() .+ s * randn(n) y ~ filldist(Normal(m, s), N) end -@register dppl_gauss_unknown(y) +model = dppl_gauss_unknown(y) diff --git a/models/dppl_hier_poisson.jl b/models/dppl_hier_poisson.jl index f432839..6850353 100644 --- a/models/dppl_hier_poisson.jl +++ b/models/dppl_hier_poisson.jl @@ -24,4 +24,4 @@ lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) y ~ arraydist(lazyarray(LogPoisson, alpha)) end -@register dppl_hier_poisson(y, x, idx, ns) +model = dppl_hier_poisson(y, x, idx, ns) diff --git a/models/dppl_high_dim_gauss.jl b/models/dppl_high_dim_gauss.jl index 2275b6a..58d09ef 100644 --- a/models/dppl_high_dim_gauss.jl +++ b/models/dppl_high_dim_gauss.jl @@ -2,4 +2,4 @@ m ~ filldist(Normal(0, 1), D) end -@register dppl_high_dim_gauss(10_000) +model = dppl_high_dim_gauss(10_000) diff --git a/models/dppl_hmm_semisup.jl b/models/dppl_hmm_semisup.jl index de23a75..8c8fd8a 100644 --- a/models/dppl_hmm_semisup.jl +++ b/models/dppl_hmm_semisup.jl @@ -56,4 +56,4 @@ end DynamicPPL.@addlogprob! logsumexp(gamma) end -@register dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta) +model = dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta) diff --git a/models/dppl_lda.jl b/models/dppl_lda.jl index a28ff6f..43f8bd8 100644 --- a/models/dppl_lda.jl +++ b/models/dppl_lda.jl @@ -31,4 +31,4 @@ end DynamicPPL.@addlogprob! sum(log_phi_dot_theta[CartesianIndex.(w, doc)]) end -@register dppl_lda(k, m, w_lda, doc_lda, alpha, beta) +model = dppl_lda(k, m, w_lda, doc_lda, alpha, beta) diff --git a/models/dppl_logistic_regression.jl b/models/dppl_logistic_regression.jl index e09f606..3b21928 100644 --- a/models/dppl_logistic_regression.jl +++ b/models/dppl_logistic_regression.jl @@ -18,4 +18,4 @@ lazyarray(f, x) = LazyArray(Base.broadcasted(f, x)) y ~ arraydist(lazyarray(x -> Bernoulli(safelogistic(x)), Xt * w)) end -@register dppl_logistic_regression(X', y) +model = dppl_logistic_regression(X', y) diff --git a/models/dppl_naive_bayes.jl b/models/dppl_naive_bayes.jl index 5b23add..2bc1cfa 100644 --- a/models/dppl_naive_bayes.jl +++ b/models/dppl_naive_bayes.jl @@ -24,4 +24,4 @@ labels = labels[1:N] image_vec ~ MvNormal(vec(m[labels, :]), I) end -@register dppl_naive_bayes(image_vec, labels, C, D) +model = dppl_naive_bayes(image_vec, labels, C, D) diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl index 26a318c..e664773 100644 --- a/models/dppl_sto_volatility.jl +++ b/models/dppl_sto_volatility.jl @@ -20,4 +20,4 @@ y = map(to_num, data[1:500, 2]) end end -@register dppl_sto_volatility(y) +model = dppl_sto_volatility(y) From 544016d9073dd22116589e18a7b94dc64903439a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 18:37:47 +0100 Subject: [PATCH 11/15] fix --- main.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/main.jl b/main.jl index 97d9407..e12c3bd 100644 --- a/main.jl +++ b/main.jl @@ -100,14 +100,13 @@ end @include_model "demo_dot_assume_observe_submodel" @include_model "dot_assume" @include_model "dot_observe" -@include_model "dppl_gauss_unknown.jl" +@include_model "dppl_gauss_unknown" @include_model "dppl_hier_poisson" -@include_model "dppl_high_dim_gauss.jl" +@include_model "dppl_high_dim_gauss" @include_model "dppl_hmm_semisup" @include_model "dppl_lda" @include_model "dppl_logistic_regression" -# Disabled because of https://github.com/TuringLang/ADTests/issues/24 -# @include_model "dppl_naive_bayes" +@include_model "dppl_naive_bayes" @include_model "dppl_sto_volatility" @include_model "dynamic_constraint" @include_model "multiple_constraints_same_var" From f9603628607572a998946f8833619106f47437e9 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 18:43:40 +0100 Subject: [PATCH 12/15] fixes --- models/dppl_hmm_semisup.jl | 2 +- models/dppl_lda.jl | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/models/dppl_hmm_semisup.jl b/models/dppl_hmm_semisup.jl index 8c8fd8a..a165e94 100644 --- a/models/dppl_hmm_semisup.jl +++ b/models/dppl_hmm_semisup.jl @@ -53,7 +53,7 @@ end end gamma .= temp_gamma end - DynamicPPL.@addlogprob! logsumexp(gamma) + @addlogprob! logsumexp(gamma) end model = dppl_hmm_semisup(K, T, T_unsup, w, z, u, alpha, beta) diff --git a/models/dppl_lda.jl b/models/dppl_lda.jl index 43f8bd8..b807d38 100644 --- a/models/dppl_lda.jl +++ b/models/dppl_lda.jl @@ -9,18 +9,14 @@ theta = rand(Dirichlet(alpha), m) doc_lengths = rand(Poisson(1_000), m) n = sum(doc_lengths) -w_lda = Vector{Int}(undef, n) -doc_lda = Vector{Int}(undef, n) +w = Vector{Int}(undef, n) +doc = Vector{Int}(undef, n) for i in 1:m - # Because all the models exist in the same scope, we need - # to add some variable suffixes to avoid local/global - # scope warnings. This is quite ugly and should be solved - # properly, using e.g. modules or functions. - local idx_lda = sum(doc_lengths[1:i-1]) # starting index for inner loop + local idx = sum(doc_lengths[1:i-1]) # starting index for inner loop for j in 1:doc_lengths[i] - z_lda = rand(Categorical(theta[:, i])) - w_lda[idx_lda + j] = rand(Categorical(phi[:, z_lda])) - doc_lda[idx_lda + j] = i + z = rand(Categorical(theta[:, i])) + w[idx + j] = rand(Categorical(phi[:, z])) + doc[idx + j] = i end end @@ -28,7 +24,7 @@ end theta ~ filldist(Dirichlet(alpha), m) phi ~ filldist(Dirichlet(beta), k) log_phi_dot_theta = log.(phi * theta) - DynamicPPL.@addlogprob! sum(log_phi_dot_theta[CartesianIndex.(w, doc)]) + @addlogprob! sum(log_phi_dot_theta[CartesianIndex.(w, doc)]) end -model = dppl_lda(k, m, w_lda, doc_lda, alpha, beta) +model = dppl_lda(k, m, w, doc, alpha, beta) From e27139d6e453f57b53ef42de988303afd5125c05 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 18:50:20 +0100 Subject: [PATCH 13/15] fix --- models/dppl_sto_volatility.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl index e664773..27bb213 100644 --- a/models/dppl_sto_volatility.jl +++ b/models/dppl_sto_volatility.jl @@ -1,5 +1,3 @@ -using Downloads, CSV - path = "$(@__DIR__)/../data/dppl_sto_volatility.csv" data, _ = readdlm(path, ',', header=true) to_num(x) = x isa Number ? x : 0.1 From d781d5334ffbe369b76a8ef0d3e762deffb2199e Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 19:09:12 +0100 Subject: [PATCH 14/15] fixes --- Project.toml | 1 + main.jl | 2 +- models/dppl_sto_volatility.jl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0783a3a..db204b1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,7 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" diff --git a/main.jl b/main.jl index e12c3bd..71e9e72 100644 --- a/main.jl +++ b/main.jl @@ -17,7 +17,7 @@ ADTYPES = Dict( "ForwardDiff" => AutoForwardDiff(), "ReverseDiff" => AutoReverseDiff(; compile = false), "ReverseDiffCompiled" => AutoReverseDiff(; compile = true), - "Mooncake" => AutoMooncake(; config = nothing), + "Mooncake" => AutoMooncake(), "EnzymeForward" => AutoEnzyme(; mode = set_runtime_activity(Forward, true)), "EnzymeReverse" => AutoEnzyme(; mode = set_runtime_activity(Reverse, true)), "Zygote" => AutoZygote(), diff --git a/models/dppl_sto_volatility.jl b/models/dppl_sto_volatility.jl index 27bb213..381075a 100644 --- a/models/dppl_sto_volatility.jl +++ b/models/dppl_sto_volatility.jl @@ -1,3 +1,5 @@ +using DelimitedFiles: readdlm + path = "$(@__DIR__)/../data/dppl_sto_volatility.csv" data, _ = readdlm(path, ',', header=true) to_num(x) = x isa Number ? x : 0.1 From 07c976f947faead41cc720c8e2b173e89294b284 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 29 Jun 2025 19:31:13 +0100 Subject: [PATCH 15/15] try with unbuffered --- .github/workflows/generate_website.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/generate_website.yml b/.github/workflows/generate_website.yml index 86807f8..f206337 100644 --- a/.github/workflows/generate_website.yml +++ b/.github/workflows/generate_website.yml @@ -96,6 +96,7 @@ jobs: ADTYPE_KEYS: ${{ needs.setup-keys.outputs.adtype_keys }} ADTESTS_MODELS_TO_LOAD: ${{ matrix.model }} DATADEPS_ALWAYS_ACCEPT: "true" + PYTHONUNBUFFERED: "1" - name: Output matrix values id: output-matrix