From ea69430a76d6b9cb3a3b6350229bf335acad1f00 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 22 Oct 2025 04:32:54 -0400 Subject: [PATCH 01/32] update vi interface to match AdvancedVI@0.5 --- Project.toml | 2 +- src/Turing.jl | 3 + src/variational/VariationalInference.jl | 80 +++++++++++-------------- src/variational/deprecated.jl | 61 ------------------- test/Project.toml | 2 +- test/runtests.jl | 78 ++++++++++++------------ test/variational/advi.jl | 72 ++++++++-------------- 7 files changed, 106 insertions(+), 192 deletions(-) delete mode 100644 src/variational/deprecated.jl diff --git a/Project.toml b/Project.toml index a2e5f206f4..9a8997ce38 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ Accessors = "0.1" AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6, 0.7, 0.8" AdvancedMH = "0.8" AdvancedPS = "0.7" -AdvancedVI = "0.4" +AdvancedVI = "0.5" BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" diff --git a/src/Turing.jl b/src/Turing.jl index 0cdbe24586..0d29e1397e 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -117,6 +117,9 @@ export q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian, + KLMinRepGradProxDescent, + KLMinRepGradDescent, + KLMinScoreGradDescent, # ADTypes AutoForwardDiff, AutoReverseDiff, diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index d516319684..1af8a24eb0 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -1,21 +1,24 @@ module Variational -using DynamicPPL +using AdvancedVI: + AdvancedVI, KLMinRepGradDescent, KLMinRepGradProxDescent, KLMinScoreGradDescent using ADTypes +using Bijectors: Bijectors using Distributions +using DynamicPPL using LinearAlgebra using LogDensityProblems using Random +using ..Turing: DEFAULT_ADTYPE, PROGRESS -import ..Turing: DEFAULT_ADTYPE, PROGRESS - -import AdvancedVI -import Bijectors - -export vi, q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian - -include("deprecated.jl") +export vi, + q_locationscale, + q_meanfield_gaussian, + q_fullrank_gaussian, + KLMinRepGradProxDescent, + KLMinRepGradDescent, + KLMinScoreGradDescent """ q_initialize_scale( @@ -248,76 +251,61 @@ end """ vi( [rng::Random.AbstractRNG,] - model::DynamicPPL.Model; + model::DynamicPPL.Model, q, - n_iterations::Int; - objective::AdvancedVI.AbstractVariationalObjective = AdvancedVI.RepGradELBO( - 10; entropy = AdvancedVI.ClosedFormEntropyZeroGradient() - ), + max_iter::Int; + algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent(DEFAULT_ADTYPE; n_samples=10), show_progress::Bool = Turing.PROGRESS[], - optimizer::Optimisers.AbstractRule = AdvancedVI.DoWG(), - averager::AdvancedVI.AbstractAverager = AdvancedVI.PolynomialAveraging(), - operator::AdvancedVI.AbstractOperator = AdvancedVI.ProximalLocationScaleEntropy(), - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE, kwargs... ) -Approximating the target `model` via variational inference by optimizing `objective` with the initialization `q`. +Approximate the target `model` via the variational inference algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. +The default `algorithm` assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. +For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. # Arguments - `model`: The target `DynamicPPL.Model`. 
- `q`: The initial variational approximation. -- `n_iterations`: Number of optimization steps. +- `max_iter`: Maximum number of steps. # Keyword Arguments -- `objective`: Variational objective to be optimized. +- `algorithm`: Variational inference algorithm. - `show_progress`: Whether to show the progress bar. -- `optimizer`: Optimization algorithm. -- `averager`: Parameter averaging strategy. -- `operator`: Operator applied after each optimization step. -- `adtype`: Automatic differentiation backend. +- `adtype`: Automatic differentiation backend to be applied to the log-density. The default value for `algorithm` also uses this backend for differentiation the variational objective. See the docs of `AdvancedVI.optimize` for additional keyword arguments. # Returns -- `q`: Variational distribution formed by the last iterate of the optimization run. -- `q_avg`: Variational distribution formed by the averaged iterates according to `averager`. -- `state`: Collection of states used for optimization. This can be used to resume from a past call to `vi`. -- `info`: Information generated during the optimization run. +- `q`: Output variational distribution of `algorithm`. +- `state`: Collection of states used by `algorithm`. This can be used to resume from a past call to `vi`. +- `info`: Information generated while executing `algorithm`. """ function vi( rng::Random.AbstractRNG, model::DynamicPPL.Model, q, - n_iterations::Int; - objective=AdvancedVI.RepGradELBO( - 10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient() - ), - show_progress::Bool=PROGRESS[], - optimizer=AdvancedVI.DoWG(), - averager=AdvancedVI.PolynomialAveraging(), - operator=AdvancedVI.ProximalLocationScaleEntropy(), + max_iter::Int, + args...; adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, + algorithm=KLMinRepGradProxDescent(adtype; n_samples=10), + show_progress::Bool=PROGRESS[], kwargs..., ) return AdvancedVI.optimize( rng, - LogDensityFunction(model), - objective, + algorithm, + max_iter, + LogDensityFunction(model; adtype), q, - n_iterations; + args...; show_progress=show_progress, - adtype, - optimizer, - averager, - operator, kwargs..., ) end -function vi(model::DynamicPPL.Model, q, n_iterations::Int; kwargs...) - return vi(Random.default_rng(), model, q, n_iterations; kwargs...) +function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) + return vi(Random.default_rng(), model, q, max_iter; kwargs...) end end diff --git a/src/variational/deprecated.jl b/src/variational/deprecated.jl deleted file mode 100644 index 9a9f4777b5..0000000000 --- a/src/variational/deprecated.jl +++ /dev/null @@ -1,61 +0,0 @@ - -import DistributionsAD -export ADVI - -Base.@deprecate meanfield(model) q_meanfield_gaussian(model) - -struct ADVI{AD} - "Number of samples used to estimate the ELBO in each optimization step." - samples_per_step::Int - "Maximum number of gradient steps." - max_iters::Int - "AD backend used for automatic differentiation." - adtype::AD -end - -function ADVI( - samples_per_step::Int=1, - max_iters::Int=1000; - adtype::ADTypes.AbstractADType=ADTypes.AutoForwardDiff(), -) - Base.depwarn( - "The type ADVI will be removed in future releases. Please refer to the new interface for `vi`", - :ADVI; - force=true, - ) - return ADVI{typeof(adtype)}(samples_per_step, max_iters, adtype) -end - -function vi(model::DynamicPPL.Model, alg::ADVI; kwargs...) - Base.depwarn( - "This specialization along with the type `ADVI` will be deprecated in future releases. 
Please refer to the new interface for `vi`.", - :vi; - force=true, - ) - q = q_meanfield_gaussian(Random.default_rng(), model) - objective = AdvancedVI.RepGradELBO( - alg.samples_per_step; entropy=AdvancedVI.ClosedFormEntropy() - ) - operator = AdvancedVI.IdentityOperator() - _, q_avg, _, _ = vi(model, q, alg.max_iters; objective, operator, kwargs...) - return q_avg -end - -function vi( - model::DynamicPPL.Model, - alg::ADVI, - q::Bijectors.TransformedDistribution{<:DistributionsAD.TuringDiagMvNormal}; - kwargs..., -) - Base.depwarn( - "This specialization along with the type `ADVI` will be deprecated in future releases. Please refer to the new interface for `vi`.", - :vi; - force=true, - ) - objective = AdvancedVI.RepGradELBO( - alg.samples_per_step; entropy=AdvancedVI.ClosedFormEntropy() - ) - operator = AdvancedVI.IdentityOperator() - _, q_avg, _, _ = vi(model, q, alg.max_iters; objective, operator, kwargs...) - return q_avg -end diff --git a/test/Project.toml b/test/Project.toml index 138b1a1a0d..b03dfd8970 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -44,7 +44,7 @@ AbstractMCMC = "5" AbstractPPL = "0.11, 0.12, 0.13" AdvancedMH = "0.6, 0.7, 0.8" AdvancedPS = "0.7" -AdvancedVI = "0.4" +AdvancedVI = "0.5" Aqua = "0.8" BangBang = "0.4" Bijectors = "0.14, 0.15" diff --git a/test/runtests.jl b/test/runtests.jl index 5fb6b21411..d2535d58a3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,7 +13,7 @@ include("test_utils/models.jl") include("test_utils/numerical_tests.jl") include("test_utils/sampler.jl") -Turing.setprogress!(false) +#Turing.setprogress!(false) included_paths, excluded_paths = parse_args(ARGS) # Filter which tests to run and collect timing and allocations information to show in a @@ -30,55 +30,59 @@ macro timeit_include(path::AbstractString) end @testset "Turing" verbose = true begin - @testset "Aqua" begin - @timeit_include("Aqua.jl") - end + # @testset "Aqua" begin + # @timeit_include("Aqua.jl") + # end - @testset "AD" verbose = true begin - @timeit_include("ad.jl") - end + # @testset "AD" verbose = true begin + # @timeit_include("ad.jl") + # end - @testset "essential" verbose = true begin - @timeit_include("essential/container.jl") - end + # @testset "essential" verbose = true begin + # @timeit_include("essential/container.jl") + # end - @testset "samplers (without AD)" verbose = true begin - @timeit_include("mcmc/particle_mcmc.jl") - @timeit_include("mcmc/emcee.jl") - @timeit_include("mcmc/ess.jl") - @timeit_include("mcmc/is.jl") - end + # @testset "samplers (without AD)" verbose = true begin + # @timeit_include("mcmc/particle_mcmc.jl") + # @timeit_include("mcmc/emcee.jl") + # @timeit_include("mcmc/ess.jl") + # @timeit_include("mcmc/is.jl") + # end @timeit TIMEROUTPUT "inference" begin - @testset "inference with samplers" verbose = true begin - @timeit_include("mcmc/gibbs.jl") - @timeit_include("mcmc/hmc.jl") - @timeit_include("mcmc/Inference.jl") - @timeit_include("mcmc/sghmc.jl") - @timeit_include("mcmc/external_sampler.jl") - @timeit_include("mcmc/mh.jl") - @timeit_include("ext/dynamichmc.jl") - @timeit_include("mcmc/repeat_sampler.jl") - end + # @testset "inference with samplers" verbose = true begin + # @timeit_include("mcmc/gibbs.jl") + # @timeit_include("mcmc/hmc.jl") + # @timeit_include("mcmc/Inference.jl") + # @timeit_include("mcmc/sghmc.jl") + # @timeit_include("mcmc/external_sampler.jl") + # @timeit_include("mcmc/mh.jl") + # @timeit_include("ext/dynamichmc.jl") + # @timeit_include("mcmc/repeat_sampler.jl") + # end @testset "variational 
algorithms" begin @timeit_include("variational/advi.jl") end - @testset "mode estimation" verbose = true begin - @timeit_include("optimisation/Optimisation.jl") - @timeit_include("ext/OptimInterface.jl") - end + # @testset "mode estimation" verbose = true begin + # @timeit_include("optimisation/Optimisation.jl") + # @timeit_include("ext/OptimInterface.jl") + # end end - @testset "stdlib" verbose = true begin - @timeit_include("stdlib/distributions.jl") - @timeit_include("stdlib/RandomMeasures.jl") - end + # @testset "variational optimisers" begin + # @timeit_include("variational/optimisers.jl") + # end - @testset "utilities" begin - @timeit_include("mcmc/utilities.jl") - end + # @testset "stdlib" verbose = true begin + # @timeit_include("stdlib/distributions.jl") + # @timeit_include("stdlib/RandomMeasures.jl") + # end + + # @testset "utilities" begin + # @timeit_include("mcmc/utilities.jl") + # end end show(TIMEROUTPUT; compact=true, sortby=:firstexec) diff --git a/test/variational/advi.jl b/test/variational/advi.jl index ed8f745df2..2ab5d2b424 100644 --- a/test/variational/advi.jl +++ b/test/variational/advi.jl @@ -10,12 +10,16 @@ using Distributions: Dirichlet, Normal using LinearAlgebra using MCMCChains: Chains using Random +using ReverseDiff using StableRNGs: StableRNG using Test: @test, @testset using Turing using Turing.Variational @testset "ADVI" begin + adtype = AutoReverseDiff() + operator = AdvancedVI.ClipScale() + @testset "q initialization" begin m = gdemo_default d = length(Turing.DynamicPPL.VarInfo(m)[:]) @@ -41,86 +45,62 @@ using Turing.Variational @testset "default interface" begin for q0 in [q_meanfield_gaussian(gdemo_default), q_fullrank_gaussian(gdemo_default)] - _, q, _, _ = vi(gdemo_default, q0, 100; show_progress=Turing.PROGRESS[]) + q, _, _ = vi(gdemo_default, q0, 100; show_progress=Turing.PROGRESS[], adtype) c1 = rand(q, 10) end end - @testset "custom interface $name" for (name, objective, operator, optimizer) in [ - ( - "ADVI with closed-form entropy", - AdvancedVI.RepGradELBO(10), - AdvancedVI.ProximalLocationScaleEntropy(), - AdvancedVI.DoG(), - ), + @testset "custom algorithm $name" for (name, algorithm) in [ ( - "ADVI with proximal entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradProxDescent", + KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), ), ( - "ADVI with STL entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.StickingTheLandingEntropy()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradDescent", + KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), ), ] T = 1000 - q, q_avg, _, _ = vi( + q, _, _ = vi( gdemo_default, q_meanfield_gaussian(gdemo_default), T; - objective, - optimizer, - operator, + algorithm, + adtype, show_progress=Turing.PROGRESS[], ) - N = 1000 - c1 = rand(q_avg, N) c2 = rand(q, N) end - @testset "inference $name" for (name, objective, operator, optimizer) in [ + @testset "inference $name" for (name, algorithm) in [ ( - "ADVI with closed-form entropy", - AdvancedVI.RepGradELBO(10), - AdvancedVI.ProximalLocationScaleEntropy(), - AdvancedVI.DoG(), + "KLMinRepGradProxDescent", + KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), ), ( - "ADVI with proximal entropy", - RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), - ), - ( - "ADVI with STL entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.StickingTheLandingEntropy()), - 
AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradDescent", + KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), ), ] rng = StableRNG(0x517e1d9bf89bf94f) T = 1000 - q, q_avg, _, _ = vi( + q, _, _ = vi( rng, gdemo_default, q_meanfield_gaussian(gdemo_default), T; - optimizer, + algorithm, + adtype, show_progress=Turing.PROGRESS[], ) N = 1000 - for q_out in [q_avg, q] - samples = transpose(rand(rng, q_out, N)) - chn = Chains(reshape(samples, size(samples)..., 1), ["s", "m"]) + samples = transpose(rand(rng, q, N)) + chn = Chains(reshape(samples, size(samples)..., 1), ["s", "m"]) - check_gdemo(chn; atol=0.5) - end + check_gdemo(chn; atol=0.5) end # regression test for: @@ -143,7 +123,7 @@ using Turing.Variational @test all(x0 .≈ x0_inv) # And regression for https://github.com/TuringLang/Turing.jl/issues/2160. - _, q, _, _ = vi(rng, m, q_meanfield_gaussian(m), 1000) + q, _, _ = vi(rng, m, q_meanfield_gaussian(m), 1000; adtype) x = rand(rng, q, 1000) @test mean(eachcol(x)) ≈ [0.5, 0.5] atol = 0.1 end @@ -158,7 +138,7 @@ using Turing.Variational end model = demo_issue2205() | (y=1.0,) - _, q, _, _ = vi(rng, model, q_meanfield_gaussian(model), 1000) + q, _, _ = vi(rng, model, q_meanfield_gaussian(model), 1000; adtype) # True mean. mean_true = 1 / 2 var_true = 1 / 2 From 86ee6dd5e1ec64f39e451794375a89122905b7d3 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 22 Oct 2025 04:34:51 -0400 Subject: [PATCH 02/32] revert unintended commit of `runtests.jl` --- test/runtests.jl | 78 +++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index d2535d58a3..5fb6b21411 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,7 +13,7 @@ include("test_utils/models.jl") include("test_utils/numerical_tests.jl") include("test_utils/sampler.jl") -#Turing.setprogress!(false) +Turing.setprogress!(false) included_paths, excluded_paths = parse_args(ARGS) # Filter which tests to run and collect timing and allocations information to show in a @@ -30,59 +30,55 @@ macro timeit_include(path::AbstractString) end @testset "Turing" verbose = true begin - # @testset "Aqua" begin - # @timeit_include("Aqua.jl") - # end + @testset "Aqua" begin + @timeit_include("Aqua.jl") + end - # @testset "AD" verbose = true begin - # @timeit_include("ad.jl") - # end + @testset "AD" verbose = true begin + @timeit_include("ad.jl") + end - # @testset "essential" verbose = true begin - # @timeit_include("essential/container.jl") - # end + @testset "essential" verbose = true begin + @timeit_include("essential/container.jl") + end - # @testset "samplers (without AD)" verbose = true begin - # @timeit_include("mcmc/particle_mcmc.jl") - # @timeit_include("mcmc/emcee.jl") - # @timeit_include("mcmc/ess.jl") - # @timeit_include("mcmc/is.jl") - # end + @testset "samplers (without AD)" verbose = true begin + @timeit_include("mcmc/particle_mcmc.jl") + @timeit_include("mcmc/emcee.jl") + @timeit_include("mcmc/ess.jl") + @timeit_include("mcmc/is.jl") + end @timeit TIMEROUTPUT "inference" begin - # @testset "inference with samplers" verbose = true begin - # @timeit_include("mcmc/gibbs.jl") - # @timeit_include("mcmc/hmc.jl") - # @timeit_include("mcmc/Inference.jl") - # @timeit_include("mcmc/sghmc.jl") - # @timeit_include("mcmc/external_sampler.jl") - # @timeit_include("mcmc/mh.jl") - # @timeit_include("ext/dynamichmc.jl") - # @timeit_include("mcmc/repeat_sampler.jl") - # end + @testset "inference with samplers" verbose = true begin + 
@timeit_include("mcmc/gibbs.jl") + @timeit_include("mcmc/hmc.jl") + @timeit_include("mcmc/Inference.jl") + @timeit_include("mcmc/sghmc.jl") + @timeit_include("mcmc/external_sampler.jl") + @timeit_include("mcmc/mh.jl") + @timeit_include("ext/dynamichmc.jl") + @timeit_include("mcmc/repeat_sampler.jl") + end @testset "variational algorithms" begin @timeit_include("variational/advi.jl") end - # @testset "mode estimation" verbose = true begin - # @timeit_include("optimisation/Optimisation.jl") - # @timeit_include("ext/OptimInterface.jl") - # end + @testset "mode estimation" verbose = true begin + @timeit_include("optimisation/Optimisation.jl") + @timeit_include("ext/OptimInterface.jl") + end end - # @testset "variational optimisers" begin - # @timeit_include("variational/optimisers.jl") - # end - - # @testset "stdlib" verbose = true begin - # @timeit_include("stdlib/distributions.jl") - # @timeit_include("stdlib/RandomMeasures.jl") - # end + @testset "stdlib" verbose = true begin + @timeit_include("stdlib/distributions.jl") + @timeit_include("stdlib/RandomMeasures.jl") + end - # @testset "utilities" begin - # @timeit_include("mcmc/utilities.jl") - # end + @testset "utilities" begin + @timeit_include("mcmc/utilities.jl") + end end show(TIMEROUTPUT; compact=true, sortby=:firstexec) From d87004562aad8881eed0abe653a7d55ab85a5105 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:08:18 -0400 Subject: [PATCH 03/32] update docs for `vi` --- src/variational/VariationalInference.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 1af8a24eb0..630d3b62f2 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -254,14 +254,17 @@ end model::DynamicPPL.Model, q, max_iter::Int; - algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent(DEFAULT_ADTYPE; n_samples=10), + adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, + algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent( + adtype; n_samples=10 + ), show_progress::Bool = Turing.PROGRESS[], kwargs... ) Approximate the target `model` via the variational inference algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. -The default `algorithm` assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. +The default `algorithm`, `KLMinRepGradProxDescent` ([relevant docs](https://turinglang.org/AdvancedVI.jl/dev/klminrepgradproxdescent/)), assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. # Arguments @@ -270,9 +273,9 @@ For other variational families, refer to `AdvancedVI` to determine the best algo - `max_iter`: Maximum number of steps. # Keyword Arguments +- `adtype`: Automatic differentiation backend to be applied to the log-density. The default value for `algorithm` also uses this backend for differentiation the variational objective. - `algorithm`: Variational inference algorithm. - `show_progress`: Whether to show the progress bar. -- `adtype`: Automatic differentiation backend to be applied to the log-density. 
The default value for `algorithm` also uses this backend for differentiation the variational objective. See the docs of `AdvancedVI.optimize` for additional keyword arguments. @@ -288,7 +291,9 @@ function vi( max_iter::Int, args...; adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, - algorithm=KLMinRepGradProxDescent(adtype; n_samples=10), + algorithm::AdvancedVI.AbstractVariationalAlgorithm=KLMinRepGradProxDescent( + adtype; n_samples=10 + ), show_progress::Bool=PROGRESS[], kwargs..., ) From 2d928e0a5e731cfa54eb6a02e09a3401bc5f5ff2 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:17:04 -0400 Subject: [PATCH 04/32] add history entry for `AdvancedVI@0.5` --- HISTORY.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index dc66f1f496..83686b257b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,19 @@ + # 0.42.0 +## Breaking Changes + +**AdvancedVI 0.5** + +Turing.jl v0.42 updates `AdvancedVI.jl` compatibility to 0.5. +Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some changes spilling out into the interface. +The summary of the changes below are the things that affect the end-users of Turing. +For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. + +- A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +- The default hyperparameters of `DoG`and `DoWG` have been altered. +- The depricated `AdvancedVI@0.2`-era interface is now removed. + # 0.41.0 ## DynamicPPL 0.38 @@ -62,7 +76,7 @@ Note that if the initial sample is included, the corresponding sampler statistic Due to a technical limitation of MCMCChains.jl, this causes all indexing into MCMCChains to return `Union{Float64, Missing}` or similar. If you want the old behaviour, you can discard the first sample (e.g. using `discard_initial=1`). -# 0.40.5 +# 0.4# 0.40.5 Bump Optimization.jl compatibility to include v5. 
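For readers following the series, here is a minimal sketch of the call pattern that the HISTORY entry added above describes. It is illustrative only: the toy model, iteration count, and AD backend are placeholders, `AdvancedVI.ClipScale()` comes from AdvancedVI.jl (a Turing dependency assumed to be installed), and the three-value unpacking mirrors the updated tests in this series.

```julia
using Turing
using AdvancedVI  # for ClipScale; Turing builds on AdvancedVI

@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    for i in eachindex(x)
        x[i] ~ Normal(m, sqrt(s²))
    end
end

model = gdemo([1.5, 2.0])
q0 = q_meanfield_gaussian(model)

# Default algorithm: KLMinRepGradProxDescent over a location-scale family.
q, _, _ = vi(model, q0, 1000)

# Options that used to be separate keyword arguments of `vi` (objective,
# operator, averager, ...) now live inside the `algorithm` object.
q, _, _ = vi(
    model,
    q0,
    1000;
    algorithm=KLMinRepGradDescent(
        AutoForwardDiff(); n_samples=10, operator=AdvancedVI.ClipScale()
    ),
)
```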
From 5211b37286302a2f850fa19f025cd3dbf3cd0f2a Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:18:40 -0400 Subject: [PATCH 05/32] remove export for removed symbol --- src/Turing.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Turing.jl b/src/Turing.jl index 98cfcf29c7..a4f40df259 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -116,7 +116,6 @@ export externalsampler, # Variational inference - AdvancedVI vi, - ADVI, q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian, From f0d615d92ddc6763a4ea38b481b1da04e0cd8e97 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:24:13 -0400 Subject: [PATCH 06/32] fix formatting Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 1 - 1 file changed, 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 83686b257b..6e0a620ba0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,3 @@ - # 0.42.0 ## Breaking Changes From 1b2351f2501a6ae143c100ffd7169c6a60ced58f Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:24:21 -0400 Subject: [PATCH 07/32] fix formatting Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6e0a620ba0..3327481194 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,9 +9,9 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. -- A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. -- The default hyperparameters of `DoG`and `DoWG` have been altered. -- The depricated `AdvancedVI@0.2`-era interface is now removed. + - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. + - The default hyperparameters of `DoG`and `DoWG` have been altered. + - The depricated `AdvancedVI@0.2`-era interface is now removed. 
# 0.41.0 From 2be31b4e69a7c485c269b6a055ee007f955ba1a9 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:27:57 -0400 Subject: [PATCH 08/32] tidy tests advi --- test/variational/advi.jl | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/test/variational/advi.jl b/test/variational/advi.jl index 2ab5d2b424..b426f0e6a3 100644 --- a/test/variational/advi.jl +++ b/test/variational/advi.jl @@ -51,14 +51,8 @@ using Turing.Variational end @testset "custom algorithm $name" for (name, algorithm) in [ - ( - "KLMinRepGradProxDescent", - KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), - ), - ( - "KLMinRepGradDescent", - KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), - ), + ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), + ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), ] T = 1000 q, _, _ = vi( @@ -74,14 +68,8 @@ using Turing.Variational end @testset "inference $name" for (name, algorithm) in [ - ( - "KLMinRepGradProxDescent", - KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), - ), - ( - "KLMinRepGradDescent", - KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), - ), + ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), + ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From e48ae42e38ab3f966304283cbadabac0c98a84df Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:29:12 -0400 Subject: [PATCH 09/32] fix rename file `advi.jl` to `vi.jl` to reflect naming changes --- test/variational/{advi.jl => vi.jl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/variational/{advi.jl => vi.jl} (100%) diff --git a/test/variational/advi.jl b/test/variational/vi.jl similarity index 100% rename from test/variational/advi.jl rename to test/variational/vi.jl From 44f776255b01b9c1d050a8e3d65fcb3502e21807 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:38:35 -0400 Subject: [PATCH 10/32] fix docs Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3327481194..cd796e8ec7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,7 +9,7 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. - - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. + - A new level of interface for defining different variational algorithms has been introduced in `AdvancedVI` v0.5. As a result, the function `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. 
Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. - The default hyperparameters of `DoG`and `DoWG` have been altered. - The depricated `AdvancedVI@0.2`-era interface is now removed. From fd0e9286140775b441b918c9a44599c575718c80 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:38:51 -0400 Subject: [PATCH 11/32] fix HISTORY.md Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index cd796e8ec7..341814e6bf 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -75,7 +75,7 @@ Note that if the initial sample is included, the corresponding sampler statistic Due to a technical limitation of MCMCChains.jl, this causes all indexing into MCMCChains to return `Union{Float64, Missing}` or similar. If you want the old behaviour, you can discard the first sample (e.g. using `discard_initial=1`). -# 0.4# 0.40.5 +# 0.40.5 Bump Optimization.jl compatibility to include v5. From 77276bdc6c048fba4be1343ca745361c9d20145a Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:39:03 -0400 Subject: [PATCH 12/32] fix HISTORY.md Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 341814e6bf..2f75200af3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ For a more comprehensive list of changes, please refer to the [changelogs](https - A new level of interface for defining different variational algorithms has been introduced in `AdvancedVI` v0.5. As a result, the function `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. - The default hyperparameters of `DoG`and `DoWG` have been altered. - - The depricated `AdvancedVI@0.2`-era interface is now removed. + - The deprecated `AdvancedVI@0.2`-era interface is now removed. # 0.41.0 From e70ddb4fc8c7a884440a6622524a526dcf73c8bd Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:46:11 -0400 Subject: [PATCH 13/32] update history --- HISTORY.md | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3327481194..ba92be1ffd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,7 +9,44 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. - - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. 
Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +For example, + +```julia +vi(model, q, n_iters; objective=RepGradELBO(10), operator=AdvancedVI.ClipScale()) +``` + +is now + +```julia +vi( + model, + q, + n_iters; + algorithm=KLMinRepGradDescent(adtype; n_samples=10, operator=AdvancedVI.ClipScale()), +) +``` + +Similarly, + +```julia +vi( + model, + q, + n_iters; + objective=RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), + operator=AdvancedVI.ProximalLocationScaleEntropy(), +) +``` + +is now + +```julia +vi(model, q, n_iters; algorithm=KLMinRepGradProxDescent(adtype; n_samples=10)) +``` + +Additionally, + - The default hyperparameters of `DoG`and `DoWG` have been altered. - The depricated `AdvancedVI@0.2`-era interface is now removed. From cdc8b2fcc0c432fca417b42b2e6c09b54c8356d6 Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Wed, 12 Nov 2025 16:49:11 +0000 Subject: [PATCH 14/32] Update README.md for clarity and formatting --- README.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 21167ac585..d320d72c05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ -

[README header hunk: the centered "Turing.jl logo" image is kept, the "Turing.jl" title line is removed, and the tagline changes from "Probabilistic programming and Bayesian inference in Julia" to "Bayesian inference with probabilistic programming"]

Tutorials API docs @@ -9,7 +8,7 @@ ColPrac: Contributor's Guide on Collaborative Practices for Community Packages

-## 🚀 Get started +## Get started Install Julia (see [the official Julia website](https://julialang.org/install/); you will need at least Julia 1.10 for the latest version of Turing.jl). Then, launch a Julia REPL and run: @@ -38,7 +37,7 @@ You can find the main TuringLang documentation at [**https://turinglang.org**](h API documentation for Turing.jl is specifically available at [**https://turinglang.org/Turing.jl/stable**](https://turinglang.org/Turing.jl/stable/). -## 🛠️ Contributing +## Contributing ### Issues @@ -55,20 +54,20 @@ Breaking releases (minor version) should target the `breaking` branch. If you have not received any feedback on an issue or PR for a while, please feel free to ping `@TuringLang/maintainers` in a comment. -## 💬 Other channels +## Other channels The Turing.jl userbase tends to be most active on the [`#turing` channel of Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). If you do not have an invitation to Julia's Slack, you can get one from [the official Julia website](https://julialang.org/slack/). There are also often threads on [Julia Discourse](https://discourse.julialang.org) (you can search using, e.g., [the `turing` tag](https://discourse.julialang.org/tag/turing)). -## 🔄 What's changed recently? +## What's changed recently? We publish a fortnightly newsletter summarising recent updates in the TuringLang ecosystem, which you can view on [our website](https://turinglang.org/news/), [GitHub](https://github.com/TuringLang/Turing.jl/issues/2498), or [Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). For Turing.jl specifically, you can see a full changelog in [`HISTORY.md`](https://github.com/TuringLang/Turing.jl/blob/main/HISTORY.md) or [our GitHub releases](https://github.com/TuringLang/Turing.jl/releases). -## 🧩 Where does Turing.jl sit in the TuringLang ecosystem? +## Where does Turing.jl sit in the TuringLang ecosystem? Turing.jl is the main entry point for users, and seeks to provide a unified, convenient interface to all of the functionality in the TuringLang (and broader Julia) ecosystem. @@ -125,5 +124,3 @@ month = feb, ``` - -You can see the full list of publications that have cited Turing.jl on [Google Scholar](https://scholar.google.com/scholar?cites=11803241473159708991). 
From 32e70d6ee5735f8a424b2b048ef70512d1039c6a Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Wed, 12 Nov 2025 16:52:03 +0000 Subject: [PATCH 15/32] Add linear regression model example to README --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d320d72c05..250c3c4f34 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,22 @@ You can define models using the `@model` macro, and then perform Markov chain Mo ```julia julia> using Turing -julia> @model function my_first_model(data) - mean ~ Normal(0, 1) - sd ~ truncated(Cauchy(0, 3); lower=0) - data ~ Normal(mean, sd) +julia> @model function linear_regression(x) + # Priors + α ~ Normal(0, 1) + β ~ Normal(0, 1) + σ² ~ truncated(Cauchy(0, 3); lower=0) + + # Likelihood + μ = α .+ β .* x + y ~ MvNormal(μ, σ² * I) end -julia> model = my_first_model(randn()) +julia> x, y = rand(10), rand(10) -julia> chain = sample(model, NUTS(), 1000) +julia> posterior = linear_regression(x) | (; y = y) + +julia> chain = sample(posterior, NUTS(), 1000) ``` You can find the main TuringLang documentation at [**https://turinglang.org**](https://turinglang.org), which contains general information about Turing.jl's features, as well as a variety of tutorials with examples of Turing.jl models. From 19bf7d6bc6f7264ecb12b47b63d92c0e08068794 Mon Sep 17 00:00:00 2001 From: Shravan Goswami <123811742+shravanngoswamii@users.noreply.github.com> Date: Thu, 13 Nov 2025 02:52:24 +0530 Subject: [PATCH 16/32] Add dark/light mode logo support (#2714) Just a minor README update to support dark/light mode logo, good for dark mode users! --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 250c3c4f34..7f953700ea 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -

[README header hunk: the plain centered "Turing.jl logo" image is replaced by a dark/light-mode picture block with the same "Turing.jl logo" alt text]

Bayesian inference with probabilistic programming

Tutorials From 4c02f7baf27e6b83e945148267f6bf95ad56db94 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 16:37:37 -0500 Subject: [PATCH 17/32] bump AdvancedVI version --- Project.toml | 2 +- test/Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index bbddd9508f..a52829c5f4 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ Accessors = "0.1" AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6, 0.7, 0.8" AdvancedMH = "0.8" AdvancedPS = "0.7" -AdvancedVI = "0.5" +AdvancedVI = "0.6" BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" diff --git a/test/Project.toml b/test/Project.toml index 8d819a6749..7833656757 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -44,7 +44,7 @@ AbstractMCMC = "5" AbstractPPL = "0.11, 0.12, 0.13" AdvancedMH = "0.6, 0.7, 0.8" AdvancedPS = "0.7" -AdvancedVI = "0.5" +AdvancedVI = "0.6" Aqua = "0.8" BangBang = "0.4" Bijectors = "0.14, 0.15" From 6518b821c5b1e575450dc855b05526e553207bb0 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 16:59:23 -0500 Subject: [PATCH 18/32] add exports new algorithms, modify `vi` to operate in unconstrained --- src/Turing.jl | 3 ++ src/variational/VariationalInference.jl | 63 +++++++++++++++++++------ test/variational/vi.jl | 12 +++++ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index a4f40df259..0528788fed 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -122,6 +122,9 @@ export KLMinRepGradProxDescent, KLMinRepGradDescent, KLMinScoreGradDescent, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent, + KLMinWassFwdBwd, # ADTypes AutoForwardDiff, AutoReverseDiff, diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 630d3b62f2..d4b1c6ceab 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -2,13 +2,20 @@ module Variational using AdvancedVI: - AdvancedVI, KLMinRepGradDescent, KLMinRepGradProxDescent, KLMinScoreGradDescent + AdvancedVI, + KLMinRepGradDescent, + KLMinRepGradProxDescent, + KLMinScoreGradDescent, + KLMinWassFwdBwd, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent + using ADTypes using Bijectors: Bijectors using Distributions -using DynamicPPL +using DynamicPPL: DynamicPPL using LinearAlgebra -using LogDensityProblems +using LogDensityProblems: LogDensityProblems using Random using ..Turing: DEFAULT_ADTYPE, PROGRESS @@ -18,7 +25,17 @@ export vi, q_fullrank_gaussian, KLMinRepGradProxDescent, KLMinRepGradDescent, - KLMinScoreGradDescent + KLMinScoreGradDescent, + KLMinWassFwdBwd, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent + +requires_unconstrained_space(::AdvancedVI.AbstractVariationalAlgorithm) = false +requires_unconstrained_space(::AdvancedVI.KLMinRepGradProxDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinRepGradDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinWassFwdBwd) = true +requires_unconstrained_space(::AdvancedVI.KLMinNaturalGradDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinSqrtNaturalGradDescent) = true """ q_initialize_scale( @@ -65,7 +82,7 @@ function q_initialize_scale( num_max_trials::Int=10, reduce_factor::Real=one(eltype(scale)) / 2, ) - prob = LogDensityFunction(model) + prob = DynamicPPL.LogDensityFunction(model) ℓπ = Base.Fix1(LogDensityProblems.logdensity, prob) varinfo = DynamicPPL.VarInfo(model) @@ -264,8 +281,12 @@ end Approximate the target `model` via the variational inference 
algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. + +If the chosen variational inference algorithm operates in an unconstrained space, then the provided initial variational approximation `q` must be a `Bijectors.TransformedDistribution` of an unconstrained distribution. +For example, the initialization supplied by `q_meanfield_gaussian`,`q_fullrank_gaussian`, `q_locationscale`. + The default `algorithm`, `KLMinRepGradProxDescent` ([relevant docs](https://turinglang.org/AdvancedVI.jl/dev/klminrepgradproxdescent/)), assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. -For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. +For other variational families, refer the documentation of `AdvancedVI` to determine the best algorithm and other options. # Arguments - `model`: The target `DynamicPPL.Model`. @@ -294,19 +315,31 @@ function vi( algorithm::AdvancedVI.AbstractVariationalAlgorithm=KLMinRepGradProxDescent( adtype; n_samples=10 ), + unconstrained::Bool=requires_unconstrained_space(algorithm), show_progress::Bool=PROGRESS[], kwargs..., ) - return AdvancedVI.optimize( - rng, - algorithm, - max_iter, - LogDensityFunction(model; adtype), - q, - args...; - show_progress=show_progress, - kwargs..., + prob, q, trans = if unconstrained + @assert q isa Bijectors.TransformedDistribution "The algorithm $(algorithm) operates in an unconstrained space. Therefore, the initial variational approximation is expected to be a Bijectors.TransformedDistribution of an unconstrained distribution." + vi = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal) + vi = DynamicPPL.set_transformed!!(vi, true) + prob = DynamicPPL.LogDensityFunction( + model, DynamicPPL.getlogjoint_internal, vi; adtype + ) + prob, q.dist, q.transform + else + prob = DynamicPPL.LogDensityFunction(model; adtype) + prob, q, nothing + end + q, info, state = AdvancedVI.optimize( + rng, algorithm, max_iter, prob, q, args...; show_progress=show_progress, kwargs... ) + q = if unconstrained + Bijectors.TransformedDistribution(q, trans) + else + q + end + q, info, state end function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) 
diff --git a/test/variational/vi.jl b/test/variational/vi.jl index b426f0e6a3..efe82cb454 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -53,6 +53,12 @@ using Turing.Variational @testset "custom algorithm $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ( + "KLMinSqrtNaturalGradDescent", + KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + ), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] T = 1000 q, _, _ = vi( @@ -70,6 +76,12 @@ using Turing.Variational @testset "inference $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ( + "KLMinSqrtNaturalGradDescent", + KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + ), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From 874a0b24ed1672428d88dfb6cbdad4c96501dfdd Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:06:23 -0500 Subject: [PATCH 19/32] add clarification on initializing unconstrained algorithms --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index dd7ab477e3..8d24469adc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -68,6 +68,7 @@ Additionally, - The default hyperparameters of `DoG`and `DoWG` have been altered. - The deprecated `AdvancedVI@0.2`-era interface is now removed. - `estimate_objective` now returns the value to be minimized by the optimization algorithm. For example, for ELBO maximization algorithms, `estimate_objective` will return the *negative ELBO*. This is breaking change from the previous behavior where the ELBO was returns. + - When using algorithms that expect to operate in unconstrained spaces, the user is now explicitly expected to provide a `Bijectors.TransformedDistribution` wrapping an unconstrained distribution. (Refer to the docstring of `vi`.) 
### New Features From e021eb7bf42a2a2b06d3f9a073f20d16eec4f6e6 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:10:46 -0500 Subject: [PATCH 20/32] update api --- docs/src/api.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/src/api.md b/docs/src/api.md index 885d587ea6..2eda3be6f4 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -114,6 +114,12 @@ See the [docs of AdvancedVI.jl](https://turinglang.org/AdvancedVI.jl/stable/) fo | `q_locationscale` | [`Turing.Variational.q_locationscale`](@ref) | Find a numerically non-degenerate initialization for a location-scale variational family | | `q_meanfield_gaussian` | [`Turing.Variational.q_meanfield_gaussian`](@ref) | Find a numerically non-degenerate initialization for a mean-field Gaussian family | | `q_fullrank_gaussian` | [`Turing.Variational.q_fullrank_gaussian`](@ref) | Find a numerically non-degenerate initialization for a full-rank Gaussian family | +| `KLMinRepGradDescent` | [`Turing.Variational.KLMinRepGradDescent`](@ref) | KL divergence minimization via stochastic gradient descent with the reparameterization gradient | +| `KLMinRepGradProxDescent` | [`Turing.Variational.KLMinRepGradProxDescent`](@ref) | KL divergence minimization via stochastic proximal gradient descent with the reparameterization gradient over location-scale variational families | +| `KLMinScoreGradDescent` | [`Turing.Variational.KLMinScoreGradDescent`](@ref) | KL divergence minimization via stochastic gradient descent with the score gradient | +| `KLMinWassFwdBwd` | [`Turing.Variational.KLMinWassFwdBwd`](@ref) | KL divergence minimization via Wasserstein proximal gradient descent | +| `KLMinNaturalGradDescent` | [`Turing.Variational.KLMinNaturalGradDescent`](@ref) | KL divergence minimization via natural gradient descent | +| `KLMinSqrtNaturalGradDescent` | [`Turing.Variational.KLMinSqrtNaturalGradDescent`](@ref) | KL divergence minimization via natural gradient descent in the square-root parameterization | ### Automatic differentiation types From eec7ef2e571623fe1a5bbadcbbbb32f6d0143b14 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:17 -0500 Subject: [PATCH 21/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index d4b1c6ceab..09f7861963 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -339,7 +339,7 @@ function vi( else q end - q, info, state + return q, info, state end function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) 
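To make the new rows of the API table above concrete, a hedged sketch of selecting one of the newly exported algorithms through the `algorithm` keyword. The model, iteration count, and hyperparameters are illustrative; the constructor settings mirror the test suite in this series.

```julia
using Turing

@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    for i in eachindex(x)
        x[i] ~ Normal(m, sqrt(s²))
    end
end

model = gdemo([1.5, 2.0])
q0 = q_meanfield_gaussian(model)

# Wasserstein proximal gradient descent; stepsize and n_samples follow the tests.
q, _, _ = vi(model, q0, 1000; algorithm=KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10))
```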
From b6d820261866c0fb6ccac9a3da66989c78ad8968 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:24 -0500 Subject: [PATCH 22/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index efe82cb454..e2db6a46f5 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -53,7 +53,7 @@ using Turing.Variational @testset "custom algorithm $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), - ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), From b900ab49e12fe9446490880bf2e30001e0a40f20 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:35 -0500 Subject: [PATCH 23/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index e2db6a46f5..9ec9703ed0 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -56,7 +56,7 @@ using Turing.Variational ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", - KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] From e71b07b6bd3670b810a286b624e5734ba7d20f58 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:00 -0500 Subject: [PATCH 24/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 9ec9703ed0..2c382b13d7 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -79,7 +79,7 @@ using Turing.Variational ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", - KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] From c08de123d9af8d155d5adca361e95e6739d8b9c9 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:11 -0500 Subject: [PATCH 25/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 2c382b13d7..681bff0ed5 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -58,7 +58,7 @@ using Turing.Variational "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), - ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10)), ] T = 1000 q, _, _ = vi( From ae80f1e3941279411b302c29160b4440b2872db3 Mon Sep 17 
00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:21 -0500 Subject: [PATCH 26/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 681bff0ed5..69b8078b7d 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -76,7 +76,7 @@ using Turing.Variational @testset "inference $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), - ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), From 73bd309c14e29d450b05ec61ec70d2bc4559b8e6 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:33 -0500 Subject: [PATCH 27/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 69b8078b7d..1815e5953c 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -81,7 +81,7 @@ using Turing.Variational "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), - ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From eaac4c3218fc1aad1532c5d614416cdec63735f1 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:40 -0500 Subject: [PATCH 28/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index 8d24469adc..7b990bdab7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -81,6 +81,7 @@ Additionally, Any of the new algorithms above can readily be used by simply swappin the `algorithm` keyword argument of `vi`. For example, to use batch-and-match: + ```julia vi(model, q, n_iters; algorithm=FisherMinBatchMatch()) ``` From 757ebb46c7348c6430594dcf682a6f73928bed21 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:15:10 -0500 Subject: [PATCH 29/32] revert changes to README --- README.md | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 7f953700ea..21167ac585 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ -

[README header hunk: raw HTML banner omitted. The lines removed here held a centered Turing.jl logo with the tagline "Bayesian inference with probabilistic programming"; the lines added in their place hold the centered logo, the "Turing.jl" title, and the tagline "Probabilistic programming and Bayesian inference in Julia", followed by the Tutorials, API docs, and ColPrac badges.]

-## Get started +## 🚀 Get started Install Julia (see [the official Julia website](https://julialang.org/install/); you will need at least Julia 1.10 for the latest version of Turing.jl). Then, launch a Julia REPL and run: @@ -27,29 +23,22 @@ You can define models using the `@model` macro, and then perform Markov chain Mo ```julia julia> using Turing -julia> @model function linear_regression(x) - # Priors - α ~ Normal(0, 1) - β ~ Normal(0, 1) - σ² ~ truncated(Cauchy(0, 3); lower=0) - - # Likelihood - μ = α .+ β .* x - y ~ MvNormal(μ, σ² * I) +julia> @model function my_first_model(data) + mean ~ Normal(0, 1) + sd ~ truncated(Cauchy(0, 3); lower=0) + data ~ Normal(mean, sd) end -julia> x, y = rand(10), rand(10) +julia> model = my_first_model(randn()) -julia> posterior = linear_regression(x) | (; y = y) - -julia> chain = sample(posterior, NUTS(), 1000) +julia> chain = sample(model, NUTS(), 1000) ``` You can find the main TuringLang documentation at [**https://turinglang.org**](https://turinglang.org), which contains general information about Turing.jl's features, as well as a variety of tutorials with examples of Turing.jl models. API documentation for Turing.jl is specifically available at [**https://turinglang.org/Turing.jl/stable**](https://turinglang.org/Turing.jl/stable/). -## Contributing +## 🛠️ Contributing ### Issues @@ -66,20 +55,20 @@ Breaking releases (minor version) should target the `breaking` branch. If you have not received any feedback on an issue or PR for a while, please feel free to ping `@TuringLang/maintainers` in a comment. -## Other channels +## 💬 Other channels The Turing.jl userbase tends to be most active on the [`#turing` channel of Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). If you do not have an invitation to Julia's Slack, you can get one from [the official Julia website](https://julialang.org/slack/). There are also often threads on [Julia Discourse](https://discourse.julialang.org) (you can search using, e.g., [the `turing` tag](https://discourse.julialang.org/tag/turing)). -## What's changed recently? +## 🔄 What's changed recently? We publish a fortnightly newsletter summarising recent updates in the TuringLang ecosystem, which you can view on [our website](https://turinglang.org/news/), [GitHub](https://github.com/TuringLang/Turing.jl/issues/2498), or [Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). For Turing.jl specifically, you can see a full changelog in [`HISTORY.md`](https://github.com/TuringLang/Turing.jl/blob/main/HISTORY.md) or [our GitHub releases](https://github.com/TuringLang/Turing.jl/releases). -## Where does Turing.jl sit in the TuringLang ecosystem? +## 🧩 Where does Turing.jl sit in the TuringLang ecosystem? Turing.jl is the main entry point for users, and seeks to provide a unified, convenient interface to all of the functionality in the TuringLang (and broader Julia) ecosystem. @@ -136,3 +125,5 @@ month = feb, ``` + +You can see the full list of publications that have cited Turing.jl on [Google Scholar](https://scholar.google.com/scholar?cites=11803241473159708991). 
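The string of "run formatter" commits above (patches 22–27) all apply one stylistic rule: Julia accepts both `f(kw=val)` and `f(; kw=val)` for keyword arguments, but the project formatter normalises keyword-only calls to the explicit leading-semicolon form. To make the one-character diffs concrete, here is a minimal sketch of how the reformatted constructors and the `algorithm` keyword of `vi` fit together. It reuses the `my_first_model` definition from the README hunk above; the AD backend, sample count, and iteration count are illustrative placeholders rather than values taken from this patch series, and the algorithm shown is only one of the options exercised in the tests.

```julia
using Turing
using Turing.Variational

@model function my_first_model(data)
    mean ~ Normal(0, 1)
    sd ~ truncated(Cauchy(0, 3); lower=0)
    data ~ Normal(mean, sd)
end

model = my_first_model(randn())

# Initial variational approximation: a diagonal-scale Gaussian pushed through the
# model's bijector, so constrained parameters such as `sd` are handled correctly.
q0 = q_meanfield_gaussian(model)

# Keyword arguments follow an explicit `;`, the style the formatter patches enforce
# (e.g. `KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)` in the test file).
algorithm = KLMinRepGradProxDescent(AutoForwardDiff(); n_samples=10)

# Any AdvancedVI algorithm can be swapped in through the `algorithm` keyword.
q, state, info = vi(model, q0, 1_000; algorithm=algorithm)
```

The leading semicolon changes nothing semantically; it only makes explicit at the call site which arguments are keywords, which is why these commits touch punctuation and nothing else.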
From 05ab71164b784edaffbb40d7692c7610cf104b21 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 10:49:24 -0500 Subject: [PATCH 30/32] fix wrong use of transformation in vi --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 09f7861963..06d39128b9 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -322,7 +322,7 @@ function vi( prob, q, trans = if unconstrained @assert q isa Bijectors.TransformedDistribution "The algorithm $(algorithm) operates in an unconstrained space. Therefore, the initial variational approximation is expected to be a Bijectors.TransformedDistribution of an unconstrained distribution." vi = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal) - vi = DynamicPPL.set_transformed!!(vi, true) + vi = DynamicPPL.link!!(vi, model) prob = DynamicPPL.LogDensityFunction( model, DynamicPPL.getlogjoint_internal, vi; adtype ) From 91606b5b94cb123412c169b7f6005e9e1635e336 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 12:09:56 -0500 Subject: [PATCH 31/32] change inital value for scale matrices to 0.6*I and update docs --- HISTORY.md | 1 + src/variational/VariationalInference.jl | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7b990bdab7..35310bfa1b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -68,6 +68,7 @@ Additionally, - The default hyperparameters of `DoG`and `DoWG` have been altered. - The deprecated `AdvancedVI@0.2`-era interface is now removed. - `estimate_objective` now returns the value to be minimized by the optimization algorithm. For example, for ELBO maximization algorithms, `estimate_objective` will return the *negative ELBO*. This is breaking change from the previous behavior where the ELBO was returns. + - The initial value for the `q_meanfield_gaussian`, `q_fullrank_gaussian`, and `q_locationscale` have changed. Specificially, the default initial value for the scale matrix has been changed from `I` to `0.6*I`. - When using algorithms that expect to operate in unconstrained spaces, the user is now explicitly expected to provide a `Bijectors.TransformedDistribution` wrapping an unconstrained distribution. (Refer to the docstring of `vi`.) ### New Features diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 06d39128b9..46283b2e77 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -117,7 +117,9 @@ end Find a numerically non-degenerate variational distribution `q` for approximating the target `model` within the location-scale variational family formed by the type of `scale` and `basedist`. The distribution can be manually specified by setting `location`, `scale`, and `basedist`. -Otherwise, it chooses a standard Gaussian by default. +Otherwise, it chooses a Gaussian with zero-mean and scale `0.6*I` (covariance of `0.6^2*I`) by default. +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. + Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. 
If `meanfield` is set as `true`, the scale of `q` is restricted to be a diagonal matrix and only the diagonal of `scale` is used. @@ -165,9 +167,11 @@ function q_locationscale( L = if isnothing(scale) if meanfield - q_initialize_scale(rng, model, μ, Diagonal(ones(num_params)), basedist; kwargs...) + q_initialize_scale( + rng, model, μ, Diagonal(fill(0.6, num_params)), basedist; kwargs... + ) else - L0 = LowerTriangular(Matrix{Float64}(I, num_params, num_params)) + L0 = LowerTriangular(Matrix{Float64}(0.6*I, num_params, num_params)) q_initialize_scale(rng, model, μ, L0, basedist; kwargs...) end else @@ -198,6 +202,10 @@ end Find a numerically non-degenerate mean-field Gaussian `q` for approximating the target `model`. +If the `scale` set as `nothing`, the default value will be a zero-mean Gaussian with a `Diagonal` scale matrix (the "mean-field" approximation) no larger than `0.6*I` (covariance of `0.6^2*I`). +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. +Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. + # Arguments - `model`: The target `DynamicPPL.Model`. @@ -237,6 +245,10 @@ end Find a numerically non-degenerate Gaussian `q` with a scale with full-rank factors (traditionally referred to as a "full-rank family") for approximating the target `model`. +If the `scale` set as `nothing`, the default value will be a zero-mean Gaussian with a `LowerTriangular` scale matrix (resulting in a covariance with "full-rank" factors) no larger than `0.6*I` (covariance of `0.6^2*I`). +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. +Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. + # Arguments - `model`: The target `DynamicPPL.Model`. From 722153a41f975bf9cfa3fad42260692ba073f472 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 12:16:53 -0500 Subject: [PATCH 32/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 46283b2e77..5dde445c66 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -171,7 +171,7 @@ function q_locationscale( rng, model, μ, Diagonal(fill(0.6, num_params)), basedist; kwargs... ) else - L0 = LowerTriangular(Matrix{Float64}(0.6*I, num_params, num_params)) + L0 = LowerTriangular(Matrix{Float64}(0.6 * I, num_params, num_params)) q_initialize_scale(rng, model, μ, L0, basedist; kwargs...) end else
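The 99.9% figure quoted in the new docstrings above is easy to check numerically: with location zero and scale `0.6`, each marginal of the default initial approximation is `Normal(0, 0.6)` in the unconstrained space. The snippet below is a verification sketch using Distributions.jl, not code from the patch series.

```julia
using Distributions

# Probability that a draw from a zero-mean Gaussian with standard deviation 0.6
# falls inside (-2, 2), the range targeted by the new default initialisation.
p = cdf(Normal(0.0, 0.6), 2.0) - cdf(Normal(0.0, 0.6), -2.0)
# p ≈ 0.9991, i.e. roughly 99.9%, consistent with the updated docstrings and with
# mimicking a uniform initialisation over (-2, 2).
```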
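Patch 30's one-line change also deserves a note, since "fix wrong use of transformation" is terse: as I read it, `set_transformed!!` merely flags the VarInfo as being in linked (unconstrained) space without touching the stored values, whereas `link!!` actually maps the values through the model's bijectors, so the resulting `LogDensityFunction` really is defined on an unconstrained space as the VI algorithms expect. The sketch below restates the corrected initialisation with explanatory comments; `ldf_default_varinfo` and `getlogjoint_internal` are taken from the patch itself, the toy model is hypothetical, and the description of the two calls is my reading rather than quoted documentation.

```julia
using Turing
using DynamicPPL: DynamicPPL

@model function toy()
    x ~ LogNormal()   # a constrained parameter, so linking actually does work
end

model = toy()
adtype = AutoForwardDiff()

varinfo = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal)
# Previously: DynamicPPL.set_transformed!!(varinfo, true), which only set the
# "linked" flag while the stored values stayed in constrained space.
varinfo = DynamicPPL.link!!(varinfo, model)   # transform the values themselves
prob = DynamicPPL.LogDensityFunction(
    model, DynamicPPL.getlogjoint_internal, varinfo; adtype
)
```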