From ea69430a76d6b9cb3a3b6350229bf335acad1f00 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 22 Oct 2025 04:32:54 -0400 Subject: [PATCH 01/32] update vi interface to match AdvancedVI@0.5 --- Project.toml | 2 +- src/Turing.jl | 3 + src/variational/VariationalInference.jl | 80 +++++++++++-------------- src/variational/deprecated.jl | 61 ------------------- test/Project.toml | 2 +- test/runtests.jl | 78 ++++++++++++------------ test/variational/advi.jl | 72 ++++++++-------------- 7 files changed, 106 insertions(+), 192 deletions(-) delete mode 100644 src/variational/deprecated.jl diff --git a/Project.toml b/Project.toml index a2e5f206f4..9a8997ce38 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ Accessors = "0.1" AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6, 0.7, 0.8" AdvancedMH = "0.8" AdvancedPS = "0.7" -AdvancedVI = "0.4" +AdvancedVI = "0.5" BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" diff --git a/src/Turing.jl b/src/Turing.jl index 0cdbe24586..0d29e1397e 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -117,6 +117,9 @@ export q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian, + KLMinRepGradProxDescent, + KLMinRepGradDescent, + KLMinScoreGradDescent, # ADTypes AutoForwardDiff, AutoReverseDiff, diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index d516319684..1af8a24eb0 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -1,21 +1,24 @@ module Variational -using DynamicPPL +using AdvancedVI: + AdvancedVI, KLMinRepGradDescent, KLMinRepGradProxDescent, KLMinScoreGradDescent using ADTypes +using Bijectors: Bijectors using Distributions +using DynamicPPL using LinearAlgebra using LogDensityProblems using Random +using ..Turing: DEFAULT_ADTYPE, PROGRESS -import ..Turing: DEFAULT_ADTYPE, PROGRESS - -import AdvancedVI -import Bijectors - -export vi, q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian - -include("deprecated.jl") +export vi, + q_locationscale, + q_meanfield_gaussian, + q_fullrank_gaussian, + KLMinRepGradProxDescent, + KLMinRepGradDescent, + KLMinScoreGradDescent """ q_initialize_scale( @@ -248,76 +251,61 @@ end """ vi( [rng::Random.AbstractRNG,] - model::DynamicPPL.Model; + model::DynamicPPL.Model, q, - n_iterations::Int; - objective::AdvancedVI.AbstractVariationalObjective = AdvancedVI.RepGradELBO( - 10; entropy = AdvancedVI.ClosedFormEntropyZeroGradient() - ), + max_iter::Int; + algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent(DEFAULT_ADTYPE; n_samples=10), show_progress::Bool = Turing.PROGRESS[], - optimizer::Optimisers.AbstractRule = AdvancedVI.DoWG(), - averager::AdvancedVI.AbstractAverager = AdvancedVI.PolynomialAveraging(), - operator::AdvancedVI.AbstractOperator = AdvancedVI.ProximalLocationScaleEntropy(), - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE, kwargs... ) -Approximating the target `model` via variational inference by optimizing `objective` with the initialization `q`. +Approximate the target `model` via the variational inference algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. +The default `algorithm` assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. +For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. # Arguments - `model`: The target `DynamicPPL.Model`. 
- `q`: The initial variational approximation. -- `n_iterations`: Number of optimization steps. +- `max_iter`: Maximum number of steps. # Keyword Arguments -- `objective`: Variational objective to be optimized. +- `algorithm`: Variational inference algorithm. - `show_progress`: Whether to show the progress bar. -- `optimizer`: Optimization algorithm. -- `averager`: Parameter averaging strategy. -- `operator`: Operator applied after each optimization step. -- `adtype`: Automatic differentiation backend. +- `adtype`: Automatic differentiation backend to be applied to the log-density. The default value for `algorithm` also uses this backend for differentiation the variational objective. See the docs of `AdvancedVI.optimize` for additional keyword arguments. # Returns -- `q`: Variational distribution formed by the last iterate of the optimization run. -- `q_avg`: Variational distribution formed by the averaged iterates according to `averager`. -- `state`: Collection of states used for optimization. This can be used to resume from a past call to `vi`. -- `info`: Information generated during the optimization run. +- `q`: Output variational distribution of `algorithm`. +- `state`: Collection of states used by `algorithm`. This can be used to resume from a past call to `vi`. +- `info`: Information generated while executing `algorithm`. """ function vi( rng::Random.AbstractRNG, model::DynamicPPL.Model, q, - n_iterations::Int; - objective=AdvancedVI.RepGradELBO( - 10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient() - ), - show_progress::Bool=PROGRESS[], - optimizer=AdvancedVI.DoWG(), - averager=AdvancedVI.PolynomialAveraging(), - operator=AdvancedVI.ProximalLocationScaleEntropy(), + max_iter::Int, + args...; adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, + algorithm=KLMinRepGradProxDescent(adtype; n_samples=10), + show_progress::Bool=PROGRESS[], kwargs..., ) return AdvancedVI.optimize( rng, - LogDensityFunction(model), - objective, + algorithm, + max_iter, + LogDensityFunction(model; adtype), q, - n_iterations; + args...; show_progress=show_progress, - adtype, - optimizer, - averager, - operator, kwargs..., ) end -function vi(model::DynamicPPL.Model, q, n_iterations::Int; kwargs...) - return vi(Random.default_rng(), model, q, n_iterations; kwargs...) +function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) + return vi(Random.default_rng(), model, q, max_iter; kwargs...) end end diff --git a/src/variational/deprecated.jl b/src/variational/deprecated.jl deleted file mode 100644 index 9a9f4777b5..0000000000 --- a/src/variational/deprecated.jl +++ /dev/null @@ -1,61 +0,0 @@ - -import DistributionsAD -export ADVI - -Base.@deprecate meanfield(model) q_meanfield_gaussian(model) - -struct ADVI{AD} - "Number of samples used to estimate the ELBO in each optimization step." - samples_per_step::Int - "Maximum number of gradient steps." - max_iters::Int - "AD backend used for automatic differentiation." - adtype::AD -end - -function ADVI( - samples_per_step::Int=1, - max_iters::Int=1000; - adtype::ADTypes.AbstractADType=ADTypes.AutoForwardDiff(), -) - Base.depwarn( - "The type ADVI will be removed in future releases. Please refer to the new interface for `vi`", - :ADVI; - force=true, - ) - return ADVI{typeof(adtype)}(samples_per_step, max_iters, adtype) -end - -function vi(model::DynamicPPL.Model, alg::ADVI; kwargs...) - Base.depwarn( - "This specialization along with the type `ADVI` will be deprecated in future releases. 
Please refer to the new interface for `vi`.", - :vi; - force=true, - ) - q = q_meanfield_gaussian(Random.default_rng(), model) - objective = AdvancedVI.RepGradELBO( - alg.samples_per_step; entropy=AdvancedVI.ClosedFormEntropy() - ) - operator = AdvancedVI.IdentityOperator() - _, q_avg, _, _ = vi(model, q, alg.max_iters; objective, operator, kwargs...) - return q_avg -end - -function vi( - model::DynamicPPL.Model, - alg::ADVI, - q::Bijectors.TransformedDistribution{<:DistributionsAD.TuringDiagMvNormal}; - kwargs..., -) - Base.depwarn( - "This specialization along with the type `ADVI` will be deprecated in future releases. Please refer to the new interface for `vi`.", - :vi; - force=true, - ) - objective = AdvancedVI.RepGradELBO( - alg.samples_per_step; entropy=AdvancedVI.ClosedFormEntropy() - ) - operator = AdvancedVI.IdentityOperator() - _, q_avg, _, _ = vi(model, q, alg.max_iters; objective, operator, kwargs...) - return q_avg -end diff --git a/test/Project.toml b/test/Project.toml index 138b1a1a0d..b03dfd8970 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -44,7 +44,7 @@ AbstractMCMC = "5" AbstractPPL = "0.11, 0.12, 0.13" AdvancedMH = "0.6, 0.7, 0.8" AdvancedPS = "0.7" -AdvancedVI = "0.4" +AdvancedVI = "0.5" Aqua = "0.8" BangBang = "0.4" Bijectors = "0.14, 0.15" diff --git a/test/runtests.jl b/test/runtests.jl index 5fb6b21411..d2535d58a3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,7 +13,7 @@ include("test_utils/models.jl") include("test_utils/numerical_tests.jl") include("test_utils/sampler.jl") -Turing.setprogress!(false) +#Turing.setprogress!(false) included_paths, excluded_paths = parse_args(ARGS) # Filter which tests to run and collect timing and allocations information to show in a @@ -30,55 +30,59 @@ macro timeit_include(path::AbstractString) end @testset "Turing" verbose = true begin - @testset "Aqua" begin - @timeit_include("Aqua.jl") - end + # @testset "Aqua" begin + # @timeit_include("Aqua.jl") + # end - @testset "AD" verbose = true begin - @timeit_include("ad.jl") - end + # @testset "AD" verbose = true begin + # @timeit_include("ad.jl") + # end - @testset "essential" verbose = true begin - @timeit_include("essential/container.jl") - end + # @testset "essential" verbose = true begin + # @timeit_include("essential/container.jl") + # end - @testset "samplers (without AD)" verbose = true begin - @timeit_include("mcmc/particle_mcmc.jl") - @timeit_include("mcmc/emcee.jl") - @timeit_include("mcmc/ess.jl") - @timeit_include("mcmc/is.jl") - end + # @testset "samplers (without AD)" verbose = true begin + # @timeit_include("mcmc/particle_mcmc.jl") + # @timeit_include("mcmc/emcee.jl") + # @timeit_include("mcmc/ess.jl") + # @timeit_include("mcmc/is.jl") + # end @timeit TIMEROUTPUT "inference" begin - @testset "inference with samplers" verbose = true begin - @timeit_include("mcmc/gibbs.jl") - @timeit_include("mcmc/hmc.jl") - @timeit_include("mcmc/Inference.jl") - @timeit_include("mcmc/sghmc.jl") - @timeit_include("mcmc/external_sampler.jl") - @timeit_include("mcmc/mh.jl") - @timeit_include("ext/dynamichmc.jl") - @timeit_include("mcmc/repeat_sampler.jl") - end + # @testset "inference with samplers" verbose = true begin + # @timeit_include("mcmc/gibbs.jl") + # @timeit_include("mcmc/hmc.jl") + # @timeit_include("mcmc/Inference.jl") + # @timeit_include("mcmc/sghmc.jl") + # @timeit_include("mcmc/external_sampler.jl") + # @timeit_include("mcmc/mh.jl") + # @timeit_include("ext/dynamichmc.jl") + # @timeit_include("mcmc/repeat_sampler.jl") + # end @testset "variational 
algorithms" begin @timeit_include("variational/advi.jl") end - @testset "mode estimation" verbose = true begin - @timeit_include("optimisation/Optimisation.jl") - @timeit_include("ext/OptimInterface.jl") - end + # @testset "mode estimation" verbose = true begin + # @timeit_include("optimisation/Optimisation.jl") + # @timeit_include("ext/OptimInterface.jl") + # end end - @testset "stdlib" verbose = true begin - @timeit_include("stdlib/distributions.jl") - @timeit_include("stdlib/RandomMeasures.jl") - end + # @testset "variational optimisers" begin + # @timeit_include("variational/optimisers.jl") + # end - @testset "utilities" begin - @timeit_include("mcmc/utilities.jl") - end + # @testset "stdlib" verbose = true begin + # @timeit_include("stdlib/distributions.jl") + # @timeit_include("stdlib/RandomMeasures.jl") + # end + + # @testset "utilities" begin + # @timeit_include("mcmc/utilities.jl") + # end end show(TIMEROUTPUT; compact=true, sortby=:firstexec) diff --git a/test/variational/advi.jl b/test/variational/advi.jl index ed8f745df2..2ab5d2b424 100644 --- a/test/variational/advi.jl +++ b/test/variational/advi.jl @@ -10,12 +10,16 @@ using Distributions: Dirichlet, Normal using LinearAlgebra using MCMCChains: Chains using Random +using ReverseDiff using StableRNGs: StableRNG using Test: @test, @testset using Turing using Turing.Variational @testset "ADVI" begin + adtype = AutoReverseDiff() + operator = AdvancedVI.ClipScale() + @testset "q initialization" begin m = gdemo_default d = length(Turing.DynamicPPL.VarInfo(m)[:]) @@ -41,86 +45,62 @@ using Turing.Variational @testset "default interface" begin for q0 in [q_meanfield_gaussian(gdemo_default), q_fullrank_gaussian(gdemo_default)] - _, q, _, _ = vi(gdemo_default, q0, 100; show_progress=Turing.PROGRESS[]) + q, _, _ = vi(gdemo_default, q0, 100; show_progress=Turing.PROGRESS[], adtype) c1 = rand(q, 10) end end - @testset "custom interface $name" for (name, objective, operator, optimizer) in [ - ( - "ADVI with closed-form entropy", - AdvancedVI.RepGradELBO(10), - AdvancedVI.ProximalLocationScaleEntropy(), - AdvancedVI.DoG(), - ), + @testset "custom algorithm $name" for (name, algorithm) in [ ( - "ADVI with proximal entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradProxDescent", + KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), ), ( - "ADVI with STL entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.StickingTheLandingEntropy()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradDescent", + KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), ), ] T = 1000 - q, q_avg, _, _ = vi( + q, _, _ = vi( gdemo_default, q_meanfield_gaussian(gdemo_default), T; - objective, - optimizer, - operator, + algorithm, + adtype, show_progress=Turing.PROGRESS[], ) - N = 1000 - c1 = rand(q_avg, N) c2 = rand(q, N) end - @testset "inference $name" for (name, objective, operator, optimizer) in [ + @testset "inference $name" for (name, algorithm) in [ ( - "ADVI with closed-form entropy", - AdvancedVI.RepGradELBO(10), - AdvancedVI.ProximalLocationScaleEntropy(), - AdvancedVI.DoG(), + "KLMinRepGradProxDescent", + KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), ), ( - "ADVI with proximal entropy", - RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), - AdvancedVI.ClipScale(), - AdvancedVI.DoG(), - ), - ( - "ADVI with STL entropy", - AdvancedVI.RepGradELBO(10; entropy=AdvancedVI.StickingTheLandingEntropy()), - 
AdvancedVI.ClipScale(), - AdvancedVI.DoG(), + "KLMinRepGradDescent", + KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), ), ] rng = StableRNG(0x517e1d9bf89bf94f) T = 1000 - q, q_avg, _, _ = vi( + q, _, _ = vi( rng, gdemo_default, q_meanfield_gaussian(gdemo_default), T; - optimizer, + algorithm, + adtype, show_progress=Turing.PROGRESS[], ) N = 1000 - for q_out in [q_avg, q] - samples = transpose(rand(rng, q_out, N)) - chn = Chains(reshape(samples, size(samples)..., 1), ["s", "m"]) + samples = transpose(rand(rng, q, N)) + chn = Chains(reshape(samples, size(samples)..., 1), ["s", "m"]) - check_gdemo(chn; atol=0.5) - end + check_gdemo(chn; atol=0.5) end # regression test for: @@ -143,7 +123,7 @@ using Turing.Variational @test all(x0 .≈ x0_inv) # And regression for https://github.com/TuringLang/Turing.jl/issues/2160. - _, q, _, _ = vi(rng, m, q_meanfield_gaussian(m), 1000) + q, _, _ = vi(rng, m, q_meanfield_gaussian(m), 1000; adtype) x = rand(rng, q, 1000) @test mean(eachcol(x)) ≈ [0.5, 0.5] atol = 0.1 end @@ -158,7 +138,7 @@ using Turing.Variational end model = demo_issue2205() | (y=1.0,) - _, q, _, _ = vi(rng, model, q_meanfield_gaussian(model), 1000) + q, _, _ = vi(rng, model, q_meanfield_gaussian(model), 1000; adtype) # True mean. mean_true = 1 / 2 var_true = 1 / 2 From 86ee6dd5e1ec64f39e451794375a89122905b7d3 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 22 Oct 2025 04:34:51 -0400 Subject: [PATCH 02/32] revert unintended commit of `runtests.jl` --- test/runtests.jl | 78 +++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index d2535d58a3..5fb6b21411 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,7 +13,7 @@ include("test_utils/models.jl") include("test_utils/numerical_tests.jl") include("test_utils/sampler.jl") -#Turing.setprogress!(false) +Turing.setprogress!(false) included_paths, excluded_paths = parse_args(ARGS) # Filter which tests to run and collect timing and allocations information to show in a @@ -30,59 +30,55 @@ macro timeit_include(path::AbstractString) end @testset "Turing" verbose = true begin - # @testset "Aqua" begin - # @timeit_include("Aqua.jl") - # end + @testset "Aqua" begin + @timeit_include("Aqua.jl") + end - # @testset "AD" verbose = true begin - # @timeit_include("ad.jl") - # end + @testset "AD" verbose = true begin + @timeit_include("ad.jl") + end - # @testset "essential" verbose = true begin - # @timeit_include("essential/container.jl") - # end + @testset "essential" verbose = true begin + @timeit_include("essential/container.jl") + end - # @testset "samplers (without AD)" verbose = true begin - # @timeit_include("mcmc/particle_mcmc.jl") - # @timeit_include("mcmc/emcee.jl") - # @timeit_include("mcmc/ess.jl") - # @timeit_include("mcmc/is.jl") - # end + @testset "samplers (without AD)" verbose = true begin + @timeit_include("mcmc/particle_mcmc.jl") + @timeit_include("mcmc/emcee.jl") + @timeit_include("mcmc/ess.jl") + @timeit_include("mcmc/is.jl") + end @timeit TIMEROUTPUT "inference" begin - # @testset "inference with samplers" verbose = true begin - # @timeit_include("mcmc/gibbs.jl") - # @timeit_include("mcmc/hmc.jl") - # @timeit_include("mcmc/Inference.jl") - # @timeit_include("mcmc/sghmc.jl") - # @timeit_include("mcmc/external_sampler.jl") - # @timeit_include("mcmc/mh.jl") - # @timeit_include("ext/dynamichmc.jl") - # @timeit_include("mcmc/repeat_sampler.jl") - # end + @testset "inference with samplers" verbose = true begin + 
@timeit_include("mcmc/gibbs.jl") + @timeit_include("mcmc/hmc.jl") + @timeit_include("mcmc/Inference.jl") + @timeit_include("mcmc/sghmc.jl") + @timeit_include("mcmc/external_sampler.jl") + @timeit_include("mcmc/mh.jl") + @timeit_include("ext/dynamichmc.jl") + @timeit_include("mcmc/repeat_sampler.jl") + end @testset "variational algorithms" begin @timeit_include("variational/advi.jl") end - # @testset "mode estimation" verbose = true begin - # @timeit_include("optimisation/Optimisation.jl") - # @timeit_include("ext/OptimInterface.jl") - # end + @testset "mode estimation" verbose = true begin + @timeit_include("optimisation/Optimisation.jl") + @timeit_include("ext/OptimInterface.jl") + end end - # @testset "variational optimisers" begin - # @timeit_include("variational/optimisers.jl") - # end - - # @testset "stdlib" verbose = true begin - # @timeit_include("stdlib/distributions.jl") - # @timeit_include("stdlib/RandomMeasures.jl") - # end + @testset "stdlib" verbose = true begin + @timeit_include("stdlib/distributions.jl") + @timeit_include("stdlib/RandomMeasures.jl") + end - # @testset "utilities" begin - # @timeit_include("mcmc/utilities.jl") - # end + @testset "utilities" begin + @timeit_include("mcmc/utilities.jl") + end end show(TIMEROUTPUT; compact=true, sortby=:firstexec) From d87004562aad8881eed0abe653a7d55ab85a5105 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:08:18 -0400 Subject: [PATCH 03/32] update docs for `vi` --- src/variational/VariationalInference.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 1af8a24eb0..630d3b62f2 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -254,14 +254,17 @@ end model::DynamicPPL.Model, q, max_iter::Int; - algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent(DEFAULT_ADTYPE; n_samples=10), + adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, + algorithm::AdvancedVI.AbstractVariationalAlgorithm = KLMinRepGradProxDescent( + adtype; n_samples=10 + ), show_progress::Bool = Turing.PROGRESS[], kwargs... ) Approximate the target `model` via the variational inference algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. -The default `algorithm` assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. +The default `algorithm`, `KLMinRepGradProxDescent` ([relevant docs](https://turinglang.org/AdvancedVI.jl/dev/klminrepgradproxdescent/)), assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. # Arguments @@ -270,9 +273,9 @@ For other variational families, refer to `AdvancedVI` to determine the best algo - `max_iter`: Maximum number of steps. # Keyword Arguments +- `adtype`: Automatic differentiation backend to be applied to the log-density. The default value for `algorithm` also uses this backend for differentiation the variational objective. - `algorithm`: Variational inference algorithm. - `show_progress`: Whether to show the progress bar. -- `adtype`: Automatic differentiation backend to be applied to the log-density. 
The default value for `algorithm` also uses this backend for differentiation the variational objective. See the docs of `AdvancedVI.optimize` for additional keyword arguments. @@ -288,7 +291,9 @@ function vi( max_iter::Int, args...; adtype::ADTypes.AbstractADType=DEFAULT_ADTYPE, - algorithm=KLMinRepGradProxDescent(adtype; n_samples=10), + algorithm::AdvancedVI.AbstractVariationalAlgorithm=KLMinRepGradProxDescent( + adtype; n_samples=10 + ), show_progress::Bool=PROGRESS[], kwargs..., ) From 2d928e0a5e731cfa54eb6a02e09a3401bc5f5ff2 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:17:04 -0400 Subject: [PATCH 04/32] add history entry for `AdvancedVI@0.5` --- HISTORY.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index dc66f1f496..83686b257b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,19 @@ + # 0.42.0 +## Breaking Changes + +**AdvancedVI 0.5** + +Turing.jl v0.42 updates `AdvancedVI.jl` compatibility to 0.5. +Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some changes spilling out into the interface. +The summary of the changes below are the things that affect the end-users of Turing. +For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. + +- A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +- The default hyperparameters of `DoG`and `DoWG` have been altered. +- The depricated `AdvancedVI@0.2`-era interface is now removed. + # 0.41.0 ## DynamicPPL 0.38 @@ -62,7 +76,7 @@ Note that if the initial sample is included, the corresponding sampler statistic Due to a technical limitation of MCMCChains.jl, this causes all indexing into MCMCChains to return `Union{Float64, Missing}` or similar. If you want the old behaviour, you can discard the first sample (e.g. using `discard_initial=1`). -# 0.40.5 +# 0.4# 0.40.5 Bump Optimization.jl compatibility to include v5. 
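For readers following the series, here is a minimal sketch of the call pattern that the HISTORY entry added above describes. It is illustrative only: the toy model, iteration count, and AD backend are placeholders, `AdvancedVI.ClipScale()` comes from AdvancedVI.jl (a Turing dependency assumed to be installed), and the three-value unpacking mirrors the updated tests in this series.

```julia
using Turing
using AdvancedVI  # for ClipScale; Turing builds on AdvancedVI

@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    for i in eachindex(x)
        x[i] ~ Normal(m, sqrt(s²))
    end
end

model = gdemo([1.5, 2.0])
q0 = q_meanfield_gaussian(model)

# Default algorithm: KLMinRepGradProxDescent over a location-scale family.
q, _, _ = vi(model, q0, 1000)

# Options that used to be separate keyword arguments of `vi` (objective,
# operator, averager, ...) now live inside the `algorithm` object.
q, _, _ = vi(
    model,
    q0,
    1000;
    algorithm=KLMinRepGradDescent(
        AutoForwardDiff(); n_samples=10, operator=AdvancedVI.ClipScale()
    ),
)
```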
From 5211b37286302a2f850fa19f025cd3dbf3cd0f2a Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:18:40 -0400 Subject: [PATCH 05/32] remove export for removed symbol --- src/Turing.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Turing.jl b/src/Turing.jl index 98cfcf29c7..a4f40df259 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -116,7 +116,6 @@ export externalsampler, # Variational inference - AdvancedVI vi, - ADVI, q_locationscale, q_meanfield_gaussian, q_fullrank_gaussian, From f0d615d92ddc6763a4ea38b481b1da04e0cd8e97 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:24:13 -0400 Subject: [PATCH 06/32] fix formatting Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 1 - 1 file changed, 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 83686b257b..6e0a620ba0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,3 @@ - # 0.42.0 ## Breaking Changes From 1b2351f2501a6ae143c100ffd7169c6a60ced58f Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:24:21 -0400 Subject: [PATCH 07/32] fix formatting Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6e0a620ba0..3327481194 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,9 +9,9 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. -- A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. -- The default hyperparameters of `DoG`and `DoWG` have been altered. -- The depricated `AdvancedVI@0.2`-era interface is now removed. + - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. + - The default hyperparameters of `DoG`and `DoWG` have been altered. + - The depricated `AdvancedVI@0.2`-era interface is now removed. 
# 0.41.0 From 2be31b4e69a7c485c269b6a055ee007f955ba1a9 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:27:57 -0400 Subject: [PATCH 08/32] tidy tests advi --- test/variational/advi.jl | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/test/variational/advi.jl b/test/variational/advi.jl index 2ab5d2b424..b426f0e6a3 100644 --- a/test/variational/advi.jl +++ b/test/variational/advi.jl @@ -51,14 +51,8 @@ using Turing.Variational end @testset "custom algorithm $name" for (name, algorithm) in [ - ( - "KLMinRepGradProxDescent", - KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), - ), - ( - "KLMinRepGradDescent", - KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), - ), + ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), + ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), ] T = 1000 q, _, _ = vi( @@ -74,14 +68,8 @@ using Turing.Variational end @testset "inference $name" for (name, algorithm) in [ - ( - "KLMinRepGradProxDescent", - KLMinRepGradProxDescent(AutoReverseDiff(); n_samples=10), - ), - ( - "KLMinRepGradDescent", - KLMinRepGradDescent(AutoReverseDiff(); operator, n_samples=10), - ), + ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), + ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From e48ae42e38ab3f966304283cbadabac0c98a84df Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 00:29:12 -0400 Subject: [PATCH 09/32] fix rename file `advi.jl` to `vi.jl` to reflect naming changes --- test/variational/{advi.jl => vi.jl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/variational/{advi.jl => vi.jl} (100%) diff --git a/test/variational/advi.jl b/test/variational/vi.jl similarity index 100% rename from test/variational/advi.jl rename to test/variational/vi.jl From 44f776255b01b9c1d050a8e3d65fcb3502e21807 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:38:35 -0400 Subject: [PATCH 10/32] fix docs Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3327481194..cd796e8ec7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,7 +9,7 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. - - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. + - A new level of interface for defining different variational algorithms has been introduced in `AdvancedVI` v0.5. As a result, the function `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. 
Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. - The default hyperparameters of `DoG`and `DoWG` have been altered. - The depricated `AdvancedVI@0.2`-era interface is now removed. From fd0e9286140775b441b918c9a44599c575718c80 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:38:51 -0400 Subject: [PATCH 11/32] fix HISTORY.md Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index cd796e8ec7..341814e6bf 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -75,7 +75,7 @@ Note that if the initial sample is included, the corresponding sampler statistic Due to a technical limitation of MCMCChains.jl, this causes all indexing into MCMCChains to return `Union{Float64, Missing}` or similar. If you want the old behaviour, you can discard the first sample (e.g. using `discard_initial=1`). -# 0.4# 0.40.5 +# 0.40.5 Bump Optimization.jl compatibility to include v5. From 77276bdc6c048fba4be1343ca745361c9d20145a Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:39:03 -0400 Subject: [PATCH 12/32] fix HISTORY.md Co-authored-by: Markus Hauru --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 341814e6bf..2f75200af3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ For a more comprehensive list of changes, please refer to the [changelogs](https - A new level of interface for defining different variational algorithms has been introduced in `AdvancedVI` v0.5. As a result, the function `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. - The default hyperparameters of `DoG`and `DoWG` have been altered. - - The depricated `AdvancedVI@0.2`-era interface is now removed. + - The deprecated `AdvancedVI@0.2`-era interface is now removed. # 0.41.0 From e70ddb4fc8c7a884440a6622524a526dcf73c8bd Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 24 Oct 2025 22:46:11 -0400 Subject: [PATCH 13/32] update history --- HISTORY.md | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3327481194..ba92be1ffd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,7 +9,44 @@ Most of the changes introduced in `AdvancedVI.jl@0.5` are structural, with some The summary of the changes below are the things that affect the end-users of Turing. For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`. - - A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. 
Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +A new level of interface for defining different variational algorithms have been introduced in `AdvancedVI` v0.5. As a result, the method `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific such as `objective`, `operator`, `averager` and so on, have been moved as fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs. +For example, + +```julia +vi(model, q, n_iters; objective=RepGradELBO(10), operator=AdvancedVI.ClipScale()) +``` + +is now + +```julia +vi( + model, + q, + n_iters; + algorithm=KLMinRepGradDescent(adtype; n_samples=10, operator=AdvancedVI.ClipScale()), +) +``` + +Similarly, + +```julia +vi( + model, + q, + n_iters; + objective=RepGradELBO(10; entropy=AdvancedVI.ClosedFormEntropyZeroGradient()), + operator=AdvancedVI.ProximalLocationScaleEntropy(), +) +``` + +is now + +```julia +vi(model, q, n_iters; algorithm=KLMinRepGradProxDescent(adtype; n_samples=10)) +``` + +Additionally, + - The default hyperparameters of `DoG`and `DoWG` have been altered. - The depricated `AdvancedVI@0.2`-era interface is now removed. From cdc8b2fcc0c432fca417b42b2e6c09b54c8356d6 Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Wed, 12 Nov 2025 16:49:11 +0000 Subject: [PATCH 14/32] Update README.md for clarity and formatting --- README.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 21167ac585..d320d72c05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ -

[README header hunk: the centered "Turing.jl logo" image is kept, the "Turing.jl" title line is removed, and the tagline changes from "Probabilistic programming and Bayesian inference in Julia" to "Bayesian inference with probabilistic programming"]

Tutorials API docs @@ -9,7 +8,7 @@ ColPrac: Contributor's Guide on Collaborative Practices for Community Packages

-## 🚀 Get started +## Get started Install Julia (see [the official Julia website](https://julialang.org/install/); you will need at least Julia 1.10 for the latest version of Turing.jl). Then, launch a Julia REPL and run: @@ -38,7 +37,7 @@ You can find the main TuringLang documentation at [**https://turinglang.org**](h API documentation for Turing.jl is specifically available at [**https://turinglang.org/Turing.jl/stable**](https://turinglang.org/Turing.jl/stable/). -## 🛠️ Contributing +## Contributing ### Issues @@ -55,20 +54,20 @@ Breaking releases (minor version) should target the `breaking` branch. If you have not received any feedback on an issue or PR for a while, please feel free to ping `@TuringLang/maintainers` in a comment. -## 💬 Other channels +## Other channels The Turing.jl userbase tends to be most active on the [`#turing` channel of Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). If you do not have an invitation to Julia's Slack, you can get one from [the official Julia website](https://julialang.org/slack/). There are also often threads on [Julia Discourse](https://discourse.julialang.org) (you can search using, e.g., [the `turing` tag](https://discourse.julialang.org/tag/turing)). -## 🔄 What's changed recently? +## What's changed recently? We publish a fortnightly newsletter summarising recent updates in the TuringLang ecosystem, which you can view on [our website](https://turinglang.org/news/), [GitHub](https://github.com/TuringLang/Turing.jl/issues/2498), or [Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). For Turing.jl specifically, you can see a full changelog in [`HISTORY.md`](https://github.com/TuringLang/Turing.jl/blob/main/HISTORY.md) or [our GitHub releases](https://github.com/TuringLang/Turing.jl/releases). -## 🧩 Where does Turing.jl sit in the TuringLang ecosystem? +## Where does Turing.jl sit in the TuringLang ecosystem? Turing.jl is the main entry point for users, and seeks to provide a unified, convenient interface to all of the functionality in the TuringLang (and broader Julia) ecosystem. @@ -125,5 +124,3 @@ month = feb, ``` - -You can see the full list of publications that have cited Turing.jl on [Google Scholar](https://scholar.google.com/scholar?cites=11803241473159708991). 
From 32e70d6ee5735f8a424b2b048ef70512d1039c6a Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Wed, 12 Nov 2025 16:52:03 +0000 Subject: [PATCH 15/32] Add linear regression model example to README --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d320d72c05..250c3c4f34 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,22 @@ You can define models using the `@model` macro, and then perform Markov chain Mo ```julia julia> using Turing -julia> @model function my_first_model(data) - mean ~ Normal(0, 1) - sd ~ truncated(Cauchy(0, 3); lower=0) - data ~ Normal(mean, sd) +julia> @model function linear_regression(x) + # Priors + α ~ Normal(0, 1) + β ~ Normal(0, 1) + σ² ~ truncated(Cauchy(0, 3); lower=0) + + # Likelihood + μ = α .+ β .* x + y ~ MvNormal(μ, σ² * I) end -julia> model = my_first_model(randn()) +julia> x, y = rand(10), rand(10) -julia> chain = sample(model, NUTS(), 1000) +julia> posterior = linear_regression(x) | (; y = y) + +julia> chain = sample(posterior, NUTS(), 1000) ``` You can find the main TuringLang documentation at [**https://turinglang.org**](https://turinglang.org), which contains general information about Turing.jl's features, as well as a variety of tutorials with examples of Turing.jl models. From 19bf7d6bc6f7264ecb12b47b63d92c0e08068794 Mon Sep 17 00:00:00 2001 From: Shravan Goswami <123811742+shravanngoswamii@users.noreply.github.com> Date: Thu, 13 Nov 2025 02:52:24 +0530 Subject: [PATCH 16/32] Add dark/light mode logo support (#2714) Just a minor README update to support dark/light mode logo, good for dark mode users! --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 250c3c4f34..7f953700ea 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -

[README header hunk: the plain centered "Turing.jl logo" image is replaced by a dark/light-mode picture block with the same "Turing.jl logo" alt text]

Bayesian inference with probabilistic programming

Tutorials From 4c02f7baf27e6b83e945148267f6bf95ad56db94 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 16:37:37 -0500 Subject: [PATCH 17/32] bump AdvancedVI version --- Project.toml | 2 +- test/Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index bbddd9508f..a52829c5f4 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ Accessors = "0.1" AdvancedHMC = "0.3.0, 0.4.0, 0.5.2, 0.6, 0.7, 0.8" AdvancedMH = "0.8" AdvancedPS = "0.7" -AdvancedVI = "0.5" +AdvancedVI = "0.6" BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" diff --git a/test/Project.toml b/test/Project.toml index 8d819a6749..7833656757 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -44,7 +44,7 @@ AbstractMCMC = "5" AbstractPPL = "0.11, 0.12, 0.13" AdvancedMH = "0.6, 0.7, 0.8" AdvancedPS = "0.7" -AdvancedVI = "0.5" +AdvancedVI = "0.6" Aqua = "0.8" BangBang = "0.4" Bijectors = "0.14, 0.15" From 6518b821c5b1e575450dc855b05526e553207bb0 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 16:59:23 -0500 Subject: [PATCH 18/32] add exports new algorithms, modify `vi` to operate in unconstrained --- src/Turing.jl | 3 ++ src/variational/VariationalInference.jl | 63 +++++++++++++++++++------ test/variational/vi.jl | 12 +++++ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index a4f40df259..0528788fed 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -122,6 +122,9 @@ export KLMinRepGradProxDescent, KLMinRepGradDescent, KLMinScoreGradDescent, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent, + KLMinWassFwdBwd, # ADTypes AutoForwardDiff, AutoReverseDiff, diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 630d3b62f2..d4b1c6ceab 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -2,13 +2,20 @@ module Variational using AdvancedVI: - AdvancedVI, KLMinRepGradDescent, KLMinRepGradProxDescent, KLMinScoreGradDescent + AdvancedVI, + KLMinRepGradDescent, + KLMinRepGradProxDescent, + KLMinScoreGradDescent, + KLMinWassFwdBwd, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent + using ADTypes using Bijectors: Bijectors using Distributions -using DynamicPPL +using DynamicPPL: DynamicPPL using LinearAlgebra -using LogDensityProblems +using LogDensityProblems: LogDensityProblems using Random using ..Turing: DEFAULT_ADTYPE, PROGRESS @@ -18,7 +25,17 @@ export vi, q_fullrank_gaussian, KLMinRepGradProxDescent, KLMinRepGradDescent, - KLMinScoreGradDescent + KLMinScoreGradDescent, + KLMinWassFwdBwd, + KLMinNaturalGradDescent, + KLMinSqrtNaturalGradDescent + +requires_unconstrained_space(::AdvancedVI.AbstractVariationalAlgorithm) = false +requires_unconstrained_space(::AdvancedVI.KLMinRepGradProxDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinRepGradDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinWassFwdBwd) = true +requires_unconstrained_space(::AdvancedVI.KLMinNaturalGradDescent) = true +requires_unconstrained_space(::AdvancedVI.KLMinSqrtNaturalGradDescent) = true """ q_initialize_scale( @@ -65,7 +82,7 @@ function q_initialize_scale( num_max_trials::Int=10, reduce_factor::Real=one(eltype(scale)) / 2, ) - prob = LogDensityFunction(model) + prob = DynamicPPL.LogDensityFunction(model) ℓπ = Base.Fix1(LogDensityProblems.logdensity, prob) varinfo = DynamicPPL.VarInfo(model) @@ -264,8 +281,12 @@ end Approximate the target `model` via the variational inference 
algorithm `algorithm` by starting from the initial variational approximation `q`. This is a thin wrapper around `AdvancedVI.optimize`. + +If the chosen variational inference algorithm operates in an unconstrained space, then the provided initial variational approximation `q` must be a `Bijectors.TransformedDistribution` of an unconstrained distribution. +For example, the initialization supplied by `q_meanfield_gaussian`,`q_fullrank_gaussian`, `q_locationscale`. + The default `algorithm`, `KLMinRepGradProxDescent` ([relevant docs](https://turinglang.org/AdvancedVI.jl/dev/klminrepgradproxdescent/)), assumes `q` uses `AdvancedVI.MvLocationScale`, which can be constructed by invoking `q_fullrank_gaussian` or `q_meanfield_gaussian`. -For other variational families, refer to `AdvancedVI` to determine the best algorithm and options. +For other variational families, refer the documentation of `AdvancedVI` to determine the best algorithm and other options. # Arguments - `model`: The target `DynamicPPL.Model`. @@ -294,19 +315,31 @@ function vi( algorithm::AdvancedVI.AbstractVariationalAlgorithm=KLMinRepGradProxDescent( adtype; n_samples=10 ), + unconstrained::Bool=requires_unconstrained_space(algorithm), show_progress::Bool=PROGRESS[], kwargs..., ) - return AdvancedVI.optimize( - rng, - algorithm, - max_iter, - LogDensityFunction(model; adtype), - q, - args...; - show_progress=show_progress, - kwargs..., + prob, q, trans = if unconstrained + @assert q isa Bijectors.TransformedDistribution "The algorithm $(algorithm) operates in an unconstrained space. Therefore, the initial variational approximation is expected to be a Bijectors.TransformedDistribution of an unconstrained distribution." + vi = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal) + vi = DynamicPPL.set_transformed!!(vi, true) + prob = DynamicPPL.LogDensityFunction( + model, DynamicPPL.getlogjoint_internal, vi; adtype + ) + prob, q.dist, q.transform + else + prob = DynamicPPL.LogDensityFunction(model; adtype) + prob, q, nothing + end + q, info, state = AdvancedVI.optimize( + rng, algorithm, max_iter, prob, q, args...; show_progress=show_progress, kwargs... ) + q = if unconstrained + Bijectors.TransformedDistribution(q, trans) + else + q + end + q, info, state end function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) 
diff --git a/test/variational/vi.jl b/test/variational/vi.jl index b426f0e6a3..efe82cb454 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -53,6 +53,12 @@ using Turing.Variational @testset "custom algorithm $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ( + "KLMinSqrtNaturalGradDescent", + KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + ), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] T = 1000 q, _, _ = vi( @@ -70,6 +76,12 @@ using Turing.Variational @testset "inference $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ( + "KLMinSqrtNaturalGradDescent", + KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + ), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From 874a0b24ed1672428d88dfb6cbdad4c96501dfdd Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:06:23 -0500 Subject: [PATCH 19/32] add clarification on initializing unconstrained algorithms --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index dd7ab477e3..8d24469adc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -68,6 +68,7 @@ Additionally, - The default hyperparameters of `DoG`and `DoWG` have been altered. - The deprecated `AdvancedVI@0.2`-era interface is now removed. - `estimate_objective` now returns the value to be minimized by the optimization algorithm. For example, for ELBO maximization algorithms, `estimate_objective` will return the *negative ELBO*. This is breaking change from the previous behavior where the ELBO was returns. + - When using algorithms that expect to operate in unconstrained spaces, the user is now explicitly expected to provide a `Bijectors.TransformedDistribution` wrapping an unconstrained distribution. (Refer to the docstring of `vi`.) 
### New Features From e021eb7bf42a2a2b06d3f9a073f20d16eec4f6e6 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:10:46 -0500 Subject: [PATCH 20/32] update api --- docs/src/api.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/src/api.md b/docs/src/api.md index 885d587ea6..2eda3be6f4 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -114,6 +114,12 @@ See the [docs of AdvancedVI.jl](https://turinglang.org/AdvancedVI.jl/stable/) fo | `q_locationscale` | [`Turing.Variational.q_locationscale`](@ref) | Find a numerically non-degenerate initialization for a location-scale variational family | | `q_meanfield_gaussian` | [`Turing.Variational.q_meanfield_gaussian`](@ref) | Find a numerically non-degenerate initialization for a mean-field Gaussian family | | `q_fullrank_gaussian` | [`Turing.Variational.q_fullrank_gaussian`](@ref) | Find a numerically non-degenerate initialization for a full-rank Gaussian family | +| `KLMinRepGradDescent` | [`Turing.Variational.KLMinRepGradDescent`](@ref) | KL divergence minimization via stochastic gradient descent with the reparameterization gradient | +| `KLMinRepGradProxDescent` | [`Turing.Variational.KLMinRepGradProxDescent`](@ref) | KL divergence minimization via stochastic proximal gradient descent with the reparameterization gradient over location-scale variational families | +| `KLMinScoreGradDescent` | [`Turing.Variational.KLMinScoreGradDescent`](@ref) | KL divergence minimization via stochastic gradient descent with the score gradient | +| `KLMinWassFwdBwd` | [`Turing.Variational.KLMinWassFwdBwd`](@ref) | KL divergence minimization via Wasserstein proximal gradient descent | +| `KLMinNaturalGradDescent` | [`Turing.Variational.KLMinNaturalGradDescent`](@ref) | KL divergence minimization via natural gradient descent | +| `KLMinSqrtNaturalGradDescent` | [`Turing.Variational.KLMinSqrtNaturalGradDescent`](@ref) | KL divergence minimization via natural gradient descent in the square-root parameterization | ### Automatic differentiation types From eec7ef2e571623fe1a5bbadcbbbb32f6d0143b14 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:17 -0500 Subject: [PATCH 21/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index d4b1c6ceab..09f7861963 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -339,7 +339,7 @@ function vi( else q end - q, info, state + return q, info, state end function vi(model::DynamicPPL.Model, q, max_iter::Int; kwargs...) 
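To make the new rows of the API table above concrete, a hedged sketch of selecting one of the newly exported algorithms through the `algorithm` keyword. The model, iteration count, and hyperparameters are illustrative; the constructor settings mirror the test suite in this series.

```julia
using Turing

@model function gdemo(x)
    s² ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s²))
    for i in eachindex(x)
        x[i] ~ Normal(m, sqrt(s²))
    end
end

model = gdemo([1.5, 2.0])
q0 = q_meanfield_gaussian(model)

# Wasserstein proximal gradient descent; stepsize and n_samples follow the tests.
q, _, _ = vi(model, q0, 1000; algorithm=KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10))
```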
From b6d820261866c0fb6ccac9a3da66989c78ad8968 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:24 -0500 Subject: [PATCH 22/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index efe82cb454..e2db6a46f5 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -53,7 +53,7 @@ using Turing.Variational @testset "custom algorithm $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), - ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), From b900ab49e12fe9446490880bf2e30001e0a40f20 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:11:35 -0500 Subject: [PATCH 23/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index e2db6a46f5..9ec9703ed0 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -56,7 +56,7 @@ using Turing.Variational ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", - KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] From e71b07b6bd3670b810a286b624e5734ba7d20f58 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:00 -0500 Subject: [PATCH 24/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 9ec9703ed0..2c382b13d7 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -79,7 +79,7 @@ using Turing.Variational ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", - KLMinSqrtNaturalGradDescent(stepsize=1e-3, n_samples=10), + KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), ] From c08de123d9af8d155d5adca361e95e6739d8b9c9 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:11 -0500 Subject: [PATCH 25/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 2c382b13d7..681bff0ed5 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -58,7 +58,7 @@ using Turing.Variational "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), - ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10)), ] T = 1000 q, _, _ = vi( From ae80f1e3941279411b302c29160b4440b2872db3 Mon Sep 17 
00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:21 -0500 Subject: [PATCH 26/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 681bff0ed5..69b8078b7d 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -76,7 +76,7 @@ using Turing.Variational @testset "inference $name" for (name, algorithm) in [ ("KLMinRepGradProxDescent", KLMinRepGradProxDescent(adtype; n_samples=10)), ("KLMinRepGradDescent", KLMinRepGradDescent(adtype; operator, n_samples=10)), - ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(stepsize=1e-3, n_samples=10)), + ("KLMinNaturalGradDescent", KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)), ( "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), From 73bd309c14e29d450b05ec61ec70d2bc4559b8e6 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:33 -0500 Subject: [PATCH 27/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- test/variational/vi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/variational/vi.jl b/test/variational/vi.jl index 69b8078b7d..1815e5953c 100644 --- a/test/variational/vi.jl +++ b/test/variational/vi.jl @@ -81,7 +81,7 @@ using Turing.Variational "KLMinSqrtNaturalGradDescent", KLMinSqrtNaturalGradDescent(; stepsize=1e-3, n_samples=10), ), - ("KLMinWassFwdBwd", KLMinWassFwdBwd(stepsize=1e-3, n_samples=10)), + ("KLMinWassFwdBwd", KLMinWassFwdBwd(; stepsize=1e-3, n_samples=10)), ] rng = StableRNG(0x517e1d9bf89bf94f) From eaac4c3218fc1aad1532c5d614416cdec63735f1 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:12:40 -0500 Subject: [PATCH 28/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index 8d24469adc..7b990bdab7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -81,6 +81,7 @@ Additionally, Any of the new algorithms above can readily be used by simply swappin the `algorithm` keyword argument of `vi`. For example, to use batch-and-match: + ```julia vi(model, q, n_iters; algorithm=FisherMinBatchMatch()) ``` From 757ebb46c7348c6430594dcf682a6f73928bed21 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Wed, 19 Nov 2025 17:15:10 -0500 Subject: [PATCH 29/32] revert changes to README --- README.md | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 7f953700ea..21167ac585 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ -

[README header hunk: raw HTML banner omitted. The lines removed here held a centered Turing.jl logo with the tagline "Bayesian inference with probabilistic programming"; the lines added in their place hold the centered logo, the "Turing.jl" title, and the tagline "Probabilistic programming and Bayesian inference in Julia", followed by the Tutorials, API docs, and ColPrac badges.]

-## Get started +## 🚀 Get started Install Julia (see [the official Julia website](https://julialang.org/install/); you will need at least Julia 1.10 for the latest version of Turing.jl). Then, launch a Julia REPL and run: @@ -27,29 +23,22 @@ You can define models using the `@model` macro, and then perform Markov chain Mo ```julia julia> using Turing -julia> @model function linear_regression(x) - # Priors - α ~ Normal(0, 1) - β ~ Normal(0, 1) - σ² ~ truncated(Cauchy(0, 3); lower=0) - - # Likelihood - μ = α .+ β .* x - y ~ MvNormal(μ, σ² * I) +julia> @model function my_first_model(data) + mean ~ Normal(0, 1) + sd ~ truncated(Cauchy(0, 3); lower=0) + data ~ Normal(mean, sd) end -julia> x, y = rand(10), rand(10) +julia> model = my_first_model(randn()) -julia> posterior = linear_regression(x) | (; y = y) - -julia> chain = sample(posterior, NUTS(), 1000) +julia> chain = sample(model, NUTS(), 1000) ``` You can find the main TuringLang documentation at [**https://turinglang.org**](https://turinglang.org), which contains general information about Turing.jl's features, as well as a variety of tutorials with examples of Turing.jl models. API documentation for Turing.jl is specifically available at [**https://turinglang.org/Turing.jl/stable**](https://turinglang.org/Turing.jl/stable/). -## Contributing +## 🛠️ Contributing ### Issues @@ -66,20 +55,20 @@ Breaking releases (minor version) should target the `breaking` branch. If you have not received any feedback on an issue or PR for a while, please feel free to ping `@TuringLang/maintainers` in a comment. -## Other channels +## 💬 Other channels The Turing.jl userbase tends to be most active on the [`#turing` channel of Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). If you do not have an invitation to Julia's Slack, you can get one from [the official Julia website](https://julialang.org/slack/). There are also often threads on [Julia Discourse](https://discourse.julialang.org) (you can search using, e.g., [the `turing` tag](https://discourse.julialang.org/tag/turing)). -## What's changed recently? +## 🔄 What's changed recently? We publish a fortnightly newsletter summarising recent updates in the TuringLang ecosystem, which you can view on [our website](https://turinglang.org/news/), [GitHub](https://github.com/TuringLang/Turing.jl/issues/2498), or [Julia Slack](https://julialang.slack.com/archives/CCYDC34A0). For Turing.jl specifically, you can see a full changelog in [`HISTORY.md`](https://github.com/TuringLang/Turing.jl/blob/main/HISTORY.md) or [our GitHub releases](https://github.com/TuringLang/Turing.jl/releases). -## Where does Turing.jl sit in the TuringLang ecosystem? +## 🧩 Where does Turing.jl sit in the TuringLang ecosystem? Turing.jl is the main entry point for users, and seeks to provide a unified, convenient interface to all of the functionality in the TuringLang (and broader Julia) ecosystem. @@ -136,3 +125,5 @@ month = feb, ``` + +You can see the full list of publications that have cited Turing.jl on [Google Scholar](https://scholar.google.com/scholar?cites=11803241473159708991). 
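The string of "run formatter" commits above (patches 22–27) all apply one stylistic rule: Julia accepts both `f(kw=val)` and `f(; kw=val)` for keyword arguments, but the project formatter normalises keyword-only calls to the explicit leading-semicolon form. To make the one-character diffs concrete, here is a minimal sketch of how the reformatted constructors and the `algorithm` keyword of `vi` fit together. It reuses the `my_first_model` definition from the README hunk above; the AD backend, sample count, and iteration count are illustrative placeholders rather than values taken from this patch series, and the algorithm shown is only one of the options exercised in the tests.

```julia
using Turing
using Turing.Variational

@model function my_first_model(data)
    mean ~ Normal(0, 1)
    sd ~ truncated(Cauchy(0, 3); lower=0)
    data ~ Normal(mean, sd)
end

model = my_first_model(randn())

# Initial variational approximation: a diagonal-scale Gaussian pushed through the
# model's bijector, so constrained parameters such as `sd` are handled correctly.
q0 = q_meanfield_gaussian(model)

# Keyword arguments follow an explicit `;`, the style the formatter patches enforce
# (e.g. `KLMinNaturalGradDescent(; stepsize=1e-3, n_samples=10)` in the test file).
algorithm = KLMinRepGradProxDescent(AutoForwardDiff(); n_samples=10)

# Any AdvancedVI algorithm can be swapped in through the `algorithm` keyword.
q, state, info = vi(model, q0, 1_000; algorithm=algorithm)
```

The leading semicolon changes nothing semantically; it only makes explicit at the call site which arguments are keywords, which is why these commits touch punctuation and nothing else.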
From 05ab71164b784edaffbb40d7692c7610cf104b21 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 10:49:24 -0500 Subject: [PATCH 30/32] fix wrong use of transformation in vi --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 09f7861963..06d39128b9 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -322,7 +322,7 @@ function vi( prob, q, trans = if unconstrained @assert q isa Bijectors.TransformedDistribution "The algorithm $(algorithm) operates in an unconstrained space. Therefore, the initial variational approximation is expected to be a Bijectors.TransformedDistribution of an unconstrained distribution." vi = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal) - vi = DynamicPPL.set_transformed!!(vi, true) + vi = DynamicPPL.link!!(vi, model) prob = DynamicPPL.LogDensityFunction( model, DynamicPPL.getlogjoint_internal, vi; adtype ) From 91606b5b94cb123412c169b7f6005e9e1635e336 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 12:09:56 -0500 Subject: [PATCH 31/32] change inital value for scale matrices to 0.6*I and update docs --- HISTORY.md | 1 + src/variational/VariationalInference.jl | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7b990bdab7..35310bfa1b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -68,6 +68,7 @@ Additionally, - The default hyperparameters of `DoG`and `DoWG` have been altered. - The deprecated `AdvancedVI@0.2`-era interface is now removed. - `estimate_objective` now returns the value to be minimized by the optimization algorithm. For example, for ELBO maximization algorithms, `estimate_objective` will return the *negative ELBO*. This is breaking change from the previous behavior where the ELBO was returns. + - The initial value for the `q_meanfield_gaussian`, `q_fullrank_gaussian`, and `q_locationscale` have changed. Specificially, the default initial value for the scale matrix has been changed from `I` to `0.6*I`. - When using algorithms that expect to operate in unconstrained spaces, the user is now explicitly expected to provide a `Bijectors.TransformedDistribution` wrapping an unconstrained distribution. (Refer to the docstring of `vi`.) ### New Features diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 06d39128b9..46283b2e77 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -117,7 +117,9 @@ end Find a numerically non-degenerate variational distribution `q` for approximating the target `model` within the location-scale variational family formed by the type of `scale` and `basedist`. The distribution can be manually specified by setting `location`, `scale`, and `basedist`. -Otherwise, it chooses a standard Gaussian by default. +Otherwise, it chooses a Gaussian with zero-mean and scale `0.6*I` (covariance of `0.6^2*I`) by default. +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. + Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. 
If `meanfield` is set as `true`, the scale of `q` is restricted to be a diagonal matrix and only the diagonal of `scale` is used. @@ -165,9 +167,11 @@ function q_locationscale( L = if isnothing(scale) if meanfield - q_initialize_scale(rng, model, μ, Diagonal(ones(num_params)), basedist; kwargs...) + q_initialize_scale( + rng, model, μ, Diagonal(fill(0.6, num_params)), basedist; kwargs... + ) else - L0 = LowerTriangular(Matrix{Float64}(I, num_params, num_params)) + L0 = LowerTriangular(Matrix{Float64}(0.6*I, num_params, num_params)) q_initialize_scale(rng, model, μ, L0, basedist; kwargs...) end else @@ -198,6 +202,10 @@ end Find a numerically non-degenerate mean-field Gaussian `q` for approximating the target `model`. +If the `scale` set as `nothing`, the default value will be a zero-mean Gaussian with a `Diagonal` scale matrix (the "mean-field" approximation) no larger than `0.6*I` (covariance of `0.6^2*I`). +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. +Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. + # Arguments - `model`: The target `DynamicPPL.Model`. @@ -237,6 +245,10 @@ end Find a numerically non-degenerate Gaussian `q` with a scale with full-rank factors (traditionally referred to as a "full-rank family") for approximating the target `model`. +If the `scale` set as `nothing`, the default value will be a zero-mean Gaussian with a `LowerTriangular` scale matrix (resulting in a covariance with "full-rank" factors) no larger than `0.6*I` (covariance of `0.6^2*I`). +This guarantees that the samples from the initial variational approximation will fall in the range of (-2, 2) with 99.9% probability, which mimics the behavior of the `Turing.InitFromUniform()` strategy. +Whether the default choice is used or not, the `scale` may be adjusted via `q_initialize_scale` so that the log-densities of `model` are finite over the samples from `q`. + # Arguments - `model`: The target `DynamicPPL.Model`. From 722153a41f975bf9cfa3fad42260692ba073f472 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 20 Nov 2025 12:16:53 -0500 Subject: [PATCH 32/32] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/variational/VariationalInference.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl index 46283b2e77..5dde445c66 100644 --- a/src/variational/VariationalInference.jl +++ b/src/variational/VariationalInference.jl @@ -171,7 +171,7 @@ function q_locationscale( rng, model, μ, Diagonal(fill(0.6, num_params)), basedist; kwargs... ) else - L0 = LowerTriangular(Matrix{Float64}(0.6*I, num_params, num_params)) + L0 = LowerTriangular(Matrix{Float64}(0.6 * I, num_params, num_params)) q_initialize_scale(rng, model, μ, L0, basedist; kwargs...) end else
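The 99.9% figure quoted in the new docstrings above is easy to check numerically: with location zero and scale `0.6`, each marginal of the default initial approximation is `Normal(0, 0.6)` in the unconstrained space. The snippet below is a verification sketch using Distributions.jl, not code from the patch series.

```julia
using Distributions

# Probability that a draw from a zero-mean Gaussian with standard deviation 0.6
# falls inside (-2, 2), the range targeted by the new default initialisation.
p = cdf(Normal(0.0, 0.6), 2.0) - cdf(Normal(0.0, 0.6), -2.0)
# p ≈ 0.9991, i.e. roughly 99.9%, consistent with the updated docstrings and with
# mimicking a uniform initialisation over (-2, 2).
```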
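Patch 30's one-line change also deserves a note, since "fix wrong use of transformation" is terse: as I read it, `set_transformed!!` merely flags the VarInfo as being in linked (unconstrained) space without touching the stored values, whereas `link!!` actually maps the values through the model's bijectors, so the resulting `LogDensityFunction` really is defined on an unconstrained space as the VI algorithms expect. The sketch below restates the corrected initialisation with explanatory comments; `ldf_default_varinfo` and `getlogjoint_internal` are taken from the patch itself, the toy model is hypothetical, and the description of the two calls is my reading rather than quoted documentation.

```julia
using Turing
using DynamicPPL: DynamicPPL

@model function toy()
    x ~ LogNormal()   # a constrained parameter, so linking actually does work
end

model = toy()
adtype = AutoForwardDiff()

varinfo = DynamicPPL.ldf_default_varinfo(model, DynamicPPL.getlogjoint_internal)
# Previously: DynamicPPL.set_transformed!!(varinfo, true), which only set the
# "linked" flag while the stored values stayed in constrained space.
varinfo = DynamicPPL.link!!(varinfo, model)   # transform the values themselves
prob = DynamicPPL.LogDensityFunction(
    model, DynamicPPL.getlogjoint_internal, varinfo; adtype
)
```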