
Commit 25b5087

Merge branch 'main' of github.com:TuringLang/Turing.jl into bump_advancedvi_0.5

2 parents 115802d + 19bf7d6

File tree: 7 files changed, +135 −39 lines

.github/workflows/Tests.yml

Lines changed: 4 additions & 7 deletions

````diff
@@ -35,23 +35,20 @@ jobs:
           - name: "everything else"
             args: "--skip mcmc/gibbs.jl mcmc/Inference.jl ad.jl"
         runner:
-          # TODO(mhauru) All the ones below that run on 1.11 should actually be run on 1.
-          # The current setup is a temporary arrangement to deal with issues where Mooncake
-          # and Libtask are broken on 1.12.
           # Default
-          - version: '1.11'
+          - version: '1'
             os: ubuntu-latest
             num_threads: 1
           # Multithreaded
-          - version: '1.11'
+          - version: '1'
             os: ubuntu-latest
             num_threads: 2
           # Windows
-          - version: '1.11'
+          - version: '1'
             os: windows-latest
             num_threads: 1
           # macOS
-          - version: '1.11'
+          - version: '1'
             os: macos-latest
             num_threads: 1
           # Minimum supported Julia version
````

HISTORY.md

Lines changed: 39 additions & 5 deletions

````diff
@@ -1,14 +1,14 @@
 # 0.42.0

-## Breaking Changes
+## **AdvancedVI 0.6**

-**AdvancedVI 0.5**
-
-Turing.jl v0.42 updates `AdvancedVI.jl` compatibility to 0.5.
-Most of the changes introduced in `AdvancedVI@0.5` are structural, with some changes spilling out into the interface.
+Turing.jl v0.42 updates `AdvancedVI.jl` compatibility to 0.6 (we skipped the breaking 0.5 update as it does not introduce new features).
+`AdvancedVI@0.6` introduces major structural changes, including breaking changes to the interface, and multiple new features.
 The summary of the changes below covers the things that affect end-users of Turing.
 For a more comprehensive list of changes, please refer to the [changelogs](https://github.com/TuringLang/AdvancedVI.jl/blob/main/HISTORY.md) in `AdvancedVI`.

+### Breaking Changes
+
 A new level of interface for defining different variational algorithms has been introduced in `AdvancedVI` v0.5. As a result, the function `Turing.vi` now receives a keyword argument `algorithm`. The object `algorithm <: AdvancedVI.AbstractVariationalAlgorithm` should now contain all the algorithm-specific configurations. Therefore, keyword arguments of `vi` that were algorithm-specific, such as `objective`, `operator`, `averager` and so on, have been moved to fields of the relevant `<: AdvancedVI.AbstractVariationalAlgorithm` structs.
 For example,

@@ -49,6 +49,40 @@ Additionally,

 - The default hyperparameters of `DoG` and `DoWG` have been altered.
 - The deprecated interface from older versions of `AdvancedVI` is now removed.
+- `estimate_objective` now returns the value to be minimized by the optimization algorithm. For example, for ELBO maximization algorithms, `estimate_objective` will return the *negative ELBO*. This is a breaking change from the previous behavior, where the ELBO was returned.
+
+### New Features
+
+`AdvancedVI@0.6` adds numerous new features, including the following new VI algorithms:
+
+- `KLMinWassFwdBwd`: Also known as "Wasserstein variational inference," this algorithm minimizes the KL divergence under the Wasserstein-2 metric.
+- `KLMinNaturalGradDescent`: Also known as "online variational Newton," this is the canonical "black-box" natural gradient variational inference algorithm, which minimizes the KL divergence via mirror descent with the KL divergence as the Bregman divergence.
+- `KLMinSqrtNaturalGradDescent`: A recent variant of `KLMinNaturalGradDescent` that operates in the Cholesky-factor parameterization of Gaussians instead of precision matrices.
+- `FisherMinBatchMatch`: Called "batch-and-match," this algorithm minimizes a variant of the second-order Fisher divergence via a proximal point-type algorithm.
+
+Any of the new algorithms above can readily be used by simply swapping the `algorithm` keyword argument of `vi`.
+For example, to use batch-and-match:
+```julia
+vi(model, q, n_iters; algorithm=FisherMinBatchMatch())
+```
+
+# 0.41.1
+
+The `ModeResult` struct returned by `maximum_a_posteriori` and `maximum_likelihood` can now be wrapped in `InitFromParams()`.
+This makes it easier to use the parameters in downstream code, e.g. when specifying initial parameters for MCMC sampling.
+For example:
+
+```julia
+@model function f()
+    # ...
+end
+model = f()
+opt_result = maximum_a_posteriori(model)
+
+sample(model, NUTS(), 1000; initial_params=InitFromParams(opt_result))
+```
+
+If you need to access the dictionary of parameters, it is stored in `opt_result.params`, but note that this field may change in future breaking releases, as Turing's optimisation interface is slated for an overhaul in the near future.

 # 0.41.0
````
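As an aside on the 0.41.1 entry above: the changelog mentions that the parameter dictionary lives in `opt_result.params` but does not show it in use. Below is a minimal, self-contained sketch of reading a single value out of it. The `coin` model and its data are invented for illustration; `maximum_a_posteriori`, `@varname`, and the `params` field are the pieces documented above, and `params` is noted there as subject to change.

```julia
using Turing

@model function coin(; n=10)
    p ~ Beta(2, 2)
    heads ~ Binomial(n, p)
end

# Condition on made-up data and find the MAP estimate.
opt_result = maximum_a_posteriori(coin() | (; heads=7))

# `params` is a VarName-keyed dictionary of the optimised values.
p_map = opt_result.params[@varname(p)]
```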

README.md

Lines changed: 25 additions & 16 deletions

````diff
@@ -1,6 +1,10 @@
-<p align="center"><img src="https://raw.githubusercontent.com/TuringLang/turinglang.github.io/refs/heads/main/assets/logo/turing-logo.svg" alt="Turing.jl logo" width="200" /></p>
-<h1 align="center">Turing.jl</h1>
-<p align="center"><i>Probabilistic programming and Bayesian inference in Julia</i></p>
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://turinglang.org/assets/logo/turing-logo-dark.svg">
+    <img src="https://turinglang.org/assets/logo/turing-logo-light.svg" alt="Turing.jl logo" width="300">
+  </picture>
+</p>
+<p align="center"><i>Bayesian inference with probabilistic programming</i></p>
 <p align="center">
   <a href="https://turinglang.org/"><img src="https://img.shields.io/badge/docs-tutorials-blue.svg" alt="Tutorials" /></a>
   <a href="https://turinglang.org/Turing.jl/stable"><img src="https://img.shields.io/badge/docs-API-blue.svg" alt="API docs" /></a>
@@ -9,7 +13,7 @@
   <a href="https://github.com/SciML/ColPrac"><img src="https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet" alt="ColPrac: Contributor's Guide on Collaborative Practices for Community Packages" /></a>
 </p>

-## 🚀 Get started
+## Get started

 Install Julia (see [the official Julia website](https://julialang.org/install/); you will need at least Julia 1.10 for the latest version of Turing.jl).
 Then, launch a Julia REPL and run:
@@ -23,22 +27,29 @@ You can define models using the `@model` macro, and then perform Markov chain Mo
 ```julia
 julia> using Turing

-julia> @model function my_first_model(data)
-           mean ~ Normal(0, 1)
-           sd ~ truncated(Cauchy(0, 3); lower=0)
-           data ~ Normal(mean, sd)
+julia> @model function linear_regression(x)
+           # Priors
+           α ~ Normal(0, 1)
+           β ~ Normal(0, 1)
+           σ² ~ truncated(Cauchy(0, 3); lower=0)
+
+           # Likelihood
+           μ = α .+ β .* x
+           y ~ MvNormal(μ, σ² * I)
       end

-julia> model = my_first_model(randn())
+julia> x, y = rand(10), rand(10)

-julia> chain = sample(model, NUTS(), 1000)
+julia> posterior = linear_regression(x) | (; y = y)
+
+julia> chain = sample(posterior, NUTS(), 1000)
 ```

 You can find the main TuringLang documentation at [**https://turinglang.org**](https://turinglang.org), which contains general information about Turing.jl's features, as well as a variety of tutorials with examples of Turing.jl models.

 API documentation for Turing.jl is specifically available at [**https://turinglang.org/Turing.jl/stable**](https://turinglang.org/Turing.jl/stable/).

-## 🛠️ Contributing
+## Contributing

 ### Issues

@@ -55,20 +66,20 @@ Breaking releases (minor version) should target the `breaking` branch.

 If you have not received any feedback on an issue or PR for a while, please feel free to ping `@TuringLang/maintainers` in a comment.

-## 💬 Other channels
+## Other channels

 The Turing.jl userbase tends to be most active on the [`#turing` channel of Julia Slack](https://julialang.slack.com/archives/CCYDC34A0).
 If you do not have an invitation to Julia's Slack, you can get one from [the official Julia website](https://julialang.org/slack/).

 There are also often threads on [Julia Discourse](https://discourse.julialang.org) (you can search using, e.g., [the `turing` tag](https://discourse.julialang.org/tag/turing)).

-## 🔄 What's changed recently?
+## What's changed recently?

 We publish a fortnightly newsletter summarising recent updates in the TuringLang ecosystem, which you can view on [our website](https://turinglang.org/news/), [GitHub](https://github.com/TuringLang/Turing.jl/issues/2498), or [Julia Slack](https://julialang.slack.com/archives/CCYDC34A0).

 For Turing.jl specifically, you can see a full changelog in [`HISTORY.md`](https://github.com/TuringLang/Turing.jl/blob/main/HISTORY.md) or [our GitHub releases](https://github.com/TuringLang/Turing.jl/releases).

-## 🧩 Where does Turing.jl sit in the TuringLang ecosystem?
+## Where does Turing.jl sit in the TuringLang ecosystem?

 Turing.jl is the main entry point for users, and seeks to provide a unified, convenient interface to all of the functionality in the TuringLang (and broader Julia) ecosystem.

@@ -125,5 +136,3 @@ month = feb,
 ```

 </details>
-
-You can see the full list of publications that have cited Turing.jl on [Google Scholar](https://scholar.google.com/scholar?cites=11803241473159708991).
````
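For readers who want to run the new README example outside a REPL session, here is a self-contained sketch. The synthetic data are made up for illustration, and `LinearAlgebra` is loaded explicitly so that the identity scaling `I` used in `MvNormal` is guaranteed to be in scope.

```julia
using Turing
using LinearAlgebra  # provides `I`, used to build the isotropic covariance

@model function linear_regression(x)
    # Priors
    α ~ Normal(0, 1)
    β ~ Normal(0, 1)
    σ² ~ truncated(Cauchy(0, 3); lower=0)

    # Likelihood
    μ = α .+ β .* x
    y ~ MvNormal(μ, σ² * I)
end

x = rand(10)
y = 0.5 .+ 2.0 .* x .+ 0.1 .* randn(10)  # made-up data for this sketch

# Condition on the observed `y`, then sample from the posterior.
posterior = linear_regression(x) | (; y=y)
chain = sample(posterior, NUTS(), 1000)
```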

ext/TuringOptimExt.jl

Lines changed: 1 addition & 1 deletion

````diff
@@ -192,7 +192,7 @@ function _optimize(
     varnames = map(Symbol ∘ first, vns_vals_iter)
     vals = map(last, vns_vals_iter)
     vmat = NamedArrays.NamedArray(vals, varnames)
-    return Optimisation.ModeResult(vmat, M, -M.minimum, logdensity_optimum)
+    return Optimisation.ModeResult(vmat, M, -M.minimum, logdensity_optimum, vals_dict)
 end

 end # module
````
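The new trailing argument `vals_dict` (built earlier in `_optimize`, outside this hunk) populates the `params` field added to `ModeResult` in `src/optimisation/Optimisation.jl` below. As a hedged sketch of its shape only: it is a VarName-keyed dictionary of optimised values, and the variable names and numbers here are hypothetical.

```julia
using AbstractPPL: VarName, @varname

# Hypothetical stand-in for the `vals_dict` threaded into ModeResult:
vals_dict = Dict{VarName,Any}(@varname(mean) => 0.12, @varname(sd) => 0.98)
```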

src/optimisation/Optimisation.jl

Lines changed: 30 additions & 5 deletions

````diff
@@ -4,6 +4,7 @@ using ..Turing
 using NamedArrays: NamedArrays
 using AbstractPPL: AbstractPPL
 using DynamicPPL: DynamicPPL
+using DocStringExtensions: TYPEDFIELDS
 using LogDensityProblems: LogDensityProblems
 using Optimization: Optimization
 using OptimizationOptimJL: OptimizationOptimJL
@@ -154,13 +155,22 @@ end
         V<:NamedArrays.NamedArray,
         M<:NamedArrays.NamedArray,
         O<:Optim.MultivariateOptimizationResults,
-        S<:NamedArrays.NamedArray
+        S<:NamedArrays.NamedArray,
+        P<:AbstractDict{<:VarName,<:Any}
     }

 A wrapper struct to store various results from a MAP or MLE estimation.
+
+## Fields
+
+$(TYPEDFIELDS)
 """
-struct ModeResult{V<:NamedArrays.NamedArray,O<:Any,M<:OptimLogDensity} <:
-       StatsBase.StatisticalModel
+struct ModeResult{
+    V<:NamedArrays.NamedArray,
+    O<:Any,
+    M<:OptimLogDensity,
+    P<:AbstractDict{<:AbstractPPL.VarName,<:Any},
+} <: StatsBase.StatisticalModel
     "A vector with the resulting point estimates."
     values::V
     "The stored optimiser results."
@@ -169,6 +179,8 @@ struct ModeResult{V<:NamedArrays.NamedArray,O<:Any,M<:OptimLogDensity} <:
     lp::Float64
     "The evaluation function used to calculate the output."
     f::M
+    "Dictionary of parameter values"
+    params::P
 end

 function Base.show(io::IO, ::MIME"text/plain", m::ModeResult)
@@ -182,6 +194,15 @@ function Base.show(io::IO, m::ModeResult)
     return show(io, m.values.array)
 end

+"""
+    InitFromParams(m::ModeResult)
+
+Initialize a model from the parameters stored in a `ModeResult`.
+"""
+function DynamicPPL.InitFromParams(m::ModeResult)
+    return DynamicPPL.InitFromParams(m.params)
+end
+
 # Various StatsBase methods for ModeResult

 """
@@ -355,9 +376,13 @@ function ModeResult(log_density::OptimLogDensity, solution::SciMLBase.Optimizati
     iters = map(AbstractPPL.varname_and_value_leaves, keys(vals), values(vals))
     vns_vals_iter = mapreduce(collect, vcat, iters)
     syms = map(Symbol ∘ first, vns_vals_iter)
-    vals = map(last, vns_vals_iter)
+    split_vals = map(last, vns_vals_iter)
     return ModeResult(
-        NamedArrays.NamedArray(vals, syms), solution, -solution.objective, log_density
+        NamedArrays.NamedArray(split_vals, syms),
+        solution,
+        -solution.objective,
+        log_density,
+        vals,
     )
 end
````
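Taken together, the new `params` field and the `DynamicPPL.InitFromParams` method above mean a `ModeResult` can be used directly as an initialisation strategy. A minimal sketch follows; the `demo` model is invented for illustration, and the equivalence shown is exactly the forwarding the method above performs.

```julia
using Turing

@model demo() = x ~ Normal(0, 1)

map_est = maximum_a_posteriori(demo())

# The new method is sugar for wrapping the params dictionary directly,
# so these two strategies are equivalent:
strategy_a = InitFromParams(map_est)
strategy_b = InitFromParams(map_est.params)

# Either can be passed to `sample` to start MCMC from the MAP estimate.
chain = sample(demo(), NUTS(), 100; initial_params=strategy_a)
```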

test/ad.jl

Lines changed: 2 additions & 4 deletions

````diff
@@ -10,10 +10,8 @@ using Test
 using ..Models: gdemo_default
 import ForwardDiff, ReverseDiff

-# Detect if prerelease version, if so, we skip some tests
-const IS_PRERELEASE = !isempty(VERSION.prerelease)
-const INCLUDE_MOONCAKE = !IS_PRERELEASE
-
+# Skip Mooncake on 1.12 as it is not compatible yet
+const INCLUDE_MOONCAKE = VERSION < v"1.12"
 if INCLUDE_MOONCAKE
     import Pkg
     Pkg.add("Mooncake")
````
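The gating constant above is a general pattern for test dependencies that lag a new Julia release. A standalone sketch of the same idea, with a hypothetical package name that is not a real dependency of this repository:

```julia
# Gate an optional test dependency on the running Julia version.
# "SomeADPkg" is a placeholder package name.
const INCLUDE_SOMEADPKG = VERSION < v"1.12"

if INCLUDE_SOMEADPKG
    import Pkg
    Pkg.add("SomeADPkg")  # only install where it is known to work
end
```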

test/optimisation/Optimisation.jl

Lines changed: 34 additions & 1 deletion

````diff
@@ -101,6 +101,13 @@ using Turing
            @test result.optim_result.retcode == Optimization.ReturnCode.Success
        end
        @test isapprox(result.lp, true_logp, atol=0.01)
+       # check that the parameter dict matches the NamedArray
+       # NOTE: This test only works for models where all parameters are identity
+       # varnames AND real-valued. Thankfully, this is true for `gdemo`.
+       @test length(only(result.values.dicts)) == length(result.params)
+       for (k, index) in only(result.values.dicts)
+           @test result.params[AbstractPPL.VarName{k}()] == result.values.array[index]
+       end
    end

    @testset "MLE" begin
@@ -546,6 +553,26 @@
        end
    end

+   @testset "using ModeResult to initialise MCMC" begin
+       @model function f(y)
+           μ ~ Normal(0, 1)
+           σ ~ Gamma(2, 1)
+           return y ~ Normal(μ, σ)
+       end
+       model = f(randn(10))
+       mle = maximum_likelihood(model)
+       # TODO(penelopeysm): This relies on the fact that HMC does indeed
+       # use the initial_params passed to it. We should use something
+       # like a StaticSampler (see test/mcmc/Inference) to make this more
+       # robust.
+       chain = sample(
+           model, HMC(0.1, 10), 2; initial_params=InitFromParams(mle), num_warmup=0
+       )
+       # Check that those parameters were indeed used as initial params
+       @test chain[:μ][1] == mle.params[@varname(μ)]
+       @test chain[:σ][1] == mle.params[@varname(σ)]
+   end
+
    # Issue: https://discourse.julialang.org/t/turing-mixture-models-with-dirichlet-weightings/112910
    @testset "Optimization with different linked dimensionality" begin
        @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3))
@@ -621,7 +648,13 @@
        m = saddle_model()
        optim_ld = Turing.Optimisation.OptimLogDensity(m, DynamicPPL.getloglikelihood)
        vals = Turing.Optimisation.NamedArrays.NamedArray([0.0, 0.0])
-       m = Turing.Optimisation.ModeResult(vals, nothing, 0.0, optim_ld)
+       m = Turing.Optimisation.ModeResult(
+           vals,
+           nothing,
+           0.0,
+           optim_ld,
+           Dict{AbstractPPL.VarName,Float64}(@varname(x) => 0.0, @varname(y) => 0.0),
+       )
        ct = coeftable(m)
        @assert isnan(ct.cols[2][1])
        @assert ct.colnms[end] == "Error notes"
````
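As background for the identity-varname reconstruction in the first hunk above, here is a standalone sketch (not part of the test suite) of how a `Symbol` key round-trips to a `VarName`:

```julia
using AbstractPPL: VarName, @varname

# For a scalar parameter declared as `x ~ ...`, the varname is the
# "identity" varname VarName{:x}(), so it can be rebuilt from its Symbol:
k = :x
vn = VarName{k}()
vn == @varname(x)   # true
Symbol(vn) == k     # true
```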
