From 1d75f6071069dd73ccb8435a20c4156251c262c5 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Fri, 5 Dec 2025 11:15:46 +0000 Subject: [PATCH 01/48] Bump minor version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2831cd3318..10f3ab2544 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Turing" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.42.0" +version = "0.43.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" From 0477b65fadbaaa6564d9445f196239629fef1351 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 8 Jan 2026 10:56:26 +0000 Subject: [PATCH 02/48] move stats.jl to separate file --- src/optimisation/Optimisation.jl | 108 +------------------------------ src/optimisation/stats.jl | 105 ++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 106 deletions(-) create mode 100644 src/optimisation/stats.jl diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index bc897f0646..1d6f9e2fc6 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -142,112 +142,6 @@ function DynamicPPL.InitFromParams( return DynamicPPL.InitFromParams(m.params, fallback) end -# Various StatsBase methods for ModeResult - -""" - StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) - - -Return a table with coefficients and related statistics of the model. level determines the -level for confidence intervals (by default, 95%). - -In case the `numerrors_warnonly` argument is true (the default) numerical errors encountered -during the computation of the standard errors will be caught and reported in an extra -"Error notes" column. -""" -function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) - # Get columns for coeftable. 
- terms = string.(StatsBase.coefnames(m)) - estimates = m.values.array[:, 1] - # If numerrors_warnonly is true, and if either the information matrix is singular or has - # negative entries on its diagonal, then `notes` will be a list of strings for each - # value in `m.values`, explaining why the standard error is NaN. - notes = nothing - local stderrors - if numerrors_warnonly - infmat = StatsBase.informationmatrix(m) - local vcov - try - vcov = inv(infmat) - catch e - if isa(e, LinearAlgebra.SingularException) - stderrors = fill(NaN, length(m.values)) - notes = fill("Information matrix is singular", length(m.values)) - else - rethrow(e) - end - else - vars = LinearAlgebra.diag(vcov) - stderrors = eltype(vars)[] - if any(x -> x < 0, vars) - notes = [] - end - for var in vars - if var >= 0 - push!(stderrors, sqrt(var)) - if notes !== nothing - push!(notes, "") - end - else - push!(stderrors, NaN) - if notes !== nothing - push!(notes, "Negative variance") - end - end - end - end - else - stderrors = StatsBase.stderror(m) - end - zscore = estimates ./ stderrors - p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore) - - # Confidence interval (CI) - q = Statistics.quantile(Distributions.Normal(), (1 + level) / 2) - ci_low = estimates .- q .* stderrors - ci_high = estimates .+ q .* stderrors - - level_ = 100 * level - level_percentage = isinteger(level_) ? Int(level_) : level_ - - cols = Vector[estimates, stderrors, zscore, p, ci_low, ci_high] - colnms = [ - "Coef.", - "Std. Error", - "z", - "Pr(>|z|)", - "Lower $(level_percentage)%", - "Upper $(level_percentage)%", - ] - if notes !== nothing - push!(cols, notes) - push!(colnms, "Error notes") - end - return StatsBase.CoefTable(cols, colnms, terms) -end - -function StatsBase.informationmatrix( - m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs... 
-) - # This needs to be calculated in unlinked space - model = m.f.ldf.model - vi = DynamicPPL.VarInfo(model) - getlogdensity = _choose_getlogdensity(m.estimator) - new_optimld = OptimLogDensity(DynamicPPL.LogDensityFunction(model, getlogdensity, vi)) - - # Calculate the Hessian, which is the information matrix because the negative of the log - # likelihood was optimized - varnames = StatsBase.coefnames(m) - info = hessian_function(new_optimld, m.values.array[:, 1]) - return NamedArrays.NamedArray(info, (varnames, varnames)) -end - -StatsBase.coef(m::ModeResult) = m.values -StatsBase.coefnames(m::ModeResult) = names(m.values)[1] -StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) -StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) -StatsBase.loglikelihood(m::ModeResult) = m.lp - """ Base.get(m::ModeResult, var_symbol::Symbol) Base.get(m::ModeResult, var_symbols::AbstractVector{Symbol}) @@ -548,4 +442,6 @@ function maximum_likelihood(model::DynamicPPL.Model, args...; kwargs...) return estimate_mode(model, MLE(), args...; kwargs...) end +include("stats.jl") + end diff --git a/src/optimisation/stats.jl b/src/optimisation/stats.jl new file mode 100644 index 0000000000..eae9e81c6b --- /dev/null +++ b/src/optimisation/stats.jl @@ -0,0 +1,105 @@ +# Various StatsBase methods for ModeResult + +""" + StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) + + +Return a table with coefficients and related statistics of the model. level determines the +level for confidence intervals (by default, 95%). + +In case the `numerrors_warnonly` argument is true (the default) numerical errors encountered +during the computation of the standard errors will be caught and reported in an extra +"Error notes" column. +""" +function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) + # Get columns for coeftable. 
+ terms = string.(StatsBase.coefnames(m)) + estimates = m.values.array[:, 1] + # If numerrors_warnonly is true, and if either the information matrix is singular or has + # negative entries on its diagonal, then `notes` will be a list of strings for each + # value in `m.values`, explaining why the standard error is NaN. + notes = nothing + local stderrors + if numerrors_warnonly + infmat = StatsBase.informationmatrix(m) + local vcov + try + vcov = inv(infmat) + catch e + if isa(e, LinearAlgebra.SingularException) + stderrors = fill(NaN, length(m.values)) + notes = fill("Information matrix is singular", length(m.values)) + else + rethrow(e) + end + else + vars = LinearAlgebra.diag(vcov) + stderrors = eltype(vars)[] + if any(x -> x < 0, vars) + notes = [] + end + for var in vars + if var >= 0 + push!(stderrors, sqrt(var)) + if notes !== nothing + push!(notes, "") + end + else + push!(stderrors, NaN) + if notes !== nothing + push!(notes, "Negative variance") + end + end + end + end + else + stderrors = StatsBase.stderror(m) + end + zscore = estimates ./ stderrors + p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore) + + # Confidence interval (CI) + q = Statistics.quantile(Distributions.Normal(), (1 + level) / 2) + ci_low = estimates .- q .* stderrors + ci_high = estimates .+ q .* stderrors + + level_ = 100 * level + level_percentage = isinteger(level_) ? Int(level_) : level_ + + cols = Vector[estimates, stderrors, zscore, p, ci_low, ci_high] + colnms = [ + "Coef.", + "Std. Error", + "z", + "Pr(>|z|)", + "Lower $(level_percentage)%", + "Upper $(level_percentage)%", + ] + if notes !== nothing + push!(cols, notes) + push!(colnms, "Error notes") + end + return StatsBase.CoefTable(cols, colnms, terms) +end + +function StatsBase.informationmatrix( + m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs... 
+) + # This needs to be calculated in unlinked space + model = m.f.ldf.model + vi = DynamicPPL.VarInfo(model) + getlogdensity = _choose_getlogdensity(m.estimator) + new_optimld = OptimLogDensity(DynamicPPL.LogDensityFunction(model, getlogdensity, vi)) + + # Calculate the Hessian, which is the information matrix because the negative of the log + # likelihood was optimized + varnames = StatsBase.coefnames(m) + info = hessian_function(new_optimld, m.values.array[:, 1]) + return NamedArrays.NamedArray(info, (varnames, varnames)) +end + +StatsBase.coef(m::ModeResult) = m.values +StatsBase.coefnames(m::ModeResult) = names(m.values)[1] +StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) +StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) +StatsBase.loglikelihood(m::ModeResult) = m.lp From f7d878b031a85ef199c768eac8c686c1510f631a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 26 Jan 2026 10:59:54 +0000 Subject: [PATCH 03/48] Optimisation interface rework (#2747) Closes #2232 Closes #2363 Closes #2745 Closes #2732 Closes #1775 Closes #2735 Closes #2734 Closes #2634 This PR redesigns Turing's optimisation interface as discussed in #2634. # How to review? The key things that are changed here are: - The fields of the `ModeResult` struct are slightly different. We get rid of the old `NamedArray` that was stored in there. Instead, users who really, really need it can generate it themselves via `StatsBase.coef(result)`. The primary interface for getting parameters will be `result.params`, which is a `Dict{VarName}` (obviously can be changed to VNT later). - Linking is now user-controlled and depends on the `link` parameter passed in. - Constraints are now passed in as either a NamedTuple or a Dict{VarName} (again pending VNT). Values are always provided in unlinked space. - Initial parameters are now supplied as an `AbstractInitStrategy`. 
The code in `src/optimisation/init.jl` will attempt to sample initial parameters using that strategy, but will also make sure that the sampled value obeys the constraints. The most meaningful section of code is the definition of `estimate_mode`, plus `src/optimisation/init.jl` which handles everything to do with initialisation and constraints. The rest of the stuff are just tweaks to fit the new interface. --- HISTORY.md | 40 ++ Project.toml | 8 +- src/Turing.jl | 2 +- src/init_strategy.jl | 27 ++ src/mcmc/abstractmcmc.jl | 28 +- src/mcmc/emcee.jl | 2 +- src/mcmc/repeat_sampler.jl | 4 +- src/optimisation/Optimisation.jl | 550 ++++++++++------------ src/optimisation/init.jl | 316 +++++++++++++ src/optimisation/stats.jl | 112 ++++- test/Project.toml | 6 +- test/ad.jl | 7 +- test/optimisation/Optimisation.jl | 757 ++++++++++++++++-------------- 13 files changed, 1154 insertions(+), 705 deletions(-) create mode 100644 src/init_strategy.jl create mode 100644 src/optimisation/init.jl diff --git a/HISTORY.md b/HISTORY.md index aff1bfe07c..669500616a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,43 @@ +# 0.43.0 + +## Optimisation interface + +Turing.jl's optimisation interface has been completely overhauled in this release. +The aim of this has been to provide users with a more consistent and principled way of specifying constraints. + +The crux of the issue is that Optimization.jl expects vectorised inputs, whereas Turing models are more high-level: they have named variables which may be scalars, vectors, or in general anything. +Prior to this version, Turing's interface required the user to provide the vectorised inputs 'raw', which is both unintuitive and error-prone (especially when considering that optimisation may run in linked or unlinked space). + +Going forward, initial parameters for optimisation are specified using `AbstractInitStrategy`. +If specific parameters are provided (via `InitFromParams`), these must be in model space (i.e. untransformed). 
+This directly mimics the interface for MCMC sampling that has been in place since v0.41. + +Furthermore, lower and upper bounds (if desired) must be specified as either a NamedTuple or an AbstractDict{<:VarName}. +Bounds are always provided in model space; Turing will handle the transformation of these bounds to linked space if necessary. +Constraints are respected when creating initial parameters for optimisation: if the `AbstractInitStrategy` provided is incompatible with the constraints (for example `InitFromParams((; x = 2.0))` but `x` is constrained to be between `[0, 1]`), an error will be raised. + +Here is a (very simplified) example of the new interface: + +```julia +using Turing +@model f() = x ~ Beta(2, 2) +maximum_a_posteriori( + f(); + # All of the following are in unlinked space. + initial_params=InitFromParams((; x=0.3)), + lb=(; x=0.1), + ub=(; x=0.4), +) +``` + +For more information, please see the docstring of `estimate_mode`. + +Note that in some cases, the translation of bounds to linked space may not be well-defined. +This is especially true for distributions where the samples have elements that are not independent (for example, Dirichlet, or LKJCholesky). +In these cases, Turing will raise an error if bounds are provided. +Users who wish to perform optimisation with such constraints should directly use `LogDensityFunction` and Optimization.jl. +Documentation on this matter will be forthcoming. + # 0.42.4 Fixes a typo that caused NUTS to perform one less adaptation step than in versions prior to 0.41. 
diff --git a/Project.toml b/Project.toml index 10f3ab2544..b71c560e3d 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" @@ -25,7 +26,6 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -58,20 +58,20 @@ BangBang = "0.4.2" Bijectors = "0.14, 0.15" Compat = "4.15.0" DataStructures = "0.18, 0.19" +DifferentiationInterface = "0.7" Distributions = "0.25.77" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" -DynamicPPL = "0.39.1" +DynamicPPL = "0.39.11" EllipticalSliceSampling = "0.5, 1, 2" ForwardDiff = "0.10.3, 1" Libtask = "0.9.3" LinearAlgebra = "1" LogDensityProblems = "2" MCMCChains = "5, 6, 7" -NamedArrays = "0.9, 0.10" Optimization = "3, 4, 5" -OptimizationOptimJL = "0.1, 0.2, 0.3, 0.4" +OptimizationOptimJL = "0.1 - 0.4" OrderedCollections = "1" Printf = "1" Random = "1" diff --git a/src/Turing.jl b/src/Turing.jl index 2de60bc2fe..9f084dc14f 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -11,7 +11,6 @@ using AdvancedVI: AdvancedVI using DynamicPPL: DynamicPPL import DynamicPPL: NoDist, NamedDist using LogDensityProblems: LogDensityProblems -using NamedArrays: NamedArrays using Accessors: 
Accessors using StatsAPI: StatsAPI using StatsBase: StatsBase @@ -45,6 +44,7 @@ end # Random probability measures. include("stdlib/distributions.jl") include("stdlib/RandomMeasures.jl") +include("init_strategy.jl") include("mcmc/Inference.jl") # inference algorithms using .Inference include("variational/Variational.jl") diff --git a/src/init_strategy.jl b/src/init_strategy.jl new file mode 100644 index 0000000000..455a734c45 --- /dev/null +++ b/src/init_strategy.jl @@ -0,0 +1,27 @@ +using AbstractPPL: VarName +using DynamicPPL: DynamicPPL + +# This function is shared by both MCMC and optimisation, so has to exist outside of both. +""" + _convert_initial_params(initial_params) + +Convert `initial_params` to a `DynamicPPl.AbstractInitStrategy` if it is not already one, or +throw a useful error message. +""" +_convert_initial_params(initial_params::DynamicPPL.AbstractInitStrategy) = initial_params +function _convert_initial_params(nt::NamedTuple) + @info "Using a NamedTuple for `initial_params` will be deprecated in a future release. Please use `InitFromParams(namedtuple)` instead." + return DynamicPPL.InitFromParams(nt) +end +function _convert_initial_params(d::AbstractDict{<:VarName}) + @info "Using a Dict for `initial_params` will be deprecated in a future release. Please use `InitFromParams(dict)` instead." + return DynamicPPL.InitFromParams(d) +end +function _convert_initial_params(::AbstractVector{<:Real}) + errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or ideally a `DynamicPPL.AbstractInitStrategy`. Using a vector of parameters for `initial_params` is no longer supported. Please see https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters for details on how to update your code." + throw(ArgumentError(errmsg)) +end +function _convert_initial_params(@nospecialize(_::Any)) + errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or a `DynamicPPL.AbstractInitStrategy`." 
+ throw(ArgumentError(errmsg)) +end diff --git a/src/mcmc/abstractmcmc.jl b/src/mcmc/abstractmcmc.jl index 5adafd327b..c8c4db1329 100644 --- a/src/mcmc/abstractmcmc.jl +++ b/src/mcmc/abstractmcmc.jl @@ -19,30 +19,6 @@ parameters for sampling are chosen if not specified by the user. By default, thi """ init_strategy(::AbstractSampler) = DynamicPPL.InitFromPrior() -""" - _convert_initial_params(initial_params) - -Convert `initial_params` to a `DynamicPPl.AbstractInitStrategy` if it is not already one, or -throw a useful error message. -""" -_convert_initial_params(initial_params::DynamicPPL.AbstractInitStrategy) = initial_params -function _convert_initial_params(nt::NamedTuple) - @info "Using a NamedTuple for `initial_params` will be deprecated in a future release. Please use `InitFromParams(namedtuple)` instead." - return DynamicPPL.InitFromParams(nt) -end -function _convert_initial_params(d::AbstractDict{<:VarName}) - @info "Using a Dict for `initial_params` will be deprecated in a future release. Please use `InitFromParams(dict)` instead." - return DynamicPPL.InitFromParams(d) -end -function _convert_initial_params(::AbstractVector{<:Real}) - errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or ideally a `DynamicPPL.AbstractInitStrategy`. Using a vector of parameters for `initial_params` is no longer supported. Please see https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters for details on how to update your code." - throw(ArgumentError(errmsg)) -end -function _convert_initial_params(@nospecialize(_::Any)) - errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or a `DynamicPPL.AbstractInitStrategy`." 
- throw(ArgumentError(errmsg)) -end - """ default_varinfo(rng, model, sampler) @@ -89,7 +65,7 @@ function AbstractMCMC.sample( model, spl, N; - initial_params=_convert_initial_params(initial_params), + initial_params=Turing._convert_initial_params(initial_params), chain_type, kwargs..., ) @@ -134,7 +110,7 @@ function AbstractMCMC.sample( n_chains; chain_type, check_model=false, # no need to check again - initial_params=map(_convert_initial_params, initial_params), + initial_params=map(Turing._convert_initial_params, initial_params), kwargs..., ) end diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 899749e287..33b2180d3b 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -39,7 +39,7 @@ function Turing.Inference.init_strategy(spl::Emcee) return fill(DynamicPPL.InitFromPrior(), _get_n_walkers(spl)) end # We also have to explicitly allow this or else it will error... -function Turing.Inference._convert_initial_params( +function Turing._convert_initial_params( x::AbstractVector{<:DynamicPPL.AbstractInitStrategy} ) return x diff --git a/src/mcmc/repeat_sampler.jl b/src/mcmc/repeat_sampler.jl index 133517494e..548fb1ba81 100644 --- a/src/mcmc/repeat_sampler.jl +++ b/src/mcmc/repeat_sampler.jl @@ -115,7 +115,7 @@ function AbstractMCMC.sample( model, sampler, N; - initial_params=_convert_initial_params(initial_params), + initial_params=Turing._convert_initial_params(initial_params), chain_type=chain_type, progress=progress, kwargs..., @@ -143,7 +143,7 @@ function AbstractMCMC.sample( ensemble, N, n_chains; - initial_params=map(_convert_initial_params, initial_params), + initial_params=map(Turing._convert_initial_params, initial_params), chain_type=chain_type, progress=progress, kwargs..., diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 1d6f9e2fc6..2aef6c69ce 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -1,13 +1,12 @@ module Optimisation using ..Turing -using NamedArrays: 
NamedArrays -using AbstractPPL: AbstractPPL -using DynamicPPL: DynamicPPL +using AbstractPPL: AbstractPPL, VarName +using DynamicPPL: DynamicPPL, VarInfo, LogDensityFunction using DocStringExtensions: TYPEDFIELDS using LogDensityProblems: LogDensityProblems using Optimization: Optimization -using OptimizationOptimJL: OptimizationOptimJL +using OptimizationOptimJL: LBFGS using Random: Random using SciMLBase: SciMLBase using ADTypes: ADTypes @@ -21,6 +20,8 @@ using LinearAlgebra: LinearAlgebra export maximum_a_posteriori, maximum_likelihood, MAP, MLE +include("init.jl") + """ ModeEstimator @@ -35,6 +36,8 @@ abstract type ModeEstimator end Concrete type for maximum likelihood estimation. """ struct MLE <: ModeEstimator end +logprob_func(::MLE) = DynamicPPL.getloglikelihood +logprob_accs(::MLE) = (DynamicPPL.LogLikelihoodAccumulator(),) """ MAP <: ModeEstimator @@ -42,49 +45,21 @@ struct MLE <: ModeEstimator end Concrete type for maximum a posteriori estimation. """ struct MAP <: ModeEstimator end - -""" - OptimLogDensity{L<:DynamicPPL.LogDensityFunction} - -A struct that represents a log-density function, which can be used with Optimization.jl. -This is a thin wrapper around `DynamicPPL.LogDensityFunction`: the main difference is that -the log-density is negated (because Optimization.jl performs minimisation, and we usually -want to maximise the log-density). - -An `OptimLogDensity` does not, in itself, obey the LogDensityProblems.jl interface. Thus, if -you want to calculate the log density of its contents at the point `z`, you should manually -call `LogDensityProblems.logdensity(f.ldf, z)`, instead of `LogDensityProblems.logdensity(f, -z)`. - -However, because Optimization.jl requires the objective function to be callable, you can -also call `f(z)` directly to get the negative log density at `z`. 
-""" -struct OptimLogDensity{L<:DynamicPPL.LogDensityFunction} - ldf::L +# Note that we use `getlogjoint` rather than `getlogjoint_internal`: this is intentional, +# because even though the VarInfo may be linked, the optimisation target should not take the +# Jacobian term into account. +logprob_func(::MAP) = DynamicPPL.getlogjoint +function logprob_accs(::MAP) + return (DynamicPPL.LogLikelihoodAccumulator(), DynamicPPL.LogPriorAccumulator()) end -""" - (f::OptimLogDensity)(z) - (f::OptimLogDensity)(z, _) - -Evaluate the negative log probability density at the array `z`. Which kind of probability -density is evaluated depends on the `getlogdensity` function used to construct the -underlying `LogDensityFunction` (e.g., `DynamicPPL.getlogjoint` for MAP estimation, or -`DynamicPPL.getloglikelihood` for MLE). - -Any second argument is ignored. The two-argument method only exists to match the interface -required by Optimization.jl. -""" -(f::OptimLogDensity)(z::AbstractVector) = -LogDensityProblems.logdensity(f.ldf, z) -(f::OptimLogDensity)(z, _) = f(z) - """ ModeResult{ - V<:NamedArrays.NamedArray, - O<:Any, - M<:OptimLogDensity, - P<:AbstractDict{<:VarName,<:Any} E<:ModeEstimator, + P<:AbstractDict{<:VarName,<:Any} + LP<:Real, + L<:DynamicPPL.LogDensityFunction, + O<:Any, } A wrapper struct to store various results from a MAP or MLE estimation. @@ -94,85 +69,35 @@ A wrapper struct to store various results from a MAP or MLE estimation. $(TYPEDFIELDS) """ struct ModeResult{ - V<:NamedArrays.NamedArray, - O<:Any, - M<:OptimLogDensity, - P<:AbstractDict{<:AbstractPPL.VarName,<:Any}, E<:ModeEstimator, + P<:AbstractDict{<:AbstractPPL.VarName,<:Any}, + LP<:Real, + L<:LogDensityFunction, + O, } <: StatsBase.StatisticalModel - "A vector with the resulting point estimates." - values::V - "The stored optimiser results." - optim_result::O - "The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run." 
- lp::Float64 - "The evaluation function used to calculate the output." - f::M - "Dictionary of parameter values" - params::P - "Whether the optimization was done in a transformed space." - linked::Bool "The type of mode estimation (MAP or MLE)." estimator::E + "Dictionary of parameter values. These values are always provided in unlinked space, + even if the optimisation was run in linked space." + params::P + "The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run. + Note that this is the actual log probability of the parameters, i.e., not negated; + we do need a negated log probability to run the optimisation itself (since it is a + maximisation), but this is handled in a way that is entirely transparent to the user." + lp::LP + "Whether the optimisation was done in a transformed space." + linked::Bool + "The LogDensityFunction used to calculate the output. Note that this LogDensityFunction + calculates the actual (non-negated) log density. It should hold that `m.lp == + LogDensityProblems.logdensity(m.ldf, m.optim_result.u)`." + ldf::L + "The stored optimiser results." + optim_result::O end -function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) - print(io, "ModeResult with maximized lp of ") - Printf.@printf(io, "%.2f", m.lp) - println(io) - return show(io, m.values) -end - -function Base.show(io::IO, m::ModeResult) - return show(io, m.values.array) -end - -""" - InitFromParams( - m::ModeResult, - fallback::Union{AbstractInitStrategy,Nothing}=InitFromPrior() - ) - -Initialize a model from the parameters stored in a `ModeResult`. The `fallback` is used if -some parameters are missing from the `ModeResult`. 
-""" -function DynamicPPL.InitFromParams( - m::ModeResult, fallback::Union{DynamicPPL.AbstractInitStrategy,Nothing}=InitFromPrior() -) - return DynamicPPL.InitFromParams(m.params, fallback) -end - -""" - Base.get(m::ModeResult, var_symbol::Symbol) - Base.get(m::ModeResult, var_symbols::AbstractVector{Symbol}) - -Return the values of all the variables with the symbol(s) `var_symbol` in the mode result -`m`. The return value is a `NamedTuple` with `var_symbols` as the key(s). The second -argument should be either a `Symbol` or a vector of `Symbol`s. -""" -function Base.get(m::ModeResult, var_symbols::AbstractVector{Symbol}) - vals_dict = m.params - iters = map(AbstractPPL.varname_and_value_leaves, keys(vals_dict), values(vals_dict)) - vns_and_vals = mapreduce(collect, vcat, iters) - varnames = collect(map(first, vns_and_vals)) - # For each symbol s in var_symbols, pick all the values from m.values for which the - # variable name has that symbol. - et = eltype(m.values) - value_vectors = Vector{et}[] - for s in var_symbols - push!( - value_vectors, - [m.values[Symbol(vn)] for vn in varnames if DynamicPPL.getsym(vn) == s], - ) - end - return (; zip(var_symbols, value_vectors)...) -end - -Base.get(m::ModeResult, var_symbol::Symbol) = get(m, [var_symbol]) - """ ModeResult( - log_density::OptimLogDensity, + log_density::DynamicPPL.LogDensityFunction, solution::SciMLBase.OptimizationSolution, linked::Bool, estimator::ModeEstimator, @@ -186,230 +111,264 @@ richer format of `ModeResult`. It also takes care of transforming them back to t parameter space in case the optimization was done in a transformed space. 
""" function ModeResult( - log_density::OptimLogDensity, + ldf::LogDensityFunction, solution::SciMLBase.OptimizationSolution, linked::Bool, estimator::ModeEstimator, ) - vals = DynamicPPL.ParamsWithStats(solution.u, log_density.ldf).params - iters = map(AbstractPPL.varname_and_value_leaves, keys(vals), values(vals)) - vns_vals_iter = mapreduce(collect, vcat, iters) - syms = map(Symbol ∘ first, vns_vals_iter) - split_vals = map(last, vns_vals_iter) - return ModeResult( - NamedArrays.NamedArray(split_vals, syms), - solution, - -solution.objective, - log_density, - vals, - linked, - estimator, - ) + # Get the parameter values in the original space. + parameters = DynamicPPL.ParamsWithStats(solution.u, ldf).params + return ModeResult(estimator, parameters, -solution.objective, linked, ldf, solution) end -""" - ModeEstimationConstraints - -A struct that holds constraints for mode estimation problems. +function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) + printstyled(io, "ModeResult\n"; bold=true) + # typeof avoids the parentheses in the printed output + println(io, " ├ estimator : $(typeof(m.estimator))") + println(io, " ├ lp : $(m.lp)") + entries = length(m.params) == 1 ? "entry" : "entries" + println(io, " ├ params : $(typeof(m.params)) with $(length(m.params)) $(entries)") + for (i, (vn, val)) in enumerate(m.params) + tree_char = i == length(m.params) ? "└" : "├" + println(io, " │ $(tree_char) $vn => $(val)") + end + println(io, " └ linked : $(m.linked)") + return nothing +end -The fields are the same as possible constraints supported by the Optimization.jl: -`ub` and `lb` specify lower and upper bounds of box constraints. `cons` is a function that -takes the parameters of the model and returns a list of derived quantities, which are then -constrained by the lower and upper bounds set by `lcons` and `ucons`. We refer to these -as generic constraints. 
Please see the documentation of -[Optimization.jl](https://docs.sciml.ai/Optimization/stable/) for more details. +""" + InitFromParams( + m::ModeResult, + fallback::Union{AbstractInitStrategy,Nothing}=InitFromPrior() + ) -Any of the fields can be `nothing`, disabling the corresponding constraints. +Initialize a model from the parameters stored in a `ModeResult`. The `fallback` is used if +some parameters are missing from the `ModeResult`. """ -struct ModeEstimationConstraints{ - Lb<:Union{Nothing,AbstractVector}, - Ub<:Union{Nothing,AbstractVector}, - Cons, - LCons<:Union{Nothing,AbstractVector}, - UCons<:Union{Nothing,AbstractVector}, -} - lb::Lb - ub::Ub - cons::Cons - lcons::LCons - ucons::UCons +function DynamicPPL.InitFromParams( + m::ModeResult, fallback::Union{DynamicPPL.AbstractInitStrategy,Nothing}=InitFromPrior() +) + return DynamicPPL.InitFromParams(m.params, fallback) end -has_box_constraints(c::ModeEstimationConstraints) = c.ub !== nothing || c.lb !== nothing -function has_generic_constraints(c::ModeEstimationConstraints) - return (c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing) +struct ConstraintCheckAccumulator <: AbstractAccumulator + lb::NTOrVNDict # Must be in unlinked space + ub::NTOrVNDict # Must be in unlinked space end -has_constraints(c) = has_box_constraints(c) || has_generic_constraints(c) - -""" - generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) - -Generate an initial value for the optimization problem. - -If `initial_params` is not `nothing`, a copy of it is returned. Otherwise initial parameter -values are generated either by sampling from the prior (if no constraints are present) or -uniformly from the box constraints. If generic constraints are set, an error is thrown. 
-""" -function generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) - if initial_params === nothing && has_generic_constraints(constraints) +DynamicPPL.accumulator_name(::ConstraintCheckAccumulator) = :OptimConstraintCheck +function DynamicPPL.accumulate_assume!!( + acc::ConstraintCheckAccumulator, val::Any, ::Any, vn::VarName, dist::Distribution +) + # `val`, `acc.lb`, and `acc.ub` are all in unlinked space. + lb = get_constraints(acc.lb, vn) + ub = get_constraints(acc.ub, vn) + if !satisfies_constraints(lb, ub, val, dist) throw( - ArgumentError( - "You must provide an initial value when using generic constraints." + DomainError( + val, + "\nThe value for variable $(vn) ($(val)) went outside of constraints (lb=$(lb), ub=$(ub)) during optimisation.\n\nThis can happen when using constraints on a variable that has a dynamic support, e.g., `y ~ truncated(Normal(); lower=x)` where `x` is another variable in the model.\n\nTo avoid this, consider either running the optimisation in unlinked space (`link=false`) or removing the constraints.\n\nIf you are sure that this does not matter and you want to suppress this error, you can also set `check_constraints_at_runtime=false`.", ), ) end - - return if initial_params !== nothing - copy(initial_params) - elseif has_box_constraints(constraints) - [ - rand(Distributions.Uniform(lower, upper)) for - (lower, upper) in zip(constraints.lb, constraints.ub) - ] - else - rand(Vector, model) - end -end - -function default_solver(constraints::ModeEstimationConstraints) - return if has_generic_constraints(constraints) - OptimizationOptimJL.IPNewton() - else - OptimizationOptimJL.LBFGS() - end + return acc end - -""" - OptimizationProblem(log_density::OptimLogDensity, initial_params::AbstractVector, adtype, constraints) - -Create an `OptimizationProblem` for the objective function defined by `log_density`. 
- -Note that the adtype parameter here overrides any adtype parameter the -OptimLogDensity was constructed with. -""" -function Optimization.OptimizationProblem( - log_density::OptimLogDensity, initial_params::AbstractVector, adtype, constraints +function DynamicPPL.accumulate_observe!!( + acc::ConstraintCheckAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing} ) - # Note that OptimLogDensity is a callable that evaluates the model with given - # parameters. Hence we can use it in the objective function as below. - f = Optimization.OptimizationFunction(log_density, adtype; cons=constraints.cons) - prob = if !has_constraints(constraints) - Optimization.OptimizationProblem(f, initial_params) - else - Optimization.OptimizationProblem( - f, - initial_params; - lcons=constraints.lcons, - ucons=constraints.ucons, - lb=constraints.lb, - ub=constraints.ub, - ) - end - return prob + return acc end - -# Note that we use `getlogjoint` rather than `getlogjoint_internal`: this is intentional, -# because even though the VarInfo may be linked, the optimisation target should not take the -# Jacobian term into account. -_choose_getlogdensity(::MAP) = DynamicPPL.getlogjoint -_choose_getlogdensity(::MLE) = DynamicPPL.getloglikelihood +function DynamicPPL.reset(acc::ConstraintCheckAccumulator) + return acc +end +function Base.copy(acc::ConstraintCheckAccumulator) + # The copy here is probably not needed, since lb and ub are never mutated, and we are + # responsible for generating lb and ub. But we can just `copy` to be safe. + return ConstraintCheckAccumulator(copy(acc.lb), copy(acc.ub)) +end +DynamicPPL.split(acc::ConstraintCheckAccumulator) = acc +DynamicPPL.combine(acc1::ConstraintCheckAccumulator, ::ConstraintCheckAccumulator) = acc1 """ estimate_mode( + [rng::Random.AbstractRNG,] model::DynamicPPL.Model, estimator::ModeEstimator, - [solver]; - kwargs... 
+ solver=OptimizationOptimJL.LBFGS(); + link::Bool=true, + initial_params=DynamicPPL.InitFromPrior(), + lb::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), + ub::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), + adtype::AbstractADType=AutoForwardDiff(), + check_model::Bool=true, + check_constraints_at_runtime::Bool=true, + kwargs..., ) Find the mode of the probability distribution of a model. -Under the hood this function calls `Optimization.solve`. +Under the hood this function constructs a `LogDensityFunction` and calls +`Optimization.solve` on it. + +Note that the optimisation interface that Turing exposes is a more high-level interface +which is tailored towards probabilistic modelling, so not every option available in +Optimization.jl is supported here. In particular, Turing's optimisation interface allows you +to: + +- Provide initial parameters, lower bounds, and upper bounds as mappings of `VarName`s to + values in original (unlinked space). + +- Choose whether to run the optimisation in linked or unlinked space (by default linked). + Linked space means that parameters are transformed to unconstrained Euclidean space, + meaning that you can avoid hard edges in the optimisation landscape (i.e., logpdf + suddenly dropping to `-Inf` outside the support of a variable). It also avoids cases + where parameters may not be independent, e.g., `x ~ Dirichlet(...)` where the components + of `x` must sum to 1. Optimisation in linked space is enabled by default. + +Turing is responsible for 'translating' these user-friendly specifications into vectorised +forms (of initial parameters, lower bounds, and upper bounds) that Optimization.jl can work +with. + +However, there are cases where this translation can fail or otherwise be ill-defined +(specifically when considering constraints). For example, recall that constraints are +supplied in unlinked space, but the optimisation is run by default in linked space. 
+Sometimes it is possible to translate constraints from unlinked space to linked space: for
+example, for `x ~ Beta(2, 2)`, lower bounds in unlinked space can be translated to lower
+bounds in linked space via the logit transform (specifically, by calling
+`to_linked_vec_transform(Beta(2, 2))`).
+
+However, if a user supplies a constraint on a Dirichlet variable, there is no well-defined
+mapping of unlinked constraints to linked space. In such cases, Turing will throw an error
+(although you can still run in unlinked space). Generic, non-box constraints are also not
+possible to correctly support, so Turing's optimisation interface refuses to support them.
+
+See https://github.com/TuringLang/Turing.jl/issues/2634 for more discussion on the interface
+and what it supports.
+
+If you need these capabilities, we suggest that you create your own LogDensityFunction and
+call Optimization.jl directly on it.
 
 # Arguments
 +
+- `rng::Random.AbstractRNG`: an optional random number generator. This is used only for
+  parameter initialisation; it does not affect the actual optimisation process.
+
 - `model::DynamicPPL.Model`: The model for which to estimate the mode.
+
 - `estimator::ModeEstimator`: Can be either `MLE()` for maximum likelihood estimation or
-  `MAP()` for maximum a posteriori estimation.
-- `solver=nothing`. The optimization algorithm to use. Optional. Can be any solver
-  recognised by Optimization.jl. If omitted a default solver is used: LBFGS, or IPNewton
-  if non-box constraints are present.
+  `MAP()` for maximum a posteriori estimation.
+
+- `solver=OptimizationOptimJL.LBFGS()`: The optimization algorithm to use. The default
+  solver is L-BFGS, which is a good general-purpose solver that supports box constraints.
+  You can also use any solver supported by
+  [Optimization.jl](https://docs.sciml.ai/Optimization/stable/).
 
 # Keyword arguments
 -- `check_model::Bool=true`: If true, the model is checked for errors before
-  optimisation begins. 
-- `initial_params::Union{AbstractVector,Nothing}=nothing`: Initial value for the
-  optimization. Optional, unless non-box constraints are specified. If omitted it is
-  generated by either sampling from the prior distribution or uniformly from the box
-  constraints, if any.
-- `adtype::AbstractADType=AutoForwardDiff()`: The automatic differentiation type to use.
-- Keyword arguments `lb`, `ub`, `cons`, `lcons`, and `ucons` define constraints for the
-  optimization problem. Please see [`ModeEstimationConstraints`](@ref) for more details.
-- Any extra keyword arguments are passed to `Optimization.solve`.
+
+- `link::Bool=true`: if true, the model parameters are transformed to an unconstrained
+  space for the optimisation. This is generally recommended as it avoids hard edges (i.e.,
+  returning a log probability of `-Inf` outside the support of the parameters), which can lead to
+  NaN's or incorrect results. Note that the returned parameter values are always in the
+  original (unlinked) space, regardless of whether `link` is true or false.
+
+- `initial_params::DynamicPPL.AbstractInitStrategy=DynamicPPL.InitFromPrior()`: an
+  initialisation strategy for the parameters. By default, parameters are initialised by
+  generating from the prior. The initialisation strategy will always be augmented by
+  any constraints provided via `lb` and `ub`, in that the initial parameters will be
+  guaranteed to lie within the provided bounds.
+
+- `lb::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;)`: a mapping from variable names
+  to lower bounds for the optimisation. The bounds should be provided in the original
+  (unlinked) space. Not all constraints are supported by Turing's optimisation interface.
+  See details above.
+
+- `ub::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;)`: a mapping from variable names
+  to upper bounds for the optimisation. The bounds should be provided in the original
+  (unlinked) space. Not all constraints are supported by Turing's optimisation interface. 
+ See details above. + +- `adtype::AbstractADType=AutoForwardDiff()`: The automatic differentiation backend to use. + +- `check_model::Bool=true`: if true, the model is checked for potential errors before + optimisation begins. + +- `check_constraints_at_runtime::Bool=true`: if true, the constraints provided via `lb` + and `ub` are checked at each evaluation of the log probability during optimisation (even + though Optimization.jl already has access to these constraints). This can be useful in a + very specific situation: consider a model where a variable has a dynamic support, e.g. + `y ~ truncated(Normal(); lower=x)`, where `x` is another variable in the model. In this + case, if the model is run in linked space, then the box constraints that Optimization.jl + sees may not always be correct, and `y` may go out of its intended bounds due to changes + in `x`. Enabling this option will ensure that such violations are caught and an error + thrown. This is very cheap to do, but if you absolutely need to squeeze out every last + bit of performance and you know you will not be hitting the edge case above, you can + disable this check. + +Any extra keyword arguments are passed to `Optimization.solve`. 
""" function estimate_mode( + rng::Random.AbstractRNG, model::DynamicPPL.Model, estimator::ModeEstimator, - solver=nothing; - check_model::Bool=true, - initial_params=nothing, + solver=LBFGS(); + link::Bool=true, + initial_params=DynamicPPL.InitFromPrior(), + lb::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), + ub::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), adtype=ADTypes.AutoForwardDiff(), - cons=nothing, - lcons=nothing, - ucons=nothing, - lb=nothing, - ub=nothing, - kwargs..., + check_model::Bool=true, + check_constraints_at_runtime::Bool=true, + solve_kwargs..., ) if check_model new_model = DynamicPPL.setleafcontext(model, DynamicPPL.InitContext()) - DynamicPPL.check_model(new_model, DynamicPPL.VarInfo(); error_on_failure=true) + DynamicPPL.check_model(new_model, VarInfo(); error_on_failure=true) end - constraints = ModeEstimationConstraints(lb, ub, cons, lcons, ucons) - initial_params = generate_initial_params(model, initial_params, constraints) - if solver === nothing - solver = default_solver(constraints) + # Generate a LogDensityFunction first. We do this first because we want to use the + # info stored in the LDF to generate the initial parameters and constraints in the + # correct order. + vi = VarInfo(model) + vi = if link + DynamicPPL.link!!(vi, model) + else + vi end - - # Set its VarInfo to the initial parameters. - # TODO(penelopeysm): Unclear if this is really needed? Any time that logp is calculated - # (using `LogDensityProblems.logdensity(ldf, x)`) the parameters in the - # varinfo are completely ignored. The parameters only matter if you are calling evaluate!! - # directly on the fields of the LogDensityFunction - vi = DynamicPPL.VarInfo(model) - vi = DynamicPPL.unflatten(vi, initial_params) - - # Link the varinfo if needed. - # TODO(mhauru) We currently couple together the questions of whether the user specified - # bounds/constraints and whether we transform the objective function to an - # unconstrained space. 
These should be separate concerns, but for that we need to - # implement getting the bounds of the prior distributions. - optimise_in_unconstrained_space = !has_constraints(constraints) - if optimise_in_unconstrained_space - vi = DynamicPPL.link(vi, model) + getlogdensity = logprob_func(estimator) + accs = if check_constraints_at_runtime + (logprob_accs(estimator)..., ConstraintCheckAccumulator(lb, ub)) + else + logprob_accs(estimator) end - # Re-extract initial parameters (which may now be linked). - initial_params = vi[:] - - # Note that we don't need adtype here, because it's specified inside the - # OptimizationProblem - getlogdensity = _choose_getlogdensity(estimator) - ldf = DynamicPPL.LogDensityFunction(model, getlogdensity, vi) - # Create an OptimLogDensity object that can be used to evaluate the objective function, - # i.e. the negative log density. - log_density = OptimLogDensity(ldf) - - prob = Optimization.OptimizationProblem( - log_density, initial_params, adtype, constraints + # Note that we don't need adtype to construct the LDF, because it's specified inside the + # OptimizationProblem. + ldf = LogDensityFunction(model, getlogdensity, vi, accs) + + # Generate bounds and initial parameters in the unlinked or linked space as requested. + lb_vec, ub_vec, inits_vec = make_optim_bounds_and_init( + rng, ldf, Turing._convert_initial_params(initial_params), lb, ub ) - solution = Optimization.solve(prob, solver; kwargs...) - # TODO(mhauru) We return a ModeResult for compatibility with the older Optim.jl - # interface. Might we want to break that and develop a better return type? - return ModeResult(log_density, solution, optimise_in_unconstrained_space, estimator) + # If there are no constraints, then we can omit them from the OptimizationProblem + # construction. Note that lb and ub must be provided together, not just one of them. 
+ bounds_kwargs = if any(isfinite, lb_vec) || any(isfinite, ub_vec) + (lb=lb_vec, ub=ub_vec) + else + (;) + end + + # Insert a negative sign here because Optimization.jl does minimization. + lp_function = (x, _) -> -LogDensityProblems.logdensity(ldf, x) + optf = Optimization.OptimizationFunction(lp_function, adtype) + optprob = Optimization.OptimizationProblem(optf, inits_vec; bounds_kwargs...) + solution = Optimization.solve(optprob, solver; solve_kwargs...) + return ModeResult(ldf, solution, link, estimator) +end +function estimate_mode(model::DynamicPPL.Model, args...; kwargs...) + return estimate_mode(Random.default_rng(), model, args...; kwargs...) end """ maximum_a_posteriori( + [rng::Random.AbstractRNG,] model::DynamicPPL.Model, [solver]; kwargs... @@ -418,15 +377,21 @@ end Find the maximum a posteriori estimate of a model. This is a convenience function that calls `estimate_mode` with `MAP()` as the estimator. -Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more +Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for full details. """ +function maximum_a_posteriori( + rng::Random.AbstractRNG, model::DynamicPPL.Model, args...; kwargs... +) + return estimate_mode(rng, model, MAP(), args...; kwargs...) +end function maximum_a_posteriori(model::DynamicPPL.Model, args...; kwargs...) - return estimate_mode(model, MAP(), args...; kwargs...) + return maximum_a_posteriori(Random.default_rng(), model, args...; kwargs...) end """ maximum_likelihood( + [rng::Random.AbstractRNG,] model::DynamicPPL.Model, [solver]; kwargs... @@ -435,11 +400,16 @@ end Find the maximum likelihood estimate of a model. This is a convenience function that calls `estimate_mode` with `MLE()` as the estimator. -Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more +Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for full details. 
""" +function maximum_likelihood( + rng::Random.AbstractRNG, model::DynamicPPL.Model, args...; kwargs... +) + return estimate_mode(rng, model, MLE(), args...; kwargs...) +end function maximum_likelihood(model::DynamicPPL.Model, args...; kwargs...) - return estimate_mode(model, MLE(), args...; kwargs...) + return maximum_likelihood(Random.default_rng(), model, args...; kwargs...) end include("stats.jl") diff --git a/src/optimisation/init.jl b/src/optimisation/init.jl new file mode 100644 index 0000000000..3f65a9d5b1 --- /dev/null +++ b/src/optimisation/init.jl @@ -0,0 +1,316 @@ +using DynamicPPL: AbstractInitStrategy, AbstractAccumulator +using Distributions + +# TODO(penelopeysm): Eventually replace with VNT +const NTOrVNDict = Union{NamedTuple,AbstractDict{<:VarName,<:Any}} +function get_constraints(bounds::NTOrVNDict, vn::VarName) + return if AbstractPPL.hasvalue(bounds, vn) + return AbstractPPL.getvalue(bounds, vn) + else + nothing + end +end + +""" + InitWithConstraintCheck(lb, ub, actual_strategy) <: AbstractInitStrategy + +Initialise parameters with `actual_strategy`, but check that the initialised +parameters satisfy any bounds in `lb` and `ub`. +""" +struct InitWithConstraintCheck{Tlb<:NTOrVNDict,Tub<:NTOrVNDict} <: AbstractInitStrategy + lb::Tlb + ub::Tub + actual_strategy::AbstractInitStrategy +end + +const MAX_ATTEMPTS = 1000 + +""" + satisfies_constraints(lb, ub, proposed_val, dist) + +Check whether `proposed_val` satisfies the constraints defined by `lb` and `ub`. + +The methods that this function provides therefore dictate what values users can specify for +different types of distributions. For example, for `UnivariateDistribution`, the constraints +must be supplied as `Real` numbers. If other kinds of constraints are given, it will hit the +fallback method and an error will be thrown. + +This method intentionally does not handle `NaN` values as that is left to the optimiser to +deal with. 
+""" +function satisfies_constraints( + lb::Union{Nothing,Real}, + ub::Union{Nothing,Real}, + proposed_val::Real, + ::UnivariateDistribution, +) + satisfies_lb = lb === nothing || proposed_val >= lb + satisfies_ub = ub === nothing || proposed_val <= ub + return isnan(proposed_val) || (satisfies_lb && satisfies_ub) +end +function satisfies_constraints( + lb::Union{Nothing,Real}, + ub::Union{Nothing,Real}, + proposed_val::ForwardDiff.Dual, + dist::UnivariateDistribution, +) + # This overload is needed because ForwardDiff.Dual(2.0, 1.0) > 2.0 returns true, even + # though the primal value is within the constraints. + return satisfies_constraints(lb, ub, ForwardDiff.value(proposed_val), dist) +end +function satisfies_constraints( + lb::Union{Nothing,AbstractArray{<:Real}}, + ub::Union{Nothing,AbstractArray{<:Real}}, + proposed_val::AbstractArray{<:Real}, + ::Union{MultivariateDistribution,MatrixDistribution}, +) + satisfies_lb = + lb === nothing || all(p -> isnan(p[1]) || p[1] >= p[2], zip(proposed_val, lb)) + satisfies_ub = + ub === nothing || all(p -> isnan(p[1]) || p[1] <= p[2], zip(proposed_val, ub)) + return satisfies_lb && satisfies_ub +end +function satisfies_constraints( + lb::Union{Nothing,AbstractArray{<:Real}}, + ub::Union{Nothing,AbstractArray{<:Real}}, + proposed_val::AbstractArray{<:ForwardDiff.Dual}, + dist::Union{MultivariateDistribution,MatrixDistribution}, +) + return satisfies_constraints(lb, ub, ForwardDiff.value.(proposed_val), dist) +end +function satisfies_constraints( + lb::Union{Nothing,NamedTuple}, + ub::Union{Nothing,NamedTuple}, + proposed_val::NamedTuple, + dist::Distributions.ProductNamedTupleDistribution, +) + for sym in keys(proposed_val) + this_lb = lb === nothing ? nothing : get(lb, sym, nothing) + this_ub = ub === nothing ? 
nothing : get(ub, sym, nothing) + this_val = proposed_val[sym] + this_dist = dist.dists[sym] + if !satisfies_constraints(this_lb, this_ub, this_val, this_dist) + return false + end + end + return true +end +function satisfies_constraints(lb::Any, ub::Any, ::Any, d::Distribution) + # Trivially satisfied if no constraints are given. + lb === nothing && ub === nothing && return true + # Otherwise + throw( + ArgumentError( + "Constraints of type $((typeof(lb), typeof(ub))) are not yet implemented for distribution $(typeof(d)). If you need this functionality, please open an issue at https://github.com/TuringLang/Turing.jl/issues.", + ), + ) +end + +function DynamicPPL.init( + rng::Random.AbstractRNG, vn::VarName, dist::Distribution, c::InitWithConstraintCheck +) + # First check that the constraints are sensible. The call to satisfies_constraints will + # error if `lb` is 'greater' than `ub`. + lb = get_constraints(c.lb, vn) + ub = get_constraints(c.ub, vn) + if lb !== nothing && ub !== nothing && !satisfies_constraints(lb, ub, lb, dist) + throw(ArgumentError("Lower bound for variable $(vn) is greater than upper bound.")) + end + # The inner `init` might (for whatever reason) return linked or otherwise + # transformed values. We need to transform them back into to unlinked space, + # so that we can check the constraints properly. 
+ maybe_transformed_val, transform = DynamicPPL.init(rng, vn, dist, c.actual_strategy) + proposed_val = transform(maybe_transformed_val) + attempts = 1 + while !satisfies_constraints(lb, ub, proposed_val, dist) + if attempts >= MAX_ATTEMPTS + throw( + ArgumentError( + "Could not initialise variable $(vn) within constraints after $(MAX_ATTEMPTS) attempts; please supply your own initialisation values using `InitFromParams`, or check that the values you supplied are valid", + ), + ) + end + maybe_transformed_val, transform = DynamicPPL.init(rng, vn, dist, c.actual_strategy) + proposed_val = transform(maybe_transformed_val) + attempts += 1 + end + return (proposed_val, DynamicPPL.typed_identity) +end + +can_have_linked_constraints(::Distribution) = false +can_have_linked_constraints(::UnivariateDistribution) = true +can_have_linked_constraints(::MultivariateDistribution) = true +can_have_linked_constraints(::MatrixDistribution) = false +function can_have_linked_constraints(pd::Distributions.Product) + return all(can_have_linked_constraints.(pd.v)) +end +function can_have_linked_constraints(pd::Distributions.ProductDistribution) + return all(can_have_linked_constraints.(pd.dists)) +end +function can_have_linked_constraints(pd::Distributions.ProductNamedTupleDistribution) + return all(can_have_linked_constraints.(values(pd.dists))) +end +can_have_linked_constraints(::Dirichlet) = false +can_have_linked_constraints(::LKJCholesky) = false + +struct ConstraintAccumulator <: AbstractAccumulator + "Whether to store constraints in linked space or not." + link::Bool + "A mapping of VarNames to lower bounds in untransformed space." + lb::NTOrVNDict + "A mapping of VarNames to upper bounds in untransformed space." + ub::NTOrVNDict + "The initial values for the optimisation in linked space (if link=true) or unlinked + space (if link=false)." 
+ init_vecs::Dict{VarName,AbstractVector} + "The lower bound vectors for the optimisation in linked space (if link=true) or unlinked + space (if link=false)." + lb_vecs::Dict{VarName,AbstractVector} + "The upper bound vectors for the optimisation in linked space (if link=true) or unlinked + space (if link=false)." + ub_vecs::Dict{VarName,AbstractVector} + function ConstraintAccumulator(link::Bool, lb::NTOrVNDict, ub::NTOrVNDict) + return new( + link, + lb, + ub, + Dict{VarName,AbstractVector}(), + Dict{VarName,AbstractVector}(), + Dict{VarName,AbstractVector}(), + ) + end +end +const CONSTRAINT_ACC_NAME = :OptimConstraints +DynamicPPL.accumulator_name(::ConstraintAccumulator) = CONSTRAINT_ACC_NAME +function DynamicPPL.accumulate_assume!!( + acc::ConstraintAccumulator, val::Any, ::Any, vn::VarName, dist::Distribution +) + # First check if we have any incompatible constraints + linking. 'Incompatible', here, + # means that the constraints as defined in the unlinked space do not map to box + # constraints in the linked space, which would make it impossible to generate + # appropriate `lb` and `ub` arguments to pass to Optimization.jl. This is the case for + # e.g. Dirichlet. + lb = get_constraints(acc.lb, vn) + ub = get_constraints(acc.ub, vn) + if (lb !== nothing || ub !== nothing) && acc.link && !can_have_linked_constraints(dist) + throw( + ArgumentError( + "Cannot use constraints for variable $(vn) with distribution $(typeof(dist)) when performing linked optimisation; this is because the constraints cannot be cleanly mapped to linked space. If you need to use constraints for this variable, please set `link=false` when optimising, or manually perform optimisation with your own LogDensityFunction.", + ), + ) + end + transform = if acc.link + DynamicPPL.to_linked_vec_transform(dist) + else + DynamicPPL.to_vec_transform(dist) + end + # Transform the value and store it. 
+ vectorised_val = transform(val) + acc.init_vecs[vn] = vectorised_val + nelems = length(vectorised_val) + # Then generate the constraints using the same transform. + if lb !== nothing + acc.lb_vecs[vn] = transform(lb) + else + acc.lb_vecs[vn] = fill(-Inf, nelems) + end + if ub !== nothing + acc.ub_vecs[vn] = transform(ub) + else + acc.ub_vecs[vn] = fill(Inf, nelems) + end + return acc +end +function DynamicPPL.accumulate_observe!!( + acc::ConstraintAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing} +) + return acc +end +function DynamicPPL.reset(acc::ConstraintAccumulator) + return ConstraintAccumulator(acc.link, acc.lb, acc.ub) +end +function Base.copy(acc::ConstraintAccumulator) + # ConstraintAccumulator should not ever modify `acc.lb` or `acc.ub` (and when + # constructing it inside `make_optim_bounds_and_init` we make sure to deepcopy any user + # input), so there is no chance that `lb` or `ub` could ever be mutated once they're + # inside the accumulator. Hence we don't need to copy them. + return ConstraintAccumulator(acc.link, acc.lb, acc.ub) +end +function DynamicPPL.split(acc::ConstraintAccumulator) + return ConstraintAccumulator(acc.link, acc.lb, acc.ub) +end +function DynamicPPL.combine(acc1::ConstraintAccumulator, acc2::ConstraintAccumulator) + combined = ConstraintAccumulator(acc1.link, acc1.lb, acc1.ub) + combined.init_vecs = merge(acc1.init_vecs, acc2.init_vecs) + combined.lb_vecs = merge(acc1.lb_vecs, acc2.lb_vecs) + combined.ub_vecs = merge(acc1.ub_vecs, acc2.ub_vecs) + return combined +end + +function _get_ldf_range(ldf::LogDensityFunction, vn::VarName) + if haskey(ldf._varname_ranges, vn) + return ldf._varname_ranges[vn].range + elseif haskey(ldf._iden_varname_ranges, AbstractPPL.getsym(vn)) + return ldf._iden_varname_ranges[AbstractPPL.getsym(vn)].range + else + # Should not happen. 
+ error("could not find range for variable name $(vn) in LogDensityFunction") + end +end + +""" + make_optim_bounds_and_init( + rng::Random.AbstractRNG, + ldf::LogDensityFunction{Tlink}, + initial_params::AbstractInitStrategy, + lb::NTOrVNDict, + ub::NTOrVNDict, + ) where {Tlink} + +Generate a tuple of `(lb_vec, ub_vec, init_vec)` which are suitable for directly passing to +Optimization.jl. All three vectors returned will be in the unlinked or linked space +depending on the value of `link`. + +The `lb` and `ub` arguments, as well as any `initial_params` provided as `InitFromParams`, +are expected to be in the unlinked space. +""" +function make_optim_bounds_and_init( + rng::Random.AbstractRNG, + ldf::LogDensityFunction{Tlink}, + initial_params::AbstractInitStrategy, + lb::NTOrVNDict, + ub::NTOrVNDict, +) where {Tlink} + # Initialise a VarInfo with parameters that satisfy the constraints. + ctx = DynamicPPL.InitContext(rng, InitWithConstraintCheck(lb, ub, initial_params)) + new_model = DynamicPPL.setleafcontext(ldf.model, ctx) + # Run the model. + vi = DynamicPPL.OnlyAccsVarInfo(( + ConstraintAccumulator(Tlink, deepcopy(lb), deepcopy(ub)), + )) + _, vi = DynamicPPL.evaluate!!(new_model, vi) + # Now extract the accumulator, and construct the vectorised constraints using the + # ranges stored in the LDF. + constraint_acc = DynamicPPL.getacc(vi, Val(CONSTRAINT_ACC_NAME)) + nelems = LogDensityProblems.dimension(ldf) + inits = fill(NaN, nelems) + lb = fill(-Inf, nelems) + ub = fill(Inf, nelems) + for (vn, init_val) in constraint_acc.init_vecs + range = _get_ldf_range(ldf, vn) + inits[range] = init_val + if haskey(constraint_acc.lb_vecs, vn) + lb[range] = constraint_acc.lb_vecs[vn] + end + if haskey(constraint_acc.ub_vecs, vn) + ub[range] = constraint_acc.ub_vecs[vn] + end + end + # Make sure we have filled in all values. This should never happen, but we should just + # check. 
+ if any(isnan, inits) + error("Could not generate vector of initial values as some values are missing.") + end + # Concretise before returning. + return [x for x in lb], [x for x in ub], [x for x in inits] +end diff --git a/src/optimisation/stats.jl b/src/optimisation/stats.jl index eae9e81c6b..06d80d02f6 100644 --- a/src/optimisation/stats.jl +++ b/src/optimisation/stats.jl @@ -1,8 +1,56 @@ -# Various StatsBase methods for ModeResult +import DifferentiationInterface as DI """ - StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) + get_vectorised_params(m::ModeResult) +Generates a vectorised form of the optimised parameters stored in the `ModeResult`, along +with the corresponding variable names. These parameters correspond to unlinked space. + +This function returns two vectors: the first contains the variable names, and the second +contains the corresponding values. +""" +function get_vectorised_params(m::ModeResult) + # This function requires some subtlety. We _could_ simply iterate over keys(m.params) + # and values(m.params), apply AbstractPPL.varname_and_value_leaves to each pair, and + # then collect them into a vector. *However*, this vector will later have to be used + # with a LogDensityFunction again! That means that the order of the parameters in the + # vector must match the order expected by the LogDensityFunction. There's no guarantee + # that a simple iteration over the Dict will yield the parameters in the correct order. + # + # To ensure that this is always the case, we will have to create a LogDensityFunction + # and then use its stored ranges to extract the parameters in the correct order. This + # LDF will have to be created in unlinked space. + # + # TODO(penelopeysm): This needs to be done in a "better" way using DynamicPPL + # functionality. 
Note that, if https://github.com/TuringLang/DynamicPPL.jl/pull/1178 + # goes through, the value vector can just be generated using + # `first(DynamicPPL.rand_with_logdensity(m.ldf, InitFromParams(m.params)))`. The + # varnames, though, are a hot mess. See the TODO below. + ldf = LogDensityFunction(m.ldf.model) + vns = Vector{VarName}(undef, LogDensityProblems.dimension(ldf)) + vals = Vector{Any}(undef, LogDensityProblems.dimension(ldf)) + for vn in keys(m.params) + range = if AbstractPPL.getoptic(vn) === identity + ldf._iden_varname_ranges[AbstractPPL.getsym(vn)].range + else + ldf._varname_ranges[vn].range + end + # TODO(penelopeysm): This assumes that tovec and varname_leaves return the same + # number of sub-elements in the same order --- which is NOT true for things like + # Cholesky factors --- so this is a bug! We COULD use `varname_and_value_leaves` but + # that would just be a different kind of bug because LDF uses tovec for + # vectorisation!! https://github.com/TuringLang/Turing.jl/issues/2734 + val = m.params[vn] + vns[range] = collect(AbstractPPL.varname_leaves(vn, val)) + vals[range] = DynamicPPL.tovec(val) + end + # Concretise + return [x for x in vns], [x for x in vals] +end + +# Various StatsBase methods for ModeResult +""" + StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) Return a table with coefficients and related statistics of the model. level determines the level for confidence intervals (by default, 95%). @@ -12,9 +60,9 @@ during the computation of the standard errors will be caught and reported in an "Error notes" column. """ function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) + vns, estimates = get_vectorised_params(m) # Get columns for coeftable. 
- terms = string.(StatsBase.coefnames(m)) - estimates = m.values.array[:, 1] + terms = string.(vns) # If numerrors_warnonly is true, and if either the information matrix is singular or has # negative entries on its diagonal, then `notes` will be a list of strings for each # value in `m.values`, explaining why the standard error is NaN. @@ -27,8 +75,8 @@ function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly vcov = inv(infmat) catch e if isa(e, LinearAlgebra.SingularException) - stderrors = fill(NaN, length(m.values)) - notes = fill("Information matrix is singular", length(m.values)) + stderrors = fill(NaN, length(estimates)) + notes = fill("Information matrix is singular", length(estimates)) else rethrow(e) end @@ -82,24 +130,50 @@ function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly return StatsBase.CoefTable(cols, colnms, terms) end +""" + StatsBase.informationmatrix( + m::ModeResult; + adtype::ADTypes.AbstractADType=ADTypes.AutoForwardDiff() + ) + +Calculate the [Fisher information matrix](https://en.wikipedia.org/wiki/Fisher_information) +for the mode result `m`. This is the negative Hessian of the log-probability at the mode. + +The Hessian is calculated using automatic differentiation with the specified `adtype`. By +default this is `ADTypes.AutoForwardDiff()`. In general, however, it may be more efficient +to use forward-over-reverse AD when the model has many parameters. This can be specified +using `DifferentiationInterface.SecondOrder(outer, inner)`; please consult the +[DifferentiationInterface.jl +documentation](https://juliadiff.org/DifferentiationInterface.jl/DifferentiationInterface/stable/explanation/backends/#Second-order) +for more details. +""" function StatsBase.informationmatrix( - m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs... 
+ m::ModeResult; adtype::ADTypes.AbstractADType=ADTypes.AutoForwardDiff() ) - # This needs to be calculated in unlinked space - model = m.f.ldf.model - vi = DynamicPPL.VarInfo(model) - getlogdensity = _choose_getlogdensity(m.estimator) - new_optimld = OptimLogDensity(DynamicPPL.LogDensityFunction(model, getlogdensity, vi)) + # This needs to be calculated in unlinked space, regardless of whether the optimization + # itself was run in linked space. + model = m.ldf.model + # We need to get the Hessian for the positive log density. + ldf = DynamicPPL.LogDensityFunction(model, logprob_func(m.estimator)) + f = Base.Fix1(LogDensityProblems.logdensity, ldf) + # Then get the vectorised parameters. + _, x = get_vectorised_params(m) - # Calculate the Hessian, which is the information matrix because the negative of the log - # likelihood was optimized - varnames = StatsBase.coefnames(m) - info = hessian_function(new_optimld, m.values.array[:, 1]) - return NamedArrays.NamedArray(info, (varnames, varnames)) + # We can include a check here to make sure that f(x) is in fact the log density at x. + # This helps guard against potential bugs where `get_vectorised_params` returns a + # wrongly-ordered parameter vector. + if !isapprox(f(x), m.lp) + error( + "The parameter vector extracted from the ModeResult does not match the " * + "log density stored in the ModeResult. 
This is a bug in Turing; please " * + "do file an issue at https://github.com/TuringLang/Turing.jl/issues.", + ) + end + return -DI.hessian(f, adtype, x) end -StatsBase.coef(m::ModeResult) = m.values -StatsBase.coefnames(m::ModeResult) = names(m.values)[1] +StatsBase.coef(m::ModeResult) = last(get_vectorised_params(m)) +StatsBase.coefnames(m::ModeResult) = first(get_vectorised_params(m)) StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) StatsBase.loglikelihood(m::ModeResult) = m.lp diff --git a/test/Project.toml b/test/Project.toml index 7db79ea151..0459c16c53 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -10,6 +10,7 @@ BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66" Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" @@ -22,7 +23,6 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" -NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b" OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb" @@ -50,10 +50,11 @@ BangBang = "0.4" Bijectors = "0.14, 0.15" Clustering = "0.14, 0.15" Combinatorics = "1" +DifferentiationInterface = "0.7" Distributions = "0.25" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.39.6" +DynamicPPL = "0.39.11" FiniteDifferences = "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10, 1" HypothesisTests = "0.11" @@ 
-62,7 +63,6 @@ LogDensityProblems = "2" LogDensityProblemsAD = "1.4" MCMCChains = "7.3.0" Mooncake = "0.4.182" -NamedArrays = "0.9.4, 0.10" Optimization = "3, 4, 5" OptimizationBBO = "0.1, 0.2, 0.3, 0.4" OptimizationNLopt = "0.1, 0.2, 0.3" diff --git a/test/ad.jl b/test/ad.jl index 2bda5ce475..0d4f55a643 100644 --- a/test/ad.jl +++ b/test/ad.jl @@ -130,7 +130,12 @@ function check_adtype(context::ADTypeCheckContext, vi::DynamicPPL.AbstractVarInf # then the parameter type will be Any, so we should skip the check. lc = DynamicPPL.leafcontext(context) if lc isa DynamicPPL.InitContext{ - <:Any,<:Union{DynamicPPL.InitFromPrior,DynamicPPL.InitFromUniform} + <:Any, + <:Union{ + DynamicPPL.InitFromPrior, + DynamicPPL.InitFromUniform, + Turing.Optimisation.InitWithConstraintCheck, + }, } return nothing end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index c958456ffa..370e1f882c 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -1,10 +1,11 @@ module OptimisationTests -using ..Models: gdemo, gdemo_default using AbstractPPL: AbstractPPL +using Bijectors: Bijectors +import DifferentiationInterface as DI using Distributions -using Distributions.FillArrays: Zeros using DynamicPPL: DynamicPPL +using ForwardDiff: ForwardDiff using LinearAlgebra: Diagonal, I using Random: Random using Optimization @@ -12,83 +13,210 @@ using Optimization: Optimization using OptimizationBBO: OptimizationBBO using OptimizationNLopt: OptimizationNLopt using OptimizationOptimJL: OptimizationOptimJL +using Random: Random using ReverseDiff: ReverseDiff +using StableRNGs: StableRNG using StatsBase: StatsBase using StatsBase: coef, coefnames, coeftable, informationmatrix, stderror, vcov using Test: @test, @testset, @test_throws using Turing +using Turing.Optimisation: + ModeResult, InitWithConstraintCheck, satisfies_constraints, make_optim_bounds_and_init + +SECOND_ORDER_ADTYPE = DI.SecondOrder(AutoForwardDiff(), AutoForwardDiff()) 
+GDEMO_DEFAULT = DynamicPPL.TestUtils.demo_assume_observe_literal() + +function check_optimisation_result( + result::ModeResult, + true_values::AbstractDict{<:AbstractPPL.VarName,<:Any}, + true_logp::Real, + check_retcode=true, +) + # Check that `result.params` contains all the keys in `true_values` + @test Set(keys(result.params)) == Set(keys(true_values)) + # Check that their values are close + for (vn, val) in result.params + @test isapprox(val, true_values[vn], atol=0.01) + end + # Check logp and retcode + @test isapprox(result.lp, true_logp, atol=0.01) + if check_retcode + @test result.optim_result.retcode == Optimization.ReturnCode.Success + end +end -@testset "Optimisation" begin +@testset "Initialisation" begin + @testset "satisfies_constraints" begin + @testset "univariate" begin + val = 0.0 + dist = Normal() # only used for dispatch + @test satisfies_constraints(nothing, nothing, val, dist) + @test satisfies_constraints(-1.0, nothing, val, dist) + @test !satisfies_constraints(1.0, nothing, val, dist) + @test satisfies_constraints(nothing, 1.0, val, dist) + @test !satisfies_constraints(nothing, -1.0, val, dist) + @test satisfies_constraints(-1.0, 1.0, val, dist) + end - # The `stats` field is populated only in newer versions of OptimizationOptimJL and - # similar packages. 
Hence we end up doing this check a lot - hasstats(result) = result.optim_result.stats !== nothing + @testset "univariate ForwardDiff.Dual" begin + val = ForwardDiff.Dual(0.0, 1.0) + dist = Normal() # only used for dispatch + @test satisfies_constraints(nothing, 0.0, val, dist) + @test !satisfies_constraints(nothing, -0.01, val, dist) + val = ForwardDiff.Dual(0.0, -1.0) + @test satisfies_constraints(0.0, nothing, val, dist) + @test !satisfies_constraints(0.01, nothing, val, dist) + end - # Issue: https://discourse.julialang.org/t/two-equivalent-conditioning-syntaxes-giving-different-likelihood-values/100320 - @testset "OptimLogDensity and contexts" begin - @model function model1(x) - μ ~ Uniform(0, 2) - return x ~ LogNormal(μ, 1) + @testset "multivariate" begin + val = [0.3, 0.5, 0.2] + dist = Dirichlet(ones(3)) # only used for dispatch + @test satisfies_constraints(nothing, nothing, val, dist) + @test satisfies_constraints(zeros(3), nothing, val, dist) + @test !satisfies_constraints(ones(3), nothing, val, dist) + @test satisfies_constraints(nothing, ones(3), val, dist) + @test !satisfies_constraints(nothing, zeros(3), val, dist) + @test satisfies_constraints(zeros(3), ones(3), val, dist) + @test !satisfies_constraints([0.4, 0.0, 0.0], nothing, val, dist) + @test !satisfies_constraints(nothing, [1.0, 1.0, 0.1], val, dist) end - @model function model2() - μ ~ Uniform(0, 2) - return x ~ LogNormal(μ, 1) + @testset "multivariate ForwardDiff.Dual" begin + val = [ForwardDiff.Dual(0.5, 1.0), ForwardDiff.Dual(0.5, -1.0)] + dist = Dirichlet(ones(3)) # only used for dispatch + @test satisfies_constraints([0.5, 0.5], [0.5, 0.5], val, dist) end - x = 1.0 - w = [1.0] + @testset "Matrix distributions" begin + dist = Wishart(3, [0.5 0.0; 0.0 0.5]) # only used for dispatch + val = [1.0 0.0; 0.0 1.0] + @test satisfies_constraints(zeros(2, 2), ones(2, 2), val, dist) + @test satisfies_constraints(nothing, ones(2, 2), val, dist) + @test satisfies_constraints(zeros(2, 2), nothing, val, 
dist) + val = [2.0 -1.0; -1.0 2.0] + @test !satisfies_constraints(zeros(2, 2), ones(2, 2), val, dist) + @test !satisfies_constraints(nothing, ones(2, 2), val, dist) + @test !satisfies_constraints(zeros(2, 2), nothing, val, dist) + end - @testset "With ConditionContext" begin - m1 = model1(x) - m2 = model2() | (x=x,) - # Doesn't matter if we use getlogjoint or getlogjoint_internal since the - # VarInfo isn't linked. - ld1 = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogjoint) - ) - ld2 = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m2, DynamicPPL.getlogjoint_internal) + @testset "LKJCholesky" begin + dist = LKJCholesky(3, 0.5) + val = rand(dist) + @test satisfies_constraints(nothing, nothing, val, dist) + # Just refuse to handle these. + @test_throws ArgumentError satisfies_constraints( + zeros(3, 3), nothing, val, dist ) - @test ld1(w) == ld2(w) + @test_throws ArgumentError satisfies_constraints(nothing, ones(3, 3), val, dist) end + end - @testset "With prefixes" begin - vn = @varname(inner) - m1 = prefix(model1(x), vn) - m2 = prefix((model2() | (x=x,)), vn) - ld1 = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogjoint) - ) - ld2 = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m2, DynamicPPL.getlogjoint_internal) - ) - @test ld1(w) == ld2(w) + @testset "errors when constraints can't be satisfied" begin + @model function diric() + x ~ Dirichlet(ones(2)) + return 1.0 ~ Normal() + end + ldf = LogDensityFunction(diric()) + # These are all impossible constraints for a Dirichlet(ones(2)) + for (lb, ub) in + [([2.0, 2.0], nothing), (nothing, [-1.0, -1.0]), ([0.3, 0.3], [0.1, 0.1])] + # unit test the function + @test_throws ArgumentError make_optim_bounds_and_init( + Random.default_rng(), ldf, InitFromPrior(), (x=lb,), (x=ub,) + ) + # check that the high-level function also errors + @test_throws ArgumentError maximum_likelihood(diric(); 
lb=(x=lb,), ub=(x=ub,)) + @test_throws ArgumentError maximum_a_posteriori(diric(); lb=(x=lb,), ub=(x=ub,)) end - @testset "Joint, prior, and likelihood" begin - m1 = model1(x) - a = [0.3] - ld_joint = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogjoint) - ) - ld_prior = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogprior) - ) - ld_likelihood = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getloglikelihood) + # Try to provide reasonable constraints, but bad initial params + @model function normal_model() + x ~ Normal() + return 1.0 ~ Normal(x) + end + ldf = LogDensityFunction(normal_model()) + lb = (x=-1.0,) + ub = (x=1.0,) + bad_init = (x=10.0,) + @test_throws ArgumentError make_optim_bounds_and_init( + Random.default_rng(), ldf, InitFromParams(bad_init), lb, ub + ) + @test_throws ArgumentError maximum_likelihood( + normal_model(); initial_params=InitFromParams(bad_init), lb=lb, ub=ub + ) + @test_throws ArgumentError maximum_a_posteriori( + normal_model(); initial_params=InitFromParams(bad_init), lb=lb, ub=ub + ) + end + + @testset "generation of vector constraints" begin + @testset "$dist" for (lb, ub, dist) in ( + ((x=0.1,), (x=0.5,), Normal()), + ((x=0.1,), (x=0.5,), Beta(2, 2)), + ((x=[0.1, 0.1],), (x=[0.5, 0.5],), MvNormal(zeros(2), I)), + ( + (x=[0.1, 0.1],), + (x=[0.5, 0.5],), + product_distribution([Beta(2, 2), Beta(2, 2)]), + ), + # TODO(penelopeysm): Broken because DynamicPPL.to_vec_transform(dist) fails. + # This needs an upstream fix. 
+ # ((x=(a=0.1, b=0.1),), (x=(a=0.5, b=0.5),), product_distribution((a=Beta(2, 2), b=Beta(2, 2)))), + ) + @model f() = x ~ dist + model = f() + maybe_to_vec(x::AbstractVector) = x + maybe_to_vec(x) = [x] + + @testset "unlinked" begin + ldf = LogDensityFunction(model) + lb_vec, ub_vec, init_vec = make_optim_bounds_and_init( + Random.default_rng(), ldf, InitFromPrior(), lb, ub + ) + @test lb_vec == maybe_to_vec(lb.x) + @test ub_vec == maybe_to_vec(ub.x) + @test all(init_vec .>= lb_vec) + @test all(init_vec .<= ub_vec) + end + + @testset "linked" begin + vi = DynamicPPL.link!!(DynamicPPL.VarInfo(model), model) + ldf = LogDensityFunction(model, DynamicPPL.getlogjoint, vi) + lb_vec, ub_vec, init_vec = make_optim_bounds_and_init( + Random.default_rng(), ldf, InitFromPrior(), lb, ub + ) + b = Bijectors.bijector(dist) + @test lb_vec ≈ maybe_to_vec(b(lb.x)) + @test ub_vec ≈ maybe_to_vec(b(ub.x)) + @test all(init_vec .>= lb_vec) + @test all(init_vec .<= ub_vec) + end + end + end + + @testset "forbidding linked + constraints for complicated distributions" begin + @testset for dist in (LKJCholesky(3, 1.0), Dirichlet(ones(3))) + @model f() = x ~ dist + model = f() + + vi = DynamicPPL.link!!(DynamicPPL.VarInfo(model), model) + ldf = LogDensityFunction(model, DynamicPPL.getlogjoint, vi) + lb = (x=rand(dist),) + ub = (;) + + @test_throws ArgumentError make_optim_bounds_and_init( + Random.default_rng(), ldf, InitFromPrior(), lb, ub ) - @test ld_joint(a) == ld_prior(a) + ld_likelihood(a) - - # test that the prior accumulator is calculating the right thing - @test Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogprior) - )([0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), 0.3) - @test Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m1, DynamicPPL.getlogprior) - )([-0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), -0.3) + @test_throws ArgumentError maximum_likelihood(model; lb=lb, ub=ub, link=true) + @test_throws ArgumentError 
maximum_a_posteriori(model; lb=lb, ub=ub, link=true) end end +end + +@testset "Optimisation" begin + # The `stats` field is populated only in newer versions of OptimizationOptimJL and + # similar packages. Hence we end up doing this check a lot + hasstats(result) = result.optim_result.stats !== nothing @testset "errors on invalid model" begin @model function invalid_model() @@ -101,47 +229,39 @@ using Turing end @testset "gdemo" begin - """ - check_success(result, true_value, true_logp, check_retcode=true) - - Check that the `result` returned by optimisation is close to the truth. - """ - function check_optimisation_result( - result, true_value, true_logp, check_retcode=true - ) - optimum = result.values.array - @test all(isapprox.(optimum - true_value, 0.0, atol=0.01)) - if check_retcode - @test result.optim_result.retcode == Optimization.ReturnCode.Success - end - @test isapprox(result.lp, true_logp, atol=0.01) - # check that the parameter dict matches the NamedArray - # NOTE: This test only works for models where all parameters are identity - # varnames AND real-valued. Thankfully, this is true for `gdemo`. 
- @test length(only(result.values.dicts)) == length(result.params) - for (k, index) in only(result.values.dicts) - @test result.params[AbstractPPL.VarName{k}()] == result.values.array[index] - end - end - @testset "MLE" begin - Random.seed!(222) - true_value = [0.0625, 1.75] - true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) + true_value = Dict(@varname(s) => 0.0625, @varname(m) => 1.75) + true_logp = loglikelihood(GDEMO_DEFAULT, true_value) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE()) + m1 = Turing.Optimisation.estimate_mode(GDEMO_DEFAULT, MLE()) m2 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.LBFGS(); + initial_params=InitFromParams(true_value), + ) + m3 = maximum_likelihood( + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.Newton(); + adtype=SECOND_ORDER_ADTYPE, ) - m3 = maximum_likelihood(gdemo_default, OptimizationOptimJL.Newton()) m4 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.BFGS(); + adtype=AutoReverseDiff(), ) m5 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.NelderMead(); + initial_params=InitFromParams(true_value), + ) + m6 = maximum_likelihood( + StableRNG(468), GDEMO_DEFAULT, OptimizationOptimJL.NelderMead() ) - m6 = maximum_likelihood(gdemo_default, OptimizationOptimJL.NelderMead()) check_success(m1) check_success(m2) @@ -163,23 +283,38 @@ using Turing end @testset "MAP" begin - Random.seed!(222) - true_value = [49 / 54, 7 / 6] - true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) + true_value = Dict(@varname(s) => 49 / 54, @varname(m) => 7 / 6) + true_logp = logjoint(GDEMO_DEFAULT, 
true_value) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP()) + m1 = Turing.Optimisation.estimate_mode(StableRNG(468), GDEMO_DEFAULT, MAP()) m2 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.LBFGS(); + initial_params=InitFromParams(true_value), + ) + m3 = maximum_a_posteriori( + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.Newton(); + adtype=SECOND_ORDER_ADTYPE, ) - m3 = maximum_a_posteriori(gdemo_default, OptimizationOptimJL.Newton()) m4 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.BFGS(); + adtype=AutoReverseDiff(), ) m5 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value + StableRNG(468), + GDEMO_DEFAULT, + OptimizationOptimJL.NelderMead(); + initial_params=InitFromParams(true_value), + ) + m6 = maximum_a_posteriori( + StableRNG(468), GDEMO_DEFAULT, OptimizationOptimJL.NelderMead() ) - m6 = maximum_a_posteriori(gdemo_default, OptimizationOptimJL.NelderMead()) check_success(m1) check_success(m2) @@ -201,52 +336,52 @@ using Turing end @testset "MLE with box constraints" begin - Random.seed!(222) - true_value = [0.0625, 1.75] - true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = - check_optimisation_result(result, true_value, true_logp, check_retcode) + true_value = Dict(@varname(s) => 0.0625, @varname(m) => 1.75) + true_logp = loglikelihood(GDEMO_DEFAULT, true_value) + check_success(result) = check_optimisation_result(result, true_value, true_logp) - lb = [0.0, 0.0] - ub = [2.0, 2.0] + lb = (s=0.0, m=0.0) + ub = (s=2.0, m=2.0) + # We need to disable linking during the optimisation here, because it will + # result in NaN's. 
See the comment on allowed_incorrect_mle below. + kwargs = (; lb=lb, ub=ub, link=false) - m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE(); lb=lb, ub=ub) + m1 = Turing.Optimisation.estimate_mode(GDEMO_DEFAULT, MLE(); kwargs...) m2 = maximum_likelihood( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, - lb=lb, - ub=ub, + initial_params=InitFromParams(true_value), + kwargs..., ) m3 = maximum_likelihood( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationBBO.BBO_separable_nes(); maxiters=100_000, abstol=1e-5, - lb=lb, - ub=ub, + kwargs..., ) m4 = maximum_likelihood( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); adtype=AutoReverseDiff(), - lb=lb, - ub=ub, + kwargs..., ) m5 = maximum_likelihood( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.IPNewton(); - initial_params=true_value, - lb=lb, - ub=ub, + initial_params=InitFromParams(true_value), + adtype=SECOND_ORDER_ADTYPE, + kwargs..., ) - m6 = maximum_likelihood(gdemo_default; lb=lb, ub=ub) + m6 = maximum_likelihood(StableRNG(468), GDEMO_DEFAULT; kwargs...) check_success(m1) check_success(m2) - # BBO retcodes are misconfigured, so skip checking the retcode in this case. 
- # See https://github.com/SciML/Optimization.jl/issues/745 - check_success(m3, false) + check_success(m3) check_success(m4) check_success(m5) check_success(m6) @@ -261,223 +396,114 @@ using Turing end @testset "MAP with box constraints" begin - Random.seed!(222) - true_value = [49 / 54, 7 / 6] - true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = - check_optimisation_result(result, true_value, true_logp, check_retcode) + true_value = Dict(@varname(s) => 49 / 54, @varname(m) => 7 / 6) + true_logp = logjoint(GDEMO_DEFAULT, true_value) + check_success(result) = check_optimisation_result(result, true_value, true_logp) - lb = [0.0, 0.0] - ub = [2.0, 2.0] + lb = (s=0.0, m=0.0) + ub = (s=2.0, m=2.0) + # We need to disable linking during the optimisation here, because it will + # result in NaN's. See the comment on allowed_incorrect_mle below. + kwargs = (; lb=lb, ub=ub, link=false) - m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP(); lb=lb, ub=ub) + m1 = Turing.Optimisation.estimate_mode( + StableRNG(468), GDEMO_DEFAULT, MAP(); kwargs... 
+ ) m2 = maximum_a_posteriori( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, - lb=lb, - ub=ub, + initial_params=InitFromParams(true_value), + kwargs..., ) m3 = maximum_a_posteriori( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationBBO.BBO_separable_nes(); maxiters=100_000, abstol=1e-5, - lb=lb, - ub=ub, + kwargs..., ) m4 = maximum_a_posteriori( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); adtype=AutoReverseDiff(), - lb=lb, - ub=ub, + kwargs..., ) m5 = maximum_a_posteriori( - gdemo_default, + StableRNG(468), + GDEMO_DEFAULT, OptimizationOptimJL.IPNewton(); - initial_params=true_value, - lb=lb, - ub=ub, + initial_params=InitFromParams(true_value), + adtype=SECOND_ORDER_ADTYPE, + kwargs..., ) - m6 = maximum_a_posteriori(gdemo_default; lb=lb, ub=ub) + m6 = maximum_a_posteriori(StableRNG(468), GDEMO_DEFAULT; kwargs...) check_success(m1) check_success(m2) - # BBO retcodes are misconfigured, so skip checking the retcode in this case. 
- # See https://github.com/SciML/Optimization.jl/issues/745 - check_success(m3, false) + check_success(m3) check_success(m4) check_success(m5) check_success(m6) @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 @test !hasstats(m5) || m5.optim_result.stats.iterations <= 1 - - @show m2.optim_result.stats @test !hasstats(m2) || m2.optim_result.stats.gevals > 0 @test !hasstats(m3) || m3.optim_result.stats.gevals == 0 @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 @test !hasstats(m5) || m5.optim_result.stats.gevals > 0 end - - @testset "MLE with generic constraints" begin - Random.seed!(222) - true_value = [0.0625, 1.75] - true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = - check_optimisation_result(result, true_value, true_logp, check_retcode) - - # Set two constraints: The first parameter must be non-negative, and the L2 norm - # of the parameters must be between 0.5 and 2. - cons(res, x, _) = (res .= [x[1], sqrt(sum(x .^ 2))]) - lcons = [0, 0.5] - ucons = [Inf, 2.0] - cons_args = (cons=cons, lcons=lcons, ucons=ucons) - initial_params = [0.5, -1.0] - - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MLE(); initial_params=initial_params, cons_args... - ) - m2 = maximum_likelihood(gdemo_default; initial_params=true_value, cons_args...) - m3 = maximum_likelihood( - gdemo_default, - OptimizationOptimJL.IPNewton(); - initial_params=initial_params, - cons_args..., - ) - m4 = maximum_likelihood( - gdemo_default, - OptimizationOptimJL.IPNewton(); - initial_params=initial_params, - adtype=AutoReverseDiff(), - cons_args..., - ) - m5 = maximum_likelihood( - gdemo_default; initial_params=initial_params, cons_args... 
- ) - - check_success(m1) - check_success(m2) - check_success(m3) - check_success(m4) - check_success(m5) - - @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 - - @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 - @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 - - expected_error = ArgumentError( - "You must provide an initial value when using generic constraints." - ) - @test_throws expected_error maximum_likelihood(gdemo_default; cons_args...) - end - - @testset "MAP with generic constraints" begin - Random.seed!(222) - true_value = [49 / 54, 7 / 6] - true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = - check_optimisation_result(result, true_value, true_logp, check_retcode) - - # Set two constraints: The first parameter must be non-negative, and the L2 norm - # of the parameters must be between 0.5 and 2. - cons(res, x, _) = (res .= [x[1], sqrt(sum(x .^ 2))]) - lcons = [0, 0.5] - ucons = [Inf, 2.0] - cons_args = (cons=cons, lcons=lcons, ucons=ucons) - initial_params = [0.5, -1.0] - - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MAP(); initial_params=initial_params, cons_args... - ) - m2 = maximum_a_posteriori( - gdemo_default; initial_params=true_value, cons_args... - ) - m3 = maximum_a_posteriori( - gdemo_default, - OptimizationOptimJL.IPNewton(); - initial_params=initial_params, - cons_args..., - ) - m4 = maximum_a_posteriori( - gdemo_default, - OptimizationOptimJL.IPNewton(); - initial_params=initial_params, - adtype=AutoReverseDiff(), - cons_args..., - ) - m5 = maximum_a_posteriori( - gdemo_default; initial_params=initial_params, cons_args... 
- ) - - check_success(m1) - check_success(m2) - check_success(m3) - check_success(m4) - check_success(m5) - - @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 - - @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 - @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 - - expected_error = ArgumentError( - "You must provide an initial value when using generic constraints." - ) - @test_throws expected_error maximum_a_posteriori(gdemo_default; cons_args...) - end end @testset "StatsBase integration" begin - Random.seed!(54321) - mle_est = maximum_likelihood(gdemo_default) - # Calculated based on the two data points in gdemo_default, [1.5, 2.0] - true_values = [0.0625, 1.75] + true_s = 0.0625 + true_m = 1.75 + true_value = Dict(@varname(s) => true_s, @varname(m) => true_m) + true_lp = loglikelihood(GDEMO_DEFAULT, true_value) + mle_est = maximum_likelihood(GDEMO_DEFAULT) - @test coefnames(mle_est) == [:s, :m] + @test coefnames(mle_est) == [@varname(s), @varname(m)] + @test coefnames(mle_est) == params(mle_est) - diffs = coef(mle_est).array - [0.0625031; 1.75001] + diffs = coef(mle_est) .- [true_s, true_m] @test all(isapprox.(diffs, 0.0, atol=0.1)) - infomat = [2/(2 * true_values[1]^2) 0.0; 0.0 2/true_values[1]] + infomat = [2/(2 * true_s^2) 0.0; 0.0 2/true_s] @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) - vcovmat = [2 * true_values[1]^2/2 0.0; 0.0 true_values[1]/2] + @test vcov(mle_est) == inv(informationmatrix(mle_est)) + vcovmat = [2 * true_s^2/2 0.0; 0.0 true_s/2] @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) ctable = coeftable(mle_est) @test ctable isa StatsBase.CoefTable - s = stderror(mle_est).array + s = stderror(mle_est) @test all(isapprox.(s - [0.06250415643292194, 0.17677963626053916], 0.0, atol=0.01)) - @test coefnames(mle_est) == Distributions.params(mle_est) - @test vcov(mle_est) == inv(informationmatrix(mle_est)) - - @test isapprox(loglikelihood(mle_est), -0.0652883561466624, atol=0.01) + 
@test isapprox(loglikelihood(mle_est), true_lp, atol=0.01) end @testset "Linear regression test" begin @model function regtest(x, y) - beta ~ MvNormal(Zeros(2), I) + beta ~ MvNormal(zeros(2), I) mu = x * beta return y ~ MvNormal(mu, I) end - Random.seed!(987) true_beta = [1.0, -2.2] - x = rand(40, 2) + x = rand(StableRNG(468), 40, 2) y = x * true_beta model = regtest(x, y) - mle = maximum_likelihood(model) + mle = maximum_likelihood(StableRNG(468), model) vcmat = inv(x'x) - vcmat_mle = vcov(mle).array + vcmat_mle = vcov(mle) - @test isapprox(mle.values.array, true_beta) + @test isapprox(mle.params[@varname(beta)], true_beta) @test isapprox(vcmat, vcmat_mle) end @@ -491,34 +517,41 @@ using Turing model_dot = dot_gdemo([1.5, 2.0]) - mle1 = maximum_likelihood(gdemo_default) + mle1 = maximum_likelihood(GDEMO_DEFAULT) mle2 = maximum_likelihood(model_dot) - map1 = maximum_a_posteriori(gdemo_default) + map1 = maximum_a_posteriori(GDEMO_DEFAULT) map2 = maximum_a_posteriori(model_dot) - @test isapprox(mle1.values.array, mle2.values.array) - @test isapprox(map1.values.array, map2.values.array) + @test isapprox(mle1.params[@varname(s)], mle2.params[@varname(s)]) + @test isapprox(mle1.params[@varname(m)], mle2.params[@varname(m)]) + @test isapprox(map1.params[@varname(s)], map2.params[@varname(s)]) + @test isapprox(map1.params[@varname(m)], map2.params[@varname(m)]) end - # TODO(mhauru): The corresponding Optim.jl test had a note saying that some models - # don't work for Tracker and ReverseDiff. Is that still the case? 
@testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS - Random.seed!(23) - result_true = DynamicPPL.TestUtils.posterior_optima(model) + true_optima = DynamicPPL.TestUtils.posterior_optima(model) optimizers = [ - OptimizationOptimJL.LBFGS(), - OptimizationOptimJL.NelderMead(), - OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART(), + (false, OptimizationOptimJL.LBFGS()), + (false, OptimizationOptimJL.NelderMead()), + (true, OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART()), ] - @testset "$(nameof(typeof(optimizer)))" for optimizer in optimizers - result = maximum_a_posteriori(model, optimizer) - vals = result.values + @testset "$(nameof(typeof(optimizer)))" for (needs_second_order, optimizer) in + optimizers + adtype = if needs_second_order + SECOND_ORDER_ADTYPE + else + AutoForwardDiff() + end + result = maximum_a_posteriori(StableRNG(468), model, optimizer; adtype=adtype) for vn in DynamicPPL.TestUtils.varnames(model) - for vn_leaf in AbstractPPL.varname_leaves(vn, get(result_true, vn)) - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 + val = AbstractPPL.getvalue(true_optima, vn) + for vn_leaf in AbstractPPL.varname_leaves(vn, val) + expected = AbstractPPL.getvalue(true_optima, vn_leaf) + actual = AbstractPPL.getvalue(result.params, vn_leaf) + @test expected ≈ actual atol = 0.05 end end end @@ -543,26 +576,32 @@ using Turing DynamicPPL.TestUtils.demo_assume_matrix_observe_matrix_index, ] @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS - Random.seed!(23) - result_true = DynamicPPL.TestUtils.likelihood_optima(model) + true_optima = DynamicPPL.TestUtils.likelihood_optima(model) optimizers = [ - OptimizationOptimJL.LBFGS(), - OptimizationOptimJL.NelderMead(), - OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART(), + (false, OptimizationOptimJL.LBFGS()), + (false, OptimizationOptimJL.NelderMead()), + (true, OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART()), ] - @testset 
"$(nameof(typeof(optimizer)))" for optimizer in optimizers + @testset "$(nameof(typeof(optimizer)))" for (needs_second_order, optimizer) in + optimizers try - result = maximum_likelihood(model, optimizer; reltol=1e-3) - vals = result.values + adtype = if needs_second_order + SECOND_ORDER_ADTYPE + else + AutoForwardDiff() + end + result = maximum_likelihood(model, optimizer; reltol=1e-3, adtype=adtype) for vn in DynamicPPL.TestUtils.varnames(model) - for vn_leaf in AbstractPPL.varname_leaves(vn, get(result_true, vn)) + val = AbstractPPL.getvalue(true_optima, vn) + for vn_leaf in AbstractPPL.varname_leaves(vn, val) + expected = AbstractPPL.getvalue(true_optima, vn_leaf) + actual = AbstractPPL.getvalue(result.params, vn_leaf) if model.f in allowed_incorrect_mle - @test isfinite(get(result_true, vn_leaf)) + @test isfinite(actual) else - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = - 0.05 + @test expected ≈ actual atol = 0.05 end end end @@ -576,6 +615,38 @@ using Turing end end + @testset "distribution with dynamic support and constraints" begin + @model function f() + x ~ Uniform(-5, 5) + return y ~ truncated(Normal(); lower=x) + end + # Note that in this testset we don't need to seed RNG because the initial params + # are fully specified. + inits = (x=0.0, y=2.5) + lb = (y=2.0,) + # Here, the constraints that are passed to Optimization.jl are not fully 'correct', + # because the constraints were determined with a single static value of `x`. Thus, + # during the optimization it is possible for `y` to go out of bounds. We check that + # such cases are caught. + @test_throws DomainError maximum_a_posteriori( + f(); initial_params=InitFromParams(inits), lb=lb, link=true + ) + # If there is no linking, then the constraints are no longer incorrect (they are + # always static). So the optimization should succeed (it might give silly results, + # but that's not our problem). 
+ @test maximum_a_posteriori( + f(); initial_params=InitFromParams(inits), lb=lb, link=false + ) isa ModeResult + # If the user wants to disable it, they should be able to. + @test maximum_a_posteriori( + f(); + initial_params=InitFromParams(inits), + lb=lb, + link=true, + check_constraints_at_runtime=false, + ) isa ModeResult + end + @testset "using ModeResult to initialise MCMC" begin @model function f(y) μ ~ Normal(0, 1) @@ -614,7 +685,7 @@ using Turing @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3)) model = demo_dirichlet() result = maximum_a_posteriori(model) - @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 + @test result.params[@varname(x)] ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 end @testset "with :=" begin @@ -624,32 +695,8 @@ using Turing end model = demo_track() result = maximum_a_posteriori(model) - @test result.values[:x] ≈ 0 atol = 1e-1 - @test result.values[:y] ≈ 100 atol = 1e-1 - end - - @testset "get ModeResult" begin - @model function demo_model(N) - half_N = N ÷ 2 - a ~ arraydist(LogNormal.(fill(0, half_N), 1)) - b ~ arraydist(LogNormal.(fill(0, N - half_N), 1)) - covariance_matrix = Diagonal(vcat(a, b)) - x ~ MvNormal(covariance_matrix) - return nothing - end - - N = 12 - m = demo_model(N) | (x=randn(N),) - result = maximum_a_posteriori(m) - get_a = get(result, :a) - get_b = get(result, :b) - get_ab = get(result, [:a, :b]) - @assert keys(get_a) == (:a,) - @assert keys(get_b) == (:b,) - @assert keys(get_ab) == (:a, :b) - @assert get_b[:b] == get_ab[:b] - @assert vcat(get_a[:a], get_b[:b]) == result.values.array - @assert get(result, :c) == (; :c => Array{Float64}[]) + @test result.params[@varname(x)] ≈ 0 atol = 1e-1 + @test result.params[@varname(y)] ≈ 100 atol = 1e-1 end @testset "Collinear coeftable" begin @@ -663,11 +710,11 @@ using Turing end model = collinear(xs, ys) - mle_estimate = Turing.Optimisation.estimate_mode(model, MLE()) + mle_estimate = maximum_likelihood(model) tab = coeftable(mle_estimate) - @assert 
isnan(tab.cols[2][1]) - @assert tab.colnms[end] == "Error notes" - @assert occursin("singular", tab.cols[end][1]) + @test isnan(tab.cols[2][1]) + @test tab.colnms[end] == "Error notes" + @test occursin("singular", tab.cols[end][1]) end @testset "Negative variance" begin @@ -681,24 +728,18 @@ using Turing @addlogprob! x^2 - y^2 return nothing end - m = saddle_model() - optim_ld = Turing.Optimisation.OptimLogDensity( - DynamicPPL.LogDensityFunction(m, DynamicPPL.getloglikelihood) - ) - vals = Turing.Optimisation.NamedArrays.NamedArray([0.0, 0.0]) m = Turing.Optimisation.ModeResult( - vals, - nothing, - 0.0, - optim_ld, + MLE(), Dict{AbstractPPL.VarName,Float64}(@varname(x) => 0.0, @varname(y) => 0.0), + 0.0, false, - MLE(), + DynamicPPL.LogDensityFunction(saddle_model(), DynamicPPL.getloglikelihood), + nothing, ) ct = coeftable(m) - @assert isnan(ct.cols[2][1]) - @assert ct.colnms[end] == "Error notes" - @assert occursin("Negative variance", ct.cols[end][1]) + @test isnan(ct.cols[2][1]) + @test ct.colnms[end] == "Error notes" + @test occursin("Negative variance", ct.cols[end][1]) end @testset "Same coeftable with/without numerrors_warnonly" begin @@ -710,14 +751,14 @@ using Turing end model = extranormal(xs) - mle_estimate = Turing.Optimisation.estimate_mode(model, MLE()) + mle_estimate = maximum_likelihood(model) warnonly_coeftable = coeftable(mle_estimate; numerrors_warnonly=true) no_warnonly_coeftable = coeftable(mle_estimate; numerrors_warnonly=false) - @assert warnonly_coeftable.cols == no_warnonly_coeftable.cols - @assert warnonly_coeftable.colnms == no_warnonly_coeftable.colnms - @assert warnonly_coeftable.rownms == no_warnonly_coeftable.rownms - @assert warnonly_coeftable.pvalcol == no_warnonly_coeftable.pvalcol - @assert warnonly_coeftable.teststatcol == no_warnonly_coeftable.teststatcol + @test warnonly_coeftable.cols == no_warnonly_coeftable.cols + @test warnonly_coeftable.colnms == no_warnonly_coeftable.colnms + @test warnonly_coeftable.rownms == 
no_warnonly_coeftable.rownms + @test warnonly_coeftable.pvalcol == no_warnonly_coeftable.pvalcol + @test warnonly_coeftable.teststatcol == no_warnonly_coeftable.teststatcol end end From 0059311d73f247044c40e8b2d08278b5be8ed1f8 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 26 Jan 2026 11:12:10 +0000 Subject: [PATCH 04/48] Export get_vector_params since it's useful --- src/Turing.jl | 1 + src/optimisation/Optimisation.jl | 12 ++++++++++-- src/optimisation/stats.jl | 14 +++++++------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index 9f084dc14f..244a116878 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -171,6 +171,7 @@ export maximum_likelihood, MAP, MLE, + get_vector_params, # Chain save/resume loadstate diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 2aef6c69ce..281a00c172 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -18,7 +18,7 @@ using StatsAPI: StatsAPI using Statistics: Statistics using LinearAlgebra: LinearAlgebra -export maximum_a_posteriori, maximum_likelihood, MAP, MLE +export maximum_a_posteriori, maximum_likelihood, MAP, MLE, get_vector_params include("init.jl") @@ -89,7 +89,15 @@ struct ModeResult{ linked::Bool "The LogDensityFunction used to calculate the output. Note that this LogDensityFunction calculates the actual (non-negated) log density. It should hold that `m.lp == - LogDensityProblems.logdensity(m.ldf, m.optim_result.u)`." + LogDensityProblems.logdensity(m.ldf, m.optim_result.u)` for a ModeResult `m`. + + The objective function used for minimisation is equivalent to `p -> + -LogDensityProblems.logdensity(m.ldf, p)`). Note, however, that `p` has to be provided + as a vector in linked or unlinked space depending on the value of `m.linked`. 
+ + If `m.linked` is true, to evaluate the log-density using unlinked parameters, you + can use `logjoint(m.ldf.model, params)` where `params` is a NamedTuple or Dictionary + of unlinked parameters." ldf::L "The stored optimiser results." optim_result::O diff --git a/src/optimisation/stats.jl b/src/optimisation/stats.jl index 06d80d02f6..b05fa6b5e4 100644 --- a/src/optimisation/stats.jl +++ b/src/optimisation/stats.jl @@ -1,7 +1,7 @@ import DifferentiationInterface as DI """ - get_vectorised_params(m::ModeResult) + get_vector_params(m::ModeResult) Generates a vectorised form of the optimised parameters stored in the `ModeResult`, along with the corresponding variable names. These parameters correspond to unlinked space. @@ -9,7 +9,7 @@ with the corresponding variable names. These parameters correspond to unlinked s This function returns two vectors: the first contains the variable names, and the second contains the corresponding values. """ -function get_vectorised_params(m::ModeResult) +function get_vector_params(m::ModeResult) # This function requires some subtlety. We _could_ simply iterate over keys(m.params) # and values(m.params), apply AbstractPPL.varname_and_value_leaves to each pair, and # then collect them into a vector. *However*, this vector will later have to be used @@ -60,7 +60,7 @@ during the computation of the standard errors will be caught and reported in an "Error notes" column. """ function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) - vns, estimates = get_vectorised_params(m) + vns, estimates = get_vector_params(m) # Get columns for coeftable. terms = string.(vns) # If numerrors_warnonly is true, and if either the information matrix is singular or has @@ -157,10 +157,10 @@ function StatsBase.informationmatrix( ldf = DynamicPPL.LogDensityFunction(model, logprob_func(m.estimator)) f = Base.Fix1(LogDensityProblems.logdensity, ldf) # Then get the vectorised parameters. 
- _, x = get_vectorised_params(m) + _, x = get_vector_params(m) # We can include a check here to make sure that f(x) is in fact the log density at x. - # This helps guard against potential bugs where `get_vectorised_params` returns a + # This helps guard against potential bugs where `get_vector_params` returns a # wrongly-ordered parameter vector. if !isapprox(f(x), m.lp) error( @@ -172,8 +172,8 @@ function StatsBase.informationmatrix( return -DI.hessian(f, adtype, x) end -StatsBase.coef(m::ModeResult) = last(get_vectorised_params(m)) -StatsBase.coefnames(m::ModeResult) = first(get_vectorised_params(m)) +StatsBase.coef(m::ModeResult) = last(get_vector_params(m)) +StatsBase.coefnames(m::ModeResult) = first(get_vector_params(m)) StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) StatsBase.loglikelihood(m::ModeResult) = m.lp From 4b3a969d57aad1af0d85dbb6c3be5cc3579be096 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 26 Jan 2026 11:17:13 +0000 Subject: [PATCH 05/48] Document --- docs/src/api.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 37ff96f814..00bd5db179 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -167,9 +167,10 @@ LogPoisson See the [mode estimation tutorial](https://turinglang.org/docs/tutorials/docs-17-mode-estimation/) for more information. 
-| Exported symbol | Documentation | Description | -|:---------------------- |:-------------------------------------------------- |:-------------------------------------------- | -| `maximum_a_posteriori` | [`Turing.Optimisation.maximum_a_posteriori`](@ref) | Find a MAP estimate for a model | -| `maximum_likelihood` | [`Turing.Optimisation.maximum_likelihood`](@ref) | Find a MLE estimate for a model | -| `MAP` | [`Turing.Optimisation.MAP`](@ref) | Type to use with Optim.jl for MAP estimation | -| `MLE` | [`Turing.Optimisation.MLE`](@ref) | Type to use with Optim.jl for MLE estimation | +| Exported symbol | Documentation | Description | +|:---------------------- |:-------------------------------------------------- |:--------------------------------------------- | +| `maximum_a_posteriori` | [`Turing.Optimisation.maximum_a_posteriori`](@ref) | Find a MAP estimate for a model | +| `maximum_likelihood` | [`Turing.Optimisation.maximum_likelihood`](@ref) | Find a MLE estimate for a model | +| `MAP` | [`Turing.Optimisation.MAP`](@ref) | Type to use with Optim.jl for MAP estimation | +| `MLE` | [`Turing.Optimisation.MLE`](@ref) | Type to use with Optim.jl for MLE estimation | +| `get_vector_params` | [`Turing.Optimisation.get_vector_params`](@ref) | Extract parameter names and values as vectors | From 19730fec7f623146929ac4dab94e62b1e93a38ac Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 26 Jan 2026 16:34:49 +0000 Subject: [PATCH 06/48] Seed a test properly --- src/optimisation/Optimisation.jl | 2 +- test/optimisation/Optimisation.jl | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 281a00c172..f4535bac89 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -140,7 +140,7 @@ function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) tree_char = i == length(m.params) ? 
"└" : "├" println(io, " │ $(tree_char) $vn => $(val)") end - println(io, " └ linked : $(m.linked)") + print(io, " └ linked : $(m.linked)") return nothing end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index 370e1f882c..15de4df305 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -343,10 +343,14 @@ end lb = (s=0.0, m=0.0) ub = (s=2.0, m=2.0) # We need to disable linking during the optimisation here, because it will - # result in NaN's. See the comment on allowed_incorrect_mle below. + # result in NaN's. See the comment on allowed_incorrect_mle below. In fact + # even sometimes without linking it still gets NaN's -- we get round that + # in these tests by seeding the RNG. kwargs = (; lb=lb, ub=ub, link=false) - m1 = Turing.Optimisation.estimate_mode(GDEMO_DEFAULT, MLE(); kwargs...) + m1 = Turing.Optimisation.estimate_mode( + StableRNG(468), GDEMO_DEFAULT, MLE(); kwargs... + ) m2 = maximum_likelihood( StableRNG(468), GDEMO_DEFAULT, From 22bcc2a6472a4ce822128b82525012dbe73ee3d3 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 7 Feb 2026 15:45:10 +0000 Subject: [PATCH 07/48] Run CI against new DPPL / VNT (#2756) The usual game of trying to see if DPPL breaks anything upstream. (20 minutes later) Okay, maybe '*if* DPPL breaks anything upstream' was a bit optimistic. More like seeing *what* DPPL broke upstream. 
--- HISTORY.md | 5 + Project.toml | 7 +- docs/src/api.md | 1 - ext/TuringDynamicHMCExt.jl | 17 +- src/Turing.jl | 10 +- src/init_strategy.jl | 21 +- src/mcmc/Inference.jl | 61 +-- src/mcmc/abstractmcmc.jl | 63 +-- src/mcmc/emcee.jl | 2 +- src/mcmc/ess.jl | 66 ++- src/mcmc/external_sampler.jl | 10 +- src/mcmc/gibbs.jl | 129 ++--- src/mcmc/gibbs_conditional.jl | 171 ++++-- src/mcmc/hmc.jl | 8 +- src/mcmc/is.jl | 75 --- src/mcmc/mh.jl | 762 +++++++++++++-------------- src/mcmc/particle_mcmc.jl | 50 +- src/mcmc/prior.jl | 6 +- src/mcmc/repeat_sampler.jl | 4 +- src/optimisation/Optimisation.jl | 41 +- src/optimisation/init.jl | 118 +++-- src/variational/Variational.jl | 5 +- test/Project.toml | 7 +- test/ad.jl | 14 +- test/integration/enzyme/Project.toml | 1 + test/mcmc/Inference.jl | 11 +- test/mcmc/abstractmcmc.jl | 38 +- test/mcmc/external_sampler.jl | 8 +- test/mcmc/gibbs.jl | 108 +--- test/mcmc/gibbs_conditional.jl | 19 +- test/mcmc/is.jl | 74 --- test/mcmc/mh.jl | 221 ++++---- test/optimisation/Optimisation.jl | 37 +- test/runtests.jl | 1 - test/stdlib/RandomMeasures.jl | 13 +- test/test_utils/sampler.jl | 11 +- 36 files changed, 1007 insertions(+), 1188 deletions(-) delete mode 100644 test/mcmc/is.jl diff --git a/HISTORY.md b/HISTORY.md index 7dd01561bf..43245f54b6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -38,6 +38,11 @@ In these cases, Turing will raise an error if bounds are provided. Users who wish to perform optimisation with such constraints should directly use `LogDensityFunction` and Optimization.jl. Documentation on this matter will be forthcoming. +## `IS` sampler + +The `IS` sampler has been removed (its behaviour was in fact exactly the same as `Prior`). +To see an example of importance sampling (via `Prior()` and then subsequent reweighting), see e.g. [this issue](https://github.com/TuringLang/Turing.jl/issues/2767). + # 0.42.7 Avoid reevaluating the model on MCMC iterations where the transition is not saved to the chain (e.g. 
in initial burn-in, or when using thinning). diff --git a/Project.toml b/Project.toml index 3624c694dd..aae6045498 100644 --- a/Project.toml +++ b/Project.toml @@ -42,13 +42,16 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [weakdeps] DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" +[sources] +DynamicPPL = {rev = "breaking", url = "https://github.com/TuringLang/DynamicPPL.jl"} + [extensions] TuringDynamicHMCExt = "DynamicHMC" [compat] ADTypes = "1.9" AbstractMCMC = "5.13" -AbstractPPL = "0.11, 0.12, 0.13" +AbstractPPL = "0.14" Accessors = "0.1" AdvancedHMC = "0.8.3" AdvancedMH = "0.8.9" @@ -63,7 +66,7 @@ Distributions = "0.25.77" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" -DynamicPPL = "0.39.11" +DynamicPPL = "0.40" EllipticalSliceSampling = "0.5, 1, 2" ForwardDiff = "0.10.3, 1" Libtask = "0.9.5" diff --git a/docs/src/api.md b/docs/src/api.md index 1f41c9d3cd..0d5ec72154 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -73,7 +73,6 @@ even though [`Prior()`](@ref) is actually defined in the `Turing.Inference` modu | `PolynomialStepsize` | [`Turing.Inference.PolynomialStepsize`](@ref) | Returns a function which generates polynomially decaying step sizes | | `HMCDA` | [`Turing.Inference.HMCDA`](@ref) | Hamiltonian Monte Carlo with dual averaging | | `NUTS` | [`Turing.Inference.NUTS`](@ref) | No-U-Turn Sampler | -| `IS` | [`Turing.Inference.IS`](@ref) | Importance sampling | | `SMC` | [`Turing.Inference.SMC`](@ref) | Sequential Monte Carlo | | `PG` | [`Turing.Inference.PG`](@ref) | Particle Gibbs | | `CSMC` | [`Turing.Inference.CSMC`](@ref) | The same as PG | diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl index 7e5792974b..041aa0865d 100644 --- a/ext/TuringDynamicHMCExt.jl +++ b/ext/TuringDynamicHMCExt.jl @@ -43,20 +43,19 @@ struct DynamicNUTSState{L,C,M,S} stepsize::S end -function Turing.Inference.initialstep( +function AbstractMCMC.step( rng::Random.AbstractRNG, model::DynamicPPL.Model, - 
spl::DynamicNUTS, - vi::DynamicPPL.AbstractVarInfo; + spl::DynamicNUTS; + initial_params, kwargs..., ) - # Ensure that initial sample is in unconstrained space. - if !DynamicPPL.is_transformed(vi) - vi = DynamicPPL.link!!(vi, model) - vi = last(DynamicPPL.evaluate!!(model, vi)) - end - # Define log-density function. + # TODO(penelopeysm) We need to check that the initial parameters are valid. Same as how + # we do it for HMC + _, vi = DynamicPPL.init!!( + rng, model, DynamicPPL.VarInfo(), initial_params, DynamicPPL.LinkAll() + ) ℓ = DynamicPPL.LogDensityFunction( model, DynamicPPL.getlogjoint_internal, vi; adtype=spl.adtype ) diff --git a/src/Turing.jl b/src/Turing.jl index 37d33d672a..afe9f6269a 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -11,13 +11,10 @@ using AdvancedVI: AdvancedVI using DynamicPPL: DynamicPPL import DynamicPPL: NoDist, NamedDist using LogDensityProblems: LogDensityProblems -using Accessors: Accessors using StatsAPI: StatsAPI using StatsBase: StatsBase using AbstractMCMC -using Accessors: Accessors - using Printf: Printf using Random: Random using LinearAlgebra: I @@ -73,6 +70,8 @@ using DynamicPPL: conditioned, to_submodel, LogDensityFunction, + VarNamedTuple, + @vnt, @addlogprob!, InitFromPrior, InitFromUniform, @@ -102,6 +101,7 @@ export # Samplers - Turing.Inference Prior, MH, + LinkedRW, Emcee, ESS, Gibbs, @@ -112,7 +112,6 @@ export PolynomialStepsize, HMCDA, NUTS, - IS, SMC, PG, CSMC, @@ -166,6 +165,9 @@ export InitFromPrior, InitFromUniform, InitFromParams, + # VNT, + VarNamedTuple, + @vnt, # Point estimates - Turing.Optimisation # The MAP and MLE exports are only needed for the Optim.jl interface. maximum_a_posteriori, diff --git a/src/init_strategy.jl b/src/init_strategy.jl index 455a734c45..cf66e341db 100644 --- a/src/init_strategy.jl +++ b/src/init_strategy.jl @@ -1,7 +1,8 @@ using AbstractPPL: VarName using DynamicPPL: DynamicPPL -# This function is shared by both MCMC and optimisation, so has to exist outside of both. 
+# These functions are shared by both MCMC and optimisation, so has to exist outside of both. + """ _convert_initial_params(initial_params) @@ -18,10 +19,24 @@ function _convert_initial_params(d::AbstractDict{<:VarName}) return DynamicPPL.InitFromParams(d) end function _convert_initial_params(::AbstractVector{<:Real}) - errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or ideally a `DynamicPPL.AbstractInitStrategy`. Using a vector of parameters for `initial_params` is no longer supported. Please see https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters for details on how to update your code." + errmsg = "`initial_params` must be an `DynamicPPL.AbstractInitStrategy`. Using a vector of parameters for `initial_params` is no longer supported. Please see https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters for details on how to update your code." throw(ArgumentError(errmsg)) end function _convert_initial_params(@nospecialize(_::Any)) - errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or a `DynamicPPL.AbstractInitStrategy`." + errmsg = "`initial_params` must be a `DynamicPPL.AbstractInitStrategy`." throw(ArgumentError(errmsg)) end + +# TODO: Implement additional checks for certain samplers, e.g. +# HMC not supporting discrete parameters. 
+function _check_model(model::DynamicPPL.Model) + return DynamicPPL.check_model(model; error_on_failure=true) +end +function _check_model(model::DynamicPPL.Model, ::AbstractMCMC.AbstractSampler) + return _check_model(model) +end + +# Similar to InitFromParams, this is just for convenience +_to_varnamedtuple(nt::NamedTuple) = DynamicPPL.VarNamedTuple(nt) +_to_varnamedtuple(d::AbstractDict{<:VarName}) = DynamicPPL.VarNamedTuple(pairs(d)) +_to_varnamedtuple(vnt::DynamicPPL.VarNamedTuple) = vnt diff --git a/src/mcmc/Inference.jl b/src/mcmc/Inference.jl index b02d0887f7..7950aa12f3 100644 --- a/src/mcmc/Inference.jl +++ b/src/mcmc/Inference.jl @@ -3,16 +3,9 @@ module Inference using DynamicPPL: DynamicPPL, @model, - Metadata, VarInfo, - SimpleVarInfo, LogDensityFunction, AbstractVarInfo, - # TODO(mhauru) all_varnames_grouped_by_symbol isn't exported by DPPL, because it is only - # implemented for NTVarInfo. It is used by mh.jl. Either refactor mh.jl to not use it - # or implement it for other VarInfo types and export it from DPPL. 
- all_varnames_grouped_by_symbol, - syms, setindex!!, push!!, setlogp!!, @@ -20,7 +13,6 @@ using DynamicPPL: getlogjoint_internal, VarName, getsym, - getdist, Model, DefaultContext using Distributions, Libtask, Bijectors @@ -32,7 +24,6 @@ using Random: AbstractRNG using AbstractMCMC: AbstractModel, AbstractSampler using DocStringExtensions: FIELDS, TYPEDEF, TYPEDFIELDS using DataStructures: OrderedSet, OrderedDict -using Accessors: Accessors import ADTypes import AbstractMCMC @@ -42,7 +33,6 @@ const AHMC = AdvancedHMC import AdvancedMH const AMH = AdvancedMH import AdvancedPS -import Accessors import EllipticalSliceSampling import LogDensityProblems import Random @@ -53,72 +43,32 @@ export Hamiltonian, StaticHamiltonian, AdaptiveHamiltonian, MH, + LinkedRW, ESS, Emcee, - Gibbs, # classic sampling - GibbsConditional, # conditional sampling + Gibbs, + GibbsConditional, HMC, SGLD, PolynomialStepsize, SGHMC, HMCDA, - NUTS, # Hamiltonian-like sampling - IS, + NUTS, SMC, CSMC, PG, RepeatSampler, Prior, - predict, externalsampler, init_strategy, loadstate -######################################### -# Generic AbstractMCMC methods dispatch # -######################################### - const DEFAULT_CHAIN_TYPE = MCMCChains.Chains -include("abstractmcmc.jl") - -#################### -# Sampler wrappers # -#################### +include("abstractmcmc.jl") include("repeat_sampler.jl") include("external_sampler.jl") -# TODO: make a nicer `set_namedtuple!` and move these functions to DynamicPPL. 
-function DynamicPPL.unflatten(vi::DynamicPPL.NTVarInfo, θ::NamedTuple) - set_namedtuple!(deepcopy(vi), θ) - return vi -end -function DynamicPPL.unflatten(vi::SimpleVarInfo, θ::NamedTuple) - return SimpleVarInfo(θ, vi.logp, vi.transformation) -end - -""" - mh_accept(logp_current::Real, logp_proposal::Real, log_proposal_ratio::Real) - -Decide if a proposal ``x'`` with log probability ``\\log p(x') = logp_proposal`` and -log proposal ratio ``\\log k(x', x) - \\log k(x, x') = log_proposal_ratio`` in a -Metropolis-Hastings algorithm with Markov kernel ``k(x_t, x_{t+1})`` and current state -``x`` with log probability ``\\log p(x) = logp_current`` is accepted by evaluating the -Metropolis-Hastings acceptance criterion -```math -\\log U \\leq \\log p(x') - \\log p(x) + \\log k(x', x) - \\log k(x, x') -``` -for a uniform random number ``U \\in [0, 1)``. -""" -function mh_accept(logp_current::Real, logp_proposal::Real, log_proposal_ratio::Real) - # replacing log(rand()) with -randexp() yields test errors - return log(rand()) + logp_current ≤ logp_proposal + log_proposal_ratio -end - -####################################### -# Concrete algorithm implementations. # -####################################### - include("ess.jl") include("hmc.jl") include("mh.jl") @@ -127,6 +77,7 @@ include("particle_mcmc.jl") include("sghmc.jl") include("emcee.jl") include("prior.jl") + include("gibbs.jl") include("gibbs_conditional.jl") diff --git a/src/mcmc/abstractmcmc.jl b/src/mcmc/abstractmcmc.jl index c8c4db1329..f867ab5344 100644 --- a/src/mcmc/abstractmcmc.jl +++ b/src/mcmc/abstractmcmc.jl @@ -1,15 +1,3 @@ -# TODO: Implement additional checks for certain samplers, e.g. -# HMC not supporting discrete parameters. 
-function _check_model(model::DynamicPPL.Model) - new_model = DynamicPPL.setleafcontext(model, DynamicPPL.InitContext()) - return DynamicPPL.check_model( - new_model, DynamicPPL.OnlyAccsVarInfo(); error_on_failure=true - ) -end -function _check_model(model::DynamicPPL.Model, ::AbstractSampler) - return _check_model(model) -end - """ Turing.Inference.init_strategy(spl::AbstractSampler) @@ -20,23 +8,27 @@ parameters for sampling are chosen if not specified by the user. By default, thi init_strategy(::AbstractSampler) = DynamicPPL.InitFromPrior() """ - default_varinfo(rng, model, sampler) + _convert_initial_params(initial_params) -Return a default varinfo object for the given `model` and `sampler`. -The default method for this returns a NTVarInfo (i.e. 'typed varinfo'). +Convert `initial_params` to a `DynamicPPl.AbstractInitStrategy` if it is not already one, or +throw a useful error message. """ -function default_varinfo( - rng::Random.AbstractRNG, model::DynamicPPL.Model, ::AbstractSampler -) - # Note that in `AbstractMCMC.step`, the values in the varinfo returned here are - # immediately overwritten by a subsequent call to `init!!`. The reason why we - # _do_ create a varinfo with parameters here (as opposed to simply returning - # an empty `typed_varinfo(VarInfo())`) is to avoid issues where pushing to an empty - # typed VarInfo would fail. This can happen if two VarNames have different types - # but share the same symbol (e.g. `x.a` and `x.b`). - # TODO(mhauru) Fix push!! to work with arbitrary lens types, and then remove the arguments - # and return an empty VarInfo instead. - return DynamicPPL.typed_varinfo(VarInfo(rng, model)) +_convert_initial_params(initial_params::DynamicPPL.AbstractInitStrategy) = initial_params +function _convert_initial_params(nt::NamedTuple) + @info "Using a NamedTuple for `initial_params` will be deprecated in a future release. Please use `InitFromParams(namedtuple)` instead." 
+ return DynamicPPL.InitFromParams(nt) +end +function _convert_initial_params(d::AbstractDict{<:VarName}) + @info "Using a Dict for `initial_params` will be deprecated in a future release. Please use `InitFromParams(dict)` instead." + return DynamicPPL.InitFromParams(d) +end +function _convert_initial_params(::AbstractVector{<:Real}) + errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or ideally a `DynamicPPL.AbstractInitStrategy`. Using a vector of parameters for `initial_params` is no longer supported. Please see https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters for details on how to update your code." + throw(ArgumentError(errmsg)) +end +function _convert_initial_params(@nospecialize(_::Any)) + errmsg = "`initial_params` must be a `NamedTuple`, an `AbstractDict{<:VarName}`, or a `DynamicPPL.AbstractInitStrategy`." + throw(ArgumentError(errmsg)) end ######################################### @@ -59,7 +51,7 @@ function AbstractMCMC.sample( chain_type=DEFAULT_CHAIN_TYPE, kwargs..., ) - check_model && _check_model(model, spl) + check_model && Turing._check_model(model, spl) return AbstractMCMC.mcmcsample( rng, model, @@ -96,7 +88,7 @@ function AbstractMCMC.sample( initial_params=fill(init_strategy(spl), n_chains), kwargs..., ) - check_model && _check_model(model, spl) + check_model && Turing._check_model(model, spl) if !(initial_params isa AbstractVector) || length(initial_params) != n_chains errmsg = "`initial_params` must be an AbstractVector of length `n_chains`; one element per chain" throw(ArgumentError(errmsg)) @@ -144,15 +136,10 @@ function AbstractMCMC.step( initial_params, kwargs..., ) - # Generate the default varinfo. Note that any parameters inside this varinfo - # will be immediately overwritten by the next call to `init!!`. - vi = default_varinfo(rng, model, spl) - - # Fill it with initial parameters. 
Note that, if `InitFromParams` is used, the - # parameters provided must be in unlinked space (when inserted into the - # varinfo, they will be adjusted to match the linking status of the - # varinfo). - _, vi = DynamicPPL.init!!(rng, model, vi, initial_params) + # Generate a VarInfo with initial parameters. Note that, if `InitFromParams` is used, + # the parameters provided must be in unlinked space (when inserted into the varinfo, + # they will be adjusted to match the linking status of the varinfo). + _, vi = DynamicPPL.init!!(rng, model, VarInfo(), initial_params, DynamicPPL.UnlinkAll()) # Call the actual function that does the first step. return initialstep(rng, model, spl, vi; initial_params, kwargs...) diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 03c25c796d..a776579fbe 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -66,7 +66,7 @@ function AbstractMCMC.step( throw(ArgumentError(err_msg)) end vis = map(vis, initial_params) do vi, strategy - last(DynamicPPL.init!!(rng, model, vi, strategy)) + last(DynamicPPL.init!!(rng, model, vi, strategy, DynamicPPL.UnlinkAll())) end # Compute initial transition and states. 
diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index 4469bc119d..05031fcd34 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -22,53 +22,71 @@ Mean """ struct ESS <: AbstractSampler end +struct TuringESSState{V<:DynamicPPL.AbstractVarInfo,VNT<:DynamicPPL.VarNamedTuple} + vi::V + priors::VNT +end +get_varinfo(state::TuringESSState) = state.vi + # always accept in the first step -function Turing.Inference.initialstep( +function AbstractMCMC.step( rng::AbstractRNG, model::DynamicPPL.Model, - ::ESS, - vi::AbstractVarInfo; + ::ESS; discard_sample=false, + initial_params, kwargs..., ) - for vn in keys(vi) - dist = getdist(vi, vn) + vi = DynamicPPL.VarInfo() + vi = DynamicPPL.setacc!!(vi, DynamicPPL.RawValueAccumulator(true)) + prior_acc = DynamicPPL.PriorDistributionAccumulator() + prior_accname = DynamicPPL.accumulator_name(prior_acc) + vi = DynamicPPL.setacc!!(vi, prior_acc) + _, vi = DynamicPPL.init!!(rng, model, vi, initial_params, DynamicPPL.UnlinkAll()) + priors = DynamicPPL.getacc(vi, Val(prior_accname)).values + + for dist in values(priors) EllipticalSliceSampling.isgaussian(typeof(dist)) || error("ESS only supports Gaussian prior distributions") end transition = discard_sample ? 
nothing : DynamicPPL.ParamsWithStats(vi, model) - return transition, vi + return transition, TuringESSState(vi, priors) end function AbstractMCMC.step( rng::AbstractRNG, model::DynamicPPL.Model, ::ESS, - vi::AbstractVarInfo; + state::TuringESSState; discard_sample=false, kwargs..., ) # obtain previous sample + vi = state.vi f = vi[:] # define previous sampler state # (do not use cache to avoid in-place sampling from prior) - oldstate = EllipticalSliceSampling.ESSState(f, DynamicPPL.getloglikelihood(vi), nothing) + wrapped_state = EllipticalSliceSampling.ESSState( + f, DynamicPPL.getloglikelihood(vi), nothing + ) # compute next state - sample, state = AbstractMCMC.step( + sample, new_wrapped_state = AbstractMCMC.step( rng, - EllipticalSliceSampling.ESSModel(ESSPrior(model, vi), ESSLikelihood(model, vi)), + EllipticalSliceSampling.ESSModel( + ESSPrior(model, vi, state.priors), ESSLikelihood(model, vi) + ), EllipticalSliceSampling.ESS(), - oldstate, + wrapped_state, ) # update sample and log-likelihood - vi = DynamicPPL.unflatten(vi, sample) - vi = DynamicPPL.setloglikelihood!!(vi, state.loglikelihood) + vi = DynamicPPL.unflatten!!(vi, sample) + vi = DynamicPPL.setloglikelihood!!(vi, new_wrapped_state.loglikelihood) transition = discard_sample ? 
nothing : DynamicPPL.ParamsWithStats(vi, model) - return transition, vi + return transition, TuringESSState(vi, state.priors) end # Prior distribution of considered random variable @@ -77,13 +95,15 @@ struct ESSPrior{M<:Model,V<:AbstractVarInfo,T} varinfo::V μ::T - function ESSPrior(model::Model, varinfo::AbstractVarInfo) - vns = keys(varinfo) - μ = mapreduce(vcat, vns) do vn - dist = getdist(varinfo, vn) - EllipticalSliceSampling.isgaussian(typeof(dist)) || - error("[ESS] only supports Gaussian prior distributions") - DynamicPPL.tovec(mean(dist)) + function ESSPrior( + model::Model, varinfo::AbstractVarInfo, priors::DynamicPPL.VarNamedTuple + ) + μ = mapreduce(vcat, priors; init=Float64[]) do pair + prior_dist = pair.second + EllipticalSliceSampling.isgaussian(typeof(prior_dist)) || error( + "[ESS] only supports Gaussian prior distributions, but found $(typeof(prior_dist))", + ) + DynamicPPL.tovec(mean(prior_dist)) end return new{typeof(model),typeof(varinfo),typeof(μ)}(model, varinfo, μ) end @@ -94,7 +114,9 @@ EllipticalSliceSampling.isgaussian(::Type{<:ESSPrior}) = true # Only define out-of-place sampling function Base.rand(rng::Random.AbstractRNG, p::ESSPrior) - _, vi = DynamicPPL.init!!(rng, p.model, p.varinfo, DynamicPPL.InitFromPrior()) + _, vi = DynamicPPL.init!!( + rng, p.model, p.varinfo, DynamicPPL.InitFromPrior(), DynamicPPL.UnlinkAll() + ) return vi[:] end diff --git a/src/mcmc/external_sampler.jl b/src/mcmc/external_sampler.jl index 32f5439766..2037260e0f 100644 --- a/src/mcmc/external_sampler.jl +++ b/src/mcmc/external_sampler.jl @@ -134,7 +134,7 @@ end # get_varinfo must return something from which the correct parameters can be obtained function get_varinfo(state::TuringState) - return DynamicPPL.unflatten(state.varinfo, state.params) + return DynamicPPL.unflatten!!(state.varinfo, state.params) end get_varinfo(state::AbstractVarInfo) = state @@ -150,12 +150,8 @@ function AbstractMCMC.step( sampler = sampler_wrapper.sampler # Initialise varinfo with 
initial params and link the varinfo if needed. - varinfo = DynamicPPL.VarInfo(model) - _, varinfo = DynamicPPL.init!!(rng, model, varinfo, initial_params) - - if unconstrained - varinfo = DynamicPPL.link(varinfo, model) - end + tfm_strategy = unconstrained ? DynamicPPL.LinkAll() : DynamicPPL.UnlinkAll() + _, varinfo = DynamicPPL.init!!(rng, model, VarInfo(), initial_params, tfm_strategy) # We need to extract the vectorised initial_params, because the later call to # AbstractMCMC.step only sees a `LogDensityModel` which expects `initial_params` diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index 314f818209..4c419540c2 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -10,7 +10,6 @@ isgibbscomponent(::AbstractSampler) = true isgibbscomponent(spl::RepeatSampler) = isgibbscomponent(spl.sampler) isgibbscomponent(spl::ExternalSampler) = isgibbscomponent(spl.sampler) -isgibbscomponent(::IS) = false isgibbscomponent(::Prior) = false isgibbscomponent(::Emcee) = false isgibbscomponent(::SGLD) = false @@ -134,7 +133,11 @@ end # Tilde pipeline function DynamicPPL.tilde_assume!!( - context::GibbsContext, right::Distribution, vn::VarName, vi::DynamicPPL.AbstractVarInfo + context::GibbsContext, + right::Distribution, + vn::VarName, + template::Any, + vi::DynamicPPL.AbstractVarInfo, ) child_context = DynamicPPL.childcontext(context) @@ -170,7 +173,7 @@ function DynamicPPL.tilde_assume!!( return if is_target_varname(context, vn) # Fall back to the default behavior. - DynamicPPL.tilde_assume!!(child_context, right, vn, vi) + DynamicPPL.tilde_assume!!(child_context, right, vn, template, vi) elseif has_conditioned_gibbs(context, vn) # This branch means that a different sampler is supposed to handle this # variable. From the perspective of this sampler, this variable is @@ -188,10 +191,15 @@ function DynamicPPL.tilde_assume!!( # being used by the current sampler. 
value, new_global_vi = DynamicPPL.tilde_assume!!( # child_context might be a PrefixContext so we have to be careful to not - # overwrite it. - DynamicPPL.setleafcontext(child_context, DynamicPPL.InitContext()), + # overwrite it. We assume that the new variable should just be sampled in + # unlinked space. + DynamicPPL.setleafcontext( + child_context, + DynamicPPL.InitContext(DynamicPPL.InitFromPrior(), DynamicPPL.UnlinkAll()), + ), right, vn, + template, get_global_varinfo(context), ) set_global_varinfo!(context, new_global_vi) @@ -205,12 +213,15 @@ end Return a new, conditioned model for a component of a Gibbs sampler. # Arguments +# - `model::DynamicPPL.Model`: The model to condition. + - `target_variables::AbstractVector{<:VarName}`: The target variables of the component -sampler. These will _not_ be conditioned. + sampler. These will _not_ be conditioned. + - `varinfo::DynamicPPL.AbstractVarInfo`: Values for all variables in the model. All the -values in `varinfo` but not in `target_variables` will be conditioned to the values they -have in `varinfo`. + values in `varinfo` but not in `target_variables` will be conditioned to the values they + have in `varinfo`. # Returns - A new model with the variables _not_ in `target_variables` conditioned. @@ -295,19 +306,6 @@ end get_varinfo(state::GibbsState) = state.vi -""" -Initialise a VarInfo for the Gibbs sampler. - -This is straight up copypasta from DynamicPPL's src/sampler.jl. It is repeated here to -support calling both step and step_warmup as the initial step. DynamicPPL initialstep is -incompatible with step_warmup. 
-""" -function initial_varinfo(rng, model, spl, initial_params::DynamicPPL.AbstractInitStrategy) - vi = Turing.Inference.default_varinfo(rng, model, spl) - _, vi = DynamicPPL.init!!(rng, model, vi, initial_params) - return vi -end - function AbstractMCMC.step( rng::Random.AbstractRNG, model::DynamicPPL.Model, @@ -318,7 +316,7 @@ function AbstractMCMC.step( ) varnames = spl.varnames samplers = spl.samplers - vi = initial_varinfo(rng, model, spl, initial_params) + _, vi = DynamicPPL.init!!(rng, model, VarInfo(), initial_params, DynamicPPL.UnlinkAll()) vi, states = gibbs_initialstep_recursive( rng, @@ -344,7 +342,7 @@ function AbstractMCMC.step_warmup( ) varnames = spl.varnames samplers = spl.samplers - vi = initial_varinfo(rng, model, spl, initial_params) + _, vi = DynamicPPL.init!!(rng, model, VarInfo(), initial_params, DynamicPPL.UnlinkAll()) vi, states = gibbs_initialstep_recursive( rng, @@ -478,19 +476,33 @@ function setparams_varinfo!!( end function setparams_varinfo!!( - model::DynamicPPL.Model, sampler::MH, state::MHState, params::AbstractVarInfo + model::DynamicPPL.Model, spl::MH, ::AbstractVarInfo, params::AbstractVarInfo ) - # Re-evaluate to update the logprob. - new_vi = last(DynamicPPL.evaluate!!(model, params)) - return MHState(new_vi, DynamicPPL.getlogjoint_internal(new_vi)) + # Setting `params` into `state` really just means using `params` itself, but we + # need to update the logprob. We also need to be a bit more careful, because + # the `state` here carries a VAIMAcc, which is needed for the MH step() function + # but may not be present in `params`. So we need to make sure that the value + # we return from this function also has a VAIMAcc which corresponds to the + # values in `params`. Likewise with the other MH-specific accumulators. 
+ params = DynamicPPL.setacc!!(params, DynamicPPL.RawValueAccumulator(false)) + params = DynamicPPL.setacc!!(params, MHLinkedValuesAccumulator()) + params = DynamicPPL.setacc!!( + params, MHUnspecifiedPriorsAccumulator(spl.vns_with_proposal) + ) + # TODO(penelopeysm): Remove need for evaluate_nowarn here, by allowing MH-in-Gibbs to + # use OAVI. + return last(DynamicPPL.evaluate_nowarn!!(model, params)) end function setparams_varinfo!!( - model::DynamicPPL.Model, sampler::ESS, state::AbstractVarInfo, params::AbstractVarInfo + model::DynamicPPL.Model, ::ESS, state::TuringESSState, params::AbstractVarInfo ) - # The state is already a VarInfo, so we can just return `params`, but first we need to - # update its logprob. - return last(DynamicPPL.evaluate!!(model, params)) + # The state is basically a VarInfo (plus a constant `priors` field), so we can just + # return `params`, but first we need to update its logprob. + # TODO(penelopeysm): Remove need for evaluate_nowarn here, by allowing ESS-in-Gibbs to + # use OAVI. + new_vi = last(DynamicPPL.evaluate_nowarn!!(model, params)) + return TuringESSState(new_vi, state.priors) end function setparams_varinfo!!( @@ -537,39 +549,28 @@ variables, and one might need it to be linked while the other doesn't. 
""" function match_linking!!(varinfo_local, prev_state_local, model) prev_varinfo_local = get_varinfo(prev_state_local) - was_linked = DynamicPPL.is_transformed(prev_varinfo_local) - is_linked = DynamicPPL.is_transformed(varinfo_local) - if was_linked && !is_linked - varinfo_local = DynamicPPL.link!!(varinfo_local, model) - elseif !was_linked && is_linked - varinfo_local = DynamicPPL.invlink!!(varinfo_local, model) + # Get a set of all previously linked variables + linked_vns = Set{VarName}() + unlinked_vns = Set{VarName}() + for vn in keys(prev_varinfo_local) + if DynamicPPL.is_transformed(prev_varinfo_local, vn) + push!(linked_vns, vn) + else + push!(unlinked_vns, vn) + end + end + transform_strategy = if isempty(unlinked_vns) + # All variables were linked + DynamicPPL.LinkAll() + elseif isempty(linked_vns) + # No variables were linked + DynamicPPL.UnlinkAll() + else + DynamicPPL.LinkSome( + linked_vns, DynamicPPL.UnlinkSome(unlinked_vns, DynamicPPL.LinkAll()) + ) end - # TODO(mhauru) The above might run into trouble if some variables are linked and others - # are not. `is_transformed(varinfo)` returns an `all` over the individual variables. This could - # especially be a problem with dynamic models, where new variables may get introduced, - # but also in cases where component samplers have partial overlap in their target - # variables. The below is how I would like to implement this, but DynamicPPL at this - # time does not support linking individual variables selected by `VarName`. It soon - # should though, so come back to this. 
- # Issue ref: https://github.com/TuringLang/Turing.jl/issues/2401 - # prev_links_dict = Dict(vn => DynamicPPL.is_transformed(prev_varinfo_local, vn) for vn in keys(prev_varinfo_local)) - # any_linked = any(values(prev_links_dict)) - # for vn in keys(varinfo_local) - # was_linked = if haskey(prev_varinfo_local, vn) - # prev_links_dict[vn] - # else - # # If the old state didn't have this variable, we assume it was linked if _any_ - # # of the variables of the old state were linked. - # any_linked - # end - # is_linked = DynamicPPL.is_transformed(varinfo_local, vn) - # if was_linked && !is_linked - # varinfo_local = DynamicPPL.invlink!!(varinfo_local, vn) - # elseif !was_linked && is_linked - # varinfo_local = DynamicPPL.link!!(varinfo_local, vn) - # end - # end - return varinfo_local + return DynamicPPL.update_link_status!!(varinfo_local, transform_strategy, model) end """ @@ -602,7 +603,7 @@ function gibbs_step_recursive( # Construct the conditional model and the varinfo that this sampler should use. conditioned_model, context = make_conditional(model, varnames, global_vi) vi = DynamicPPL.subset(global_vi, varnames) - vi = match_linking!!(vi, state, model) + vi = match_linking!!(vi, state, conditioned_model) # TODO(mhauru) The below may be overkill. If the varnames for this sampler are not # sampled by other samplers, we don't need to `setparams`, but could rather simply diff --git a/src/mcmc/gibbs_conditional.jl b/src/mcmc/gibbs_conditional.jl index bd78576491..ff65e3a800 100644 --- a/src/mcmc/gibbs_conditional.jl +++ b/src/mcmc/gibbs_conditional.jl @@ -16,21 +16,40 @@ sampler = Gibbs( ) ``` -Here `get_cond_dists(c::Dict{<:VarName})` should be a function that takes a `Dict` mapping -the conditioned variables (anything other than `var1` and `var2`) to their values, and -returns the conditional posterior distributions for `var1` and `var2`. You may, of course, -have any number of variables being sampled as a block in this manner, we only use two as an -example. 
The return value of `get_cond_dists` should be one of the following: +Here `get_cond_dists(vnt::VarNamedTuple)` should be a function that takes a `VarNamedTuple` +that contains the values of all other variables (apart from `var1` and `var2`), and returns +the conditional posterior distributions for `var1` and `var2`. + +`VarNamedTuple`s behave very similarly to `Dict{VarName,Any}`s, but are more efficient and +more general: you can obtain values simply by using, e.g. `vnt[@varname(var3)]`. See +https://turinglang.org/docs/usage/varnamedtuple/ for more details on `VarNamedTuple`s. + +You may, of course, have any number of variables being sampled as a block in this manner, we +only use two as an example. + +The return value of `get_cond_dists(vnt)` should be one of the following: + - A single `Distribution`, if only one variable is being sampled. +- A `VarNamedTuple` of `Distribution`s, which represents a mapping from variable names to their + conditional posteriors. Please see the documentation linked above for information on how to + construct `VarNamedTuple`s. + +For convenience, we also allow the following return values (which are internally converted into +a `VarNamedTuple`): + +- A `NamedTuple` of `Distribution`s, which is like the `AbstractDict` case but can be used + if all the variable names are single `Symbol`s, e.g.: `(; var1=dist1, var2=dist2)`. - An `AbstractDict{<:VarName,<:Distribution}` that maps the variables being sampled to their conditional posteriors E.g. `Dict(@varname(var1) => dist1, @varname(var2) => dist2)`. -- A `NamedTuple` of `Distribution`s, which is like the `AbstractDict` case but can be used - if all the variable names are single `Symbol`s, and may be more performant. E.g. - `(; var1=dist1, var2=dist2)`. + +Note that the `AbstractDict` case is likely to incur a performance penalty; we recommend using +`VarNamedTuple`s directly. 
# Examples ```julia +using Turing + # Define a model @model function inverse_gdemo(x) precision ~ Gamma(2, inv(3)) @@ -43,24 +62,20 @@ end # Define analytical conditionals. See # https://en.wikipedia.org/wiki/Conjugate_prior#When_likelihood_function_is_a_continuous_distribution -function cond_precision(c) +function cond_precision(vnt) a = 2.0 b = 3.0 - # We use AbstractPPL.getvalue instead of indexing into `c` directly to guard against - # issues where e.g. you try to get `c[@varname(x[1])]` but only `@varname(x)` is present - # in `c`. `getvalue` handles that gracefully, `getindex` doesn't. In this case - # `getindex` would suffice, but `getvalue` is good practice. - m = AbstractPPL.getvalue(c, @varname(m)) - x = AbstractPPL.getvalue(c, @varname(x)) + m = vnt[@varname(m)] + x = vnt[@varname(x)] n = length(x) a_new = a + (n + 1) / 2 b_new = b + sum(abs2, x .- m) / 2 + m^2 / 2 return Gamma(a_new, 1 / b_new) end -function cond_m(c) - precision = AbstractPPL.getvalue(c, @varname(precision)) - x = AbstractPPL.getvalue(c, @varname(x)) +function cond_m(vnt) + precision = vnt[@varname(precision)] + x = vnt[@varname(x)] n = length(x) m_mean = sum(x) / (n + 1) m_var = 1 / (precision * (n + 1)) @@ -82,26 +97,38 @@ end isgibbscomponent(::GibbsConditional) = true """ - build_variable_dict(model::DynamicPPL.Model) + build_values_vnt(model::DynamicPPL.Model) -Traverse the context stack of `model` and build a `Dict` of all the variable values that are -set in GibbsContext, ConditionContext, or FixedContext. +Traverse the context stack of `model` and build a `VarNamedTuple` of all the variable values +that are set in GibbsContext, ConditionContext, or FixedContext. """ -function build_variable_dict(model::DynamicPPL.Model) +function build_values_vnt(model::DynamicPPL.Model) context = model.context cond_vals = DynamicPPL.conditioned(context) fixed_vals = DynamicPPL.fixed(context) - # TODO(mhauru) Can we avoid invlinking all the time? 
- global_vi = DynamicPPL.invlink(get_gibbs_global_varinfo(context), model) - # TODO(mhauru) This creates a lot of Dicts, which are then immediately merged into one. - # Also, DynamicPPL.to_varname_dict is known to be inefficient. Make a more efficient - # implementation. - return merge( - DynamicPPL.values_as(global_vi, Dict), - DynamicPPL.to_varname_dict(cond_vals), - DynamicPPL.to_varname_dict(fixed_vals), - DynamicPPL.to_varname_dict(model.args), + # model.args is a NamedTuple + arg_vals = DynamicPPL.VarNamedTuple(model.args) + # Extract values from the GibbsContext itself, as a VNT. + init_strat = DynamicPPL.InitFromParams( + get_gibbs_global_varinfo(context).values, nothing ) + oavi = DynamicPPL.OnlyAccsVarInfo((DynamicPPL.RawValueAccumulator(false),)) + # We need to remove the Gibbs conditioning so that we can get all variables in the + # accumulator (otherwise those that are conditioned on in `model` will not be included). + defmodel = replace_gibbs_context(model) + _, oavi = DynamicPPL.init!!(defmodel, oavi, init_strat, DynamicPPL.UnlinkAll()) + global_vals = DynamicPPL.get_raw_values(oavi) + # Merge them. 
+ return merge(global_vals, cond_vals, fixed_vals, arg_vals) +end + +replace_gibbs_context(::GibbsContext) = DefaultContext() +replace_gibbs_context(::DynamicPPL.AbstractContext) = DefaultContext() +function replace_gibbs_context(c::DynamicPPL.AbstractParentContext) + return DynamicPPL.setchildcontext(c, replace_gibbs_context(DynamicPPL.childcontext(c))) +end +function replace_gibbs_context(m::DynamicPPL.Model) + return DynamicPPL.contextualize(m, replace_gibbs_context(m.context)) end function get_gibbs_global_varinfo(context::GibbsContext) @@ -117,11 +144,11 @@ function get_gibbs_global_varinfo(::DynamicPPL.AbstractContext) throw(ArgumentError(msg)) end -function initialstep( +function Turing.Inference.initialstep( ::Random.AbstractRNG, model::DynamicPPL.Model, ::GibbsConditional, - vi::DynamicPPL.AbstractVarInfo; + vi::DynamicPPL.VarInfo; kwargs..., ) state = DynamicPPL.is_transformed(vi) ? DynamicPPL.invlink(vi, model) : vi @@ -130,42 +157,72 @@ function initialstep( return nothing, state end +@inline _to_varnamedtuple(dists::NamedTuple, ::DynamicPPL.VarInfo) = + DynamicPPL.VarNamedTuple(dists) +@inline _to_varnamedtuple(dists::DynamicPPL.VarNamedTuple, ::DynamicPPL.VarInfo) = dists +function _to_varnamedtuple(dists::AbstractDict{<:VarName}, state::DynamicPPL.VarInfo) + template_vnt = state.values + vnt = DynamicPPL.VarNamedTuple() + for (vn, dist) in dists + top_sym = AbstractPPL.getsym(vn) + template = get(template_vnt.data, top_sym, DynamicPPL.NoTemplate()) + vnt = DynamicPPL.templated_setindex!!(vnt, dist, vn, template) + end + return vnt +end +function _to_varnamedtuple(dist::Distribution, state::DynamicPPL.VarInfo) + vns = keys(state) + if length(vns) > 1 + msg = ( + "In GibbsConditional, `get_cond_dists` returned a single distribution," * + " but multiple variables ($vns) are being sampled. Please return a" * + " VarNamedTuple mapping variable names to distributions instead." 
+ ) + throw(ArgumentError(msg)) + end + vn = only(vns) + top_sym = AbstractPPL.getsym(vn) + template = get(state.values.data, top_sym, DynamicPPL.NoTemplate()) + return DynamicPPL.templated_setindex!!(DynamicPPL.VarNamedTuple(), dist, vn, template) +end + +struct InitFromCondDists{V<:DynamicPPL.VarNamedTuple} <: DynamicPPL.AbstractInitStrategy + cond_dists::V +end +function DynamicPPL.init( + rng::Random.AbstractRNG, vn::VarName, ::Distribution, init_strat::InitFromCondDists +) + return DynamicPPL.UntransformedValue(rand(rng, init_strat.cond_dists[vn])) +end + function AbstractMCMC.step( rng::Random.AbstractRNG, model::DynamicPPL.Model, sampler::GibbsConditional, - state::DynamicPPL.AbstractVarInfo; + state::DynamicPPL.VarInfo; kwargs..., ) # Get all the conditioned variable values from the model context. This is assumed to # include a GibbsContext as part of the context stack. - condvals = build_variable_dict(model) - conddists = sampler.get_cond_dists(condvals) - - # We support three different kinds of return values for `sample.get_cond_dists`, to make - # life easier for the user. - if conddists isa AbstractDict - for (vn, dist) in conddists - state = setindex!!(state, rand(rng, dist), vn) - end - elseif conddists isa NamedTuple - for (vn_sym, dist) in pairs(conddists) - vn = VarName{vn_sym}() - state = setindex!!(state, rand(rng, dist), vn) - end - else - # Single variable case - vn = only(keys(state)) - state = setindex!!(state, rand(rng, conddists), vn) - end - + condvals = build_values_vnt(model) + # `sampler.get_cond_dists(condvals)` could return many things, unfortunately, so we need + # to handle the different cases. + # - just a distribution, in which case we assume there is only one variable being + # sampled, and we can just sample from it directly. 
+ # - a VarNamedTuple of distributions + # - a NamedTuple of distributions + # - an AbstractDict mapping VarNames to distributions + conddists = _to_varnamedtuple(sampler.get_cond_dists(condvals), state) + + init_strategy = InitFromCondDists(conddists) + _, new_state = DynamicPPL.init!!(rng, model, state, init_strategy) # Since GibbsConditional is only used within Gibbs, it does not need to return a # transition. - return nothing, state + return nothing, new_state end function setparams_varinfo!!( - ::DynamicPPL.Model, ::GibbsConditional, ::Any, params::DynamicPPL.AbstractVarInfo + ::DynamicPPL.Model, ::GibbsConditional, ::Any, params::DynamicPPL.VarInfo ) return params end diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index 884341c3eb..84b9c628ff 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -100,7 +100,7 @@ function AbstractMCMC.sample( discard_initial=-1, kwargs..., ) - check_model && _check_model(model, sampler) + check_model && Turing._check_model(model, sampler) if initial_state === nothing # If `nadapts` is `-1`, then the user called a convenience # constructor like `NUTS()` or `NUTS(0.65)`, @@ -168,7 +168,9 @@ function find_initial_params( @warn "failed to find valid initial parameters in $(attempts) tries; consider providing a different initialisation strategy with the `initial_params` keyword" # Resample and try again. - _, varinfo = DynamicPPL.init!!(rng, model, varinfo, init_strategy) + _, varinfo = DynamicPPL.init!!( + rng, model, varinfo, init_strategy, DynamicPPL.LinkAll() + ) end # if we failed to find valid initial parameters, error @@ -274,7 +276,7 @@ function AbstractMCMC.step( # Update variables vi = state.vi if t.stat.is_accept - vi = DynamicPPL.unflatten(vi, t.z.θ) + vi = DynamicPPL.unflatten!!(vi, t.z.θ) end # Compute next transition and state. diff --git a/src/mcmc/is.jl b/src/mcmc/is.jl index dbe966ebab..8b13789179 100644 --- a/src/mcmc/is.jl +++ b/src/mcmc/is.jl @@ -1,76 +1 @@ -""" - IS() -Importance sampling algorithm. 
- -Usage: - -```julia -IS() -``` - -Example: - -```julia -# Define a simple Normal model with unknown mean and variance. -@model function gdemo(x) - s² ~ InverseGamma(2,3) - m ~ Normal(0,sqrt.(s)) - x[1] ~ Normal(m, sqrt.(s)) - x[2] ~ Normal(m, sqrt.(s)) - return s², m -end - -sample(gdemo([1.5, 2]), IS(), 1000) -``` -""" -struct IS <: AbstractSampler end - -function Turing.Inference.initialstep( - rng::AbstractRNG, - model::Model, - spl::IS, - vi::AbstractVarInfo; - discard_sample=false, - kwargs..., -) - transition = discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi, model) - return transition, nothing -end - -function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::Model, - spl::IS, - ::Nothing; - discard_sample=false, - kwargs..., -) - model = DynamicPPL.setleafcontext(model, ISContext(rng)) - _, vi = DynamicPPL.evaluate!!(model, DynamicPPL.VarInfo()) - vi = DynamicPPL.typed_varinfo(vi) - transition = discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi, model) - return transition, nothing -end - -struct ISContext{R<:AbstractRNG} <: DynamicPPL.AbstractContext - rng::R -end - -function DynamicPPL.tilde_assume!!( - ctx::ISContext, dist::Distribution, vn::VarName, vi::AbstractVarInfo -) - if haskey(vi, vn) - r = vi[vn] - else - r = rand(ctx.rng, dist) - vi = push!!(vi, vn, r, dist) - end - vi = DynamicPPL.accumulate_assume!!(vi, r, 0.0, vn, dist) - return r, vi -end -function DynamicPPL.tilde_observe!!( - ::ISContext, right::Distribution, left, vn::Union{VarName,Nothing}, vi::AbstractVarInfo -) - return DynamicPPL.tilde_observe!!(DefaultContext(), right, left, vn, vi) -end diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 7e1665e65a..d92360edd5 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -1,464 +1,464 @@ -### -### Sampler states -### - -proposal(p::AdvancedMH.Proposal) = p -proposal(f::Function) = AdvancedMH.StaticProposal(f) -proposal(d::Distribution) = AdvancedMH.StaticProposal(d) -proposal(cov::AbstractMatrix) = 
AdvancedMH.RandomWalkProposal(MvNormal(cov)) -proposal(x) = error("proposals of type ", typeof(x), " are not supported") +using AdvancedMH: AdvancedMH +using AbstractPPL: @varname """ - MH(proposals...) + MH(vn1 => proposal1, vn2 => proposal2, ...) Construct a Metropolis-Hastings algorithm. -The arguments `proposals` can be +Each argument `proposal` can be + +- Blank (i.e. `MH()`), in which case `MH` defaults to using the prior for each parameter as + the proposal distribution. +- A mapping of `VarName`s to a `Distribution`, `LinkedRW`, or a generic callable that + defines a conditional proposal distribution. + + + MH(cov_matrix) -- Blank (i.e. `MH()`), in which case `MH` defaults to using the prior for each parameter as the proposal distribution. -- An iterable of pairs or tuples mapping a `Symbol` to a `AdvancedMH.Proposal`, `Distribution`, or `Function` - that returns a conditional proposal distribution. -- A covariance matrix to use as for mean-zero multivariate normal proposals. +Construct a Metropolis-Hastings algorithm that performs random-walk sampling in linked +space, with proposals drawn from a multivariate normal distribution with the given +covariance matrix. # Examples -The default `MH` will draw proposal samples from the prior distribution using `AdvancedMH.StaticProposal`. +Consider the model below: ```julia -@model function gdemo(x, y) - s² ~ InverseGamma(2,3) - m ~ Normal(0, sqrt(s²)) - x ~ Normal(m, sqrt(s²)) - y ~ Normal(m, sqrt(s²)) +@model function gdemo() + s ~ InverseGamma(2,3) + m ~ Normal(0, sqrt(s)) + 1.5 ~ Normal(m, sqrt(s)) + 2.0 ~ Normal(m, sqrt(s)) end - -chain = sample(gdemo(1.5, 2.0), MH(), 1_000) -mean(chain) ``` -Specifying a single distribution implies the use of static MH: +The default constructor, `MH()`, uses the prior distributions as proposals. So, new +proposals are obtained by sampling `s` from `InverseGamma(2,3)` and `m` from `Normal(0, +sqrt(s))`. 
```julia -# Use a static proposal for s² (which happens to be the same -# as the prior) and a static proposal for m (note that this -# isn't a random walk proposal). -chain = sample( - gdemo(1.5, 2.0), - MH( - :s² => InverseGamma(2, 3), - :m => Normal(0, 1) - ), - 1_000 -) -mean(chain) +spl = MH() ``` -Specifying explicit proposals using the `AdvancedMH` interface: +Alternatively, a mapping of variable names to proposal distributions can be provided. +This implies the use of static proposals for each variable. If a variable is not specified, +its prior distribution is used as the proposal. ```julia -# Use a static proposal for s² and random walk with proposal -# standard deviation of 0.25 for m. -chain = sample( - gdemo(1.5, 2.0), - MH( - :s² => AdvancedMH.StaticProposal(InverseGamma(2,3)), - :m => AdvancedMH.RandomWalkProposal(Normal(0, 0.25)) - ), - 1_000 +# Use a static proposal for s² (which happens to be the same as the prior) and a static +# proposal for m (note that this isn't a random walk proposal). +spl = MH( + # This happens to be the same as the prior + @varname(s) => InverseGamma(2, 3), + # This is different from the prior + @varname(m) => Normal(0, 1), ) -mean(chain) ``` -Using a custom function to specify a conditional distribution: +If the `VarName` of interest is a single symbol, you can also use a `Symbol` instead. ```julia -# Use a static proposal for s and and a conditional proposal for m, -# where the proposal is centered around the current sample. -chain = sample( - gdemo(1.5, 2.0), - MH( - :s² => InverseGamma(2, 3), - :m => x -> Normal(x, 1) - ), - 1_000 +spl = MH( + :s => InverseGamma(2, 3), + :m => Normal(0, 1), ) -mean(chain) ``` -Providing a covariance matrix will cause `MH` to perform random-walk -sampling in the transformed space with proposals drawn from a multivariate -normal distribution. 
The provided matrix must be positive semi-definite and -square: +You can also use a callable to define a proposal that is conditional on the current values. +The callable must accept a single argument, which is a `DynamicPPL.VarNamedTuple` that holds +all the values of the parameters from the previous step. You can obtain the value of a +specific parameter by indexing into this `VarNamedTuple` using a `VarName` (note that symbol +indexing is not supported). The callable must then return a `Distribution` from which to +draw the proposal. + +!!! note + In general, there is no way for Turing to reliably detect whether a proposal is meant to + be a callable or not, since callable structs may have any type. Hence, any proposal that + is *not* a distribution is assumed to be a callable. ```julia -# Providing a custom variance-covariance matrix -chain = sample( - gdemo(1.5, 2.0), - MH( - [0.25 0.05; - 0.05 0.50] - ), - 1_000 +spl = MH( + # This is a static proposal (same as above). + @varname(s) => InverseGamma(2, 3), + # This is a conditional proposal, which proposes m from a normal + # distribution centred at the current value of m, with a standard + # deviation of 0.5. + @varname(m) => (vnt -> Normal(vnt[@varname(m)], 0.5)), ) -mean(chain) ``` -""" -struct MH{P} <: AbstractSampler - proposals::P - - function MH(proposals...) - prop_syms = Symbol[] - props = AMH.Proposal[] - - for s in proposals - if s isa Pair || s isa Tuple - # Check to see whether it's a pair that specifies a kernel - # or a specific proposal distribution. - push!(prop_syms, s[1]) - push!(props, proposal(s[2])) - elseif length(proposals) == 1 - # If we hit this block, check to see if it's - # a run-of-the-mill proposal or covariance - # matrix. - prop = proposal(s) - - # Return early, we got a covariance matrix. - return new{typeof(prop)}(prop) - else - # Try to convert it to a proposal anyways, - # throw an error if not acceptable. 
- prop = proposal(s) - push!(props, prop) - end - end - - proposals = NamedTuple{tuple(prop_syms...)}(tuple(props...)) +**Note that when using conditional proposals, the values obtained by indexing into the +`VarNamedTuple` are always in unlinked space.** Sometimes, you may want to define a random-walk +proposal in linked space. For this, you can use `LinkedRW` as a proposal, which takes a +covariance matrix as an argument: - return new{typeof(proposals)}(proposals) - end -end - -# Some of the proposals require working in unconstrained space. -transform_maybe(proposal::AMH.Proposal) = proposal -function transform_maybe(proposal::AMH.RandomWalkProposal) - return AMH.RandomWalkProposal(Bijectors.transformed(proposal.proposal)) -end - -function MH(model::Model; proposal_type=AMH.StaticProposal) - priors = DynamicPPL.extract_priors(model) - props = Tuple([proposal_type(prop) for prop in values(priors)]) - vars = Tuple(map(Symbol, collect(keys(priors)))) - priors = map(transform_maybe, NamedTuple{vars}(props)) - return AMH.MetropolisHastings(priors) -end +```julia +using LinearAlgebra: Diagonal +spl = MH( + @varname(s) => InverseGamma(2, 3), + @varname(m) => LinkedRW(Diagonal([0.25])) +) +``` -""" - MHState(varinfo::AbstractVarInfo, logjoint_internal::Real) +In the above example, `LinkedRW(Diagonal([0.25]))` defines a random-walk proposal for `m` in +linked space. This is in fact the same as the conditional proposal above, because `m` is +already unconstrained, and so linked space and unlinked space are the same for this +variable. However, `s` is constrained to be positive, and so using a `LinkedRW` proposal for +`s` would be different from using a normal proposal in unlinked space (`LinkedRW` will +ensure that the proposals for `s` always remain positive in unlinked space). -State for Metropolis-Hastings sampling. 
+```julia +spl = MH( + @varname(s) => LinkedRW(Diagonal([0.5])), + @varname(m) => LinkedRW(Diagonal([0.25])), +) +``` -`varinfo` must have the correct parameters set inside it, but its other fields -(e.g. accumulators, which track logp) can in general be missing or incorrect. +Finally, providing just a single covariance matrix will cause `MH` to perform random-walk +sampling in linked space with proposals drawn from a multivariate normal distribution. All +variables are linked in this case. The provided matrix must be positive semi-definite and +square. This example is therefore equivalent to the previous one: -`logjoint_internal` is the log joint probability of the model, evaluated using -the parameters and linking status of `varinfo`. It should be equal to -`DynamicPPL.getlogjoint_internal(varinfo)`. This information is returned by the -MH sampler so we store this here to avoid re-evaluating the model -unnecessarily. +```julia +# Providing a custom variance-covariance matrix +spl = MH( + [0.50 0; + 0 0.25] +) +``` """ -struct MHState{V<:AbstractVarInfo,L<:Real} - varinfo::V - logjoint_internal::L +struct MH{I,L<:DynamicPPL.AbstractTransformStrategy} <: AbstractSampler + "A function which takes two arguments: (1) the VarNamedTuple of raw values at the + previous step, and (2) a VarNamedTuple of linked values for any variables that have + `LinkedRW` proposals; and returns an AbstractInitStrategy. We don't have access to the + VNTs until the actual sampling, so we have to use a function here; the strategy itself + will be constructed anew in each sampling step." + init_strategy_constructor::I + "Linked variables, i.e., variables which have a `LinkedRW` proposal." + transform_strategy::L + "All variables with a proposal" + vns_with_proposal::Set{VarName} end - -get_varinfo(s::MHState) = s.varinfo - -##################### -# Utility functions # -##################### +# If no proposals are given, then the initialisation strategy to use is always +# `InitFromPrior`. 
+MH() = MH(Returns(DynamicPPL.InitFromPrior()), DynamicPPL.UnlinkAll(), Set{VarName}()) """ - OldLogDensityFunction - -This is a clone of pre-0.39 DynamicPPL.LogDensityFunction. It is needed for MH because MH -doesn't actually obey the LogDensityProblems.jl interface: it evaluates -'LogDensityFunctions' with a NamedTuple(!!) + LinkedRW(cov_matrix) -This means that we can't _really_ use DynamicPPL's LogDensityFunction, since that only -promises to obey the interface of being called with a vector. - -In particular, because `set_namedtuple!` acts on a VarInfo, we need to store the VarInfo -inside this struct (which DynamicPPL's LogDensityFunction no longer does). - -This SHOULD really be refactored to remove this requirement. +Define a random-walk proposal in linked space with the given covariance matrix. Note that +the covariance matrix must correspond exactly to the size of the variable in linked space. """ -struct OldLogDensityFunction{M<:DynamicPPL.Model,V<:DynamicPPL.AbstractVarInfo} - model::M - varinfo::V -end -function (f::OldLogDensityFunction)(x::AbstractVector) - vi = DynamicPPL.unflatten(f.varinfo, x) - _, vi = DynamicPPL.evaluate!!(f.model, vi) - return DynamicPPL.getlogjoint_internal(vi) -end -# NOTE(penelopeysm): MH does not conform to the usual LogDensityProblems -# interface in that it gets evaluated with a NamedTuple. Hence we need this -# method just to deal with MH. -function (f::OldLogDensityFunction)(x::NamedTuple) - vi = deepcopy(f.varinfo) - # Note that the NamedTuple `x` does NOT conform to the structure required for - # `InitFromParams`. In particular, for models that look like this: - # - # @model function f() - # v = Vector{Vector{Float64}} - # v[1] ~ MvNormal(zeros(2), I) - # end - # - # `InitFromParams` will expect Dict(@varname(v[1]) => [x1, x2]), but `x` will have the - # format `(v = [x1, x2])`. Hence we still need this `set_namedtuple!` function. 
- # - # In general `init!!(f.model, vi, InitFromParams(x))` will work iff the model only - # contains 'basic' varnames. - set_namedtuple!(vi, x) - # Update log probability. - _, vi_new = DynamicPPL.evaluate!!(f.model, vi) - return DynamicPPL.getlogjoint_internal(vi_new) +struct LinkedRW{C} + "The covariance matrix to use for the random-walk proposal in linked space." + cov_matrix::C end """ - set_namedtuple!(vi::VarInfo, nt::NamedTuple) + InitFromProposals(proposals::VarNamedTuple) -Places the values of a `NamedTuple` into the relevant places of a `VarInfo`. +An initialisation strategy that samples variables from user-defined proposal distributions. +If a proposal distribution is not found in `proposals`, then we defer to sampling from the +prior. """ -function set_namedtuple!(vi::DynamicPPL.VarInfoOrThreadSafeVarInfo, nt::NamedTuple) - for (n, vals) in pairs(nt) - vns = vi.metadata[n].vns - if vals isa AbstractVector - vals = unvectorize(vals) - end - if length(vns) == 1 - # Only one variable, assign the values to it - DynamicPPL.setindex!(vi, vals, vns[1]) +struct InitFromProposals{V<:DynamicPPL.VarNamedTuple} <: DynamicPPL.AbstractInitStrategy + "A mapping of VarNames to Tuple{Bool,Distribution}s that they should be sampled from. If + the VarName is not in this VarNamedTuple, then it will be sampled from the prior. The + Bool indicates whether the proposal is in linked space (true, i.e., the strategy should + return a `LinkedVectorValue`); or in untransformed space (false, i.e., the strategy + should return an `UntransformedValue`)." + proposals::V +end +function DynamicPPL.init( + rng::Random.AbstractRNG, vn::VarName, prior::Distribution, strategy::InitFromProposals +) + if haskey(strategy.proposals, vn) + # this is the proposal that the user wanted + is_linkedrw, dist = strategy.proposals[vn] + if is_linkedrw + transform = DynamicPPL.from_linked_vec_transform(prior) + linked_vec = rand(rng, dist) + sz = hasmethod(size, Tuple{typeof(prior)}) ? 
size(prior) : () + return DynamicPPL.LinkedVectorValue(linked_vec, transform, sz) else - # Spread the values across the variables - length(vns) == length(vals) || error("Unequal number of variables and values") - for (vn, val) in zip(vns, vals) - DynamicPPL.setindex!(vi, val, vn) - end + # Static or conditional proposal in untransformed space. + return DynamicPPL.UntransformedValue(rand(rng, dist)) end + else + # No proposal was specified for this variable, so we sample from the prior. + return DynamicPPL.UntransformedValue(rand(rng, prior)) end end -# unpack a vector if possible -unvectorize(dists::AbstractVector) = length(dists) == 1 ? first(dists) : dists - -# possibly unpack and reshape samples according to the prior distribution -function reconstruct(dist::Distribution, val::AbstractVector) - return DynamicPPL.from_vec_transform(dist)(val) -end -reconstruct(dist::AbstractVector{<:UnivariateDistribution}, val::AbstractVector) = val -function reconstruct(dist::AbstractVector{<:MultivariateDistribution}, val::AbstractVector) - offset = 0 - return map(dist) do d - n = length(d) - newoffset = offset + n - v = val[(offset + 1):newoffset] - offset = newoffset - return v - end -end - -""" - dist_val_tuple(spl::MH, vi::VarInfo) - -Return two `NamedTuples`. - -The first `NamedTuple` has symbols as keys and distributions as values. -The second `NamedTuple` has model symbols as keys and their stored values as values. 
-""" -function dist_val_tuple(spl::MH, vi::DynamicPPL.VarInfoOrThreadSafeVarInfo) - vns = all_varnames_grouped_by_symbol(vi) - dt = _dist_tuple(spl.proposals, vi, vns) - vt = _val_tuple(vi, vns) - return dt, vt -end - -@generated function _val_tuple(vi::VarInfo, vns::NamedTuple{names}) where {names} - isempty(names) && return :(NamedTuple()) - expr = Expr(:tuple) - expr.args = Any[ - :( - $name = reconstruct( - unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)), - DynamicPPL.getindex_internal(vi, vns.$name), - ) - ) for name in names - ] - return expr -end -_val_tuple(::VarInfo, ::Tuple{}) = () - -@generated function _dist_tuple( - props::NamedTuple{propnames}, vi::VarInfo, vns::NamedTuple{names} -) where {names,propnames} - isempty(names) === 0 && return :(NamedTuple()) - expr = Expr(:tuple) - expr.args = Any[ - if name in propnames - # We've been given a custom proposal, use that instead. - :($name = props.$name) - else - # Otherwise, use the default proposal. - :( - $name = AMH.StaticProposal( - unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)) - ) +const SymOrVNPair = Pair{<:Union{Symbol,VarName},<:Any} + +_to_varname(s::Symbol) = DynamicPPL.VarName{s}() +_to_varname(vn::VarName) = vn +_to_varname(x) = throw(ArgumentError("Expected Symbol or VarName, got $(typeof(x))")) + +function MH(pair1::SymOrVNPair, pairs::Vararg{SymOrVNPair}) + vn_proposal_pairs = (pair1, pairs...) + # It is assumed that `raw_vals` is a VarNamedTuple that has all the variables' values + # already set. We can obtain this by using `RawValueAccumulator`. Furthermore, + # `linked_vals` is a VarNamedTuple that stores a `MHLinkedVal` for any variables that + # have `LinkedRW` proposals. That in turn is obtained using `MHLinkedValuesAccumulator`. + function init_strategy_constructor(raw_vals, linked_vals) + proposals = DynamicPPL.VarNamedTuple() + for pair in vn_proposal_pairs + # Convert all keys to VarNames. 
+ vn, proposal = pair + vn = _to_varname(vn) + if !haskey(raw_vals, vn) + continue + end + proposal_dist = if proposal isa Distribution + # Static proposal. + (false, proposal) + elseif proposal isa LinkedRW + # The distribution we draw from is an MvNormal, centred at the current + # linked value, and with the given covariance matrix. We also need to add a + # flag to signal that this is being sampled in linked space. + (true, MvNormal(linked_vals[vn], proposal.cov_matrix)) + else + # It's a callable that takes `vnt` and returns a distribution. + (false, proposal(raw_vals)) + end + proposals = DynamicPPL.templated_setindex!!( + proposals, proposal_dist, vn, raw_vals.data[AbstractPPL.getsym(vn)] ) - end for name in names - ] - return expr -end -_dist_tuple(::@NamedTuple{}, ::VarInfo, ::Tuple{}) = () - -# Utility functions to link -should_link(varinfo, sampler, proposal) = false -function should_link(varinfo, sampler, proposal::NamedTuple{(),Tuple{}}) - # If it's an empty `NamedTuple`, we're using the priors as proposals - # in which case we shouldn't link. - return false -end -function should_link(varinfo, sampler, proposal::AdvancedMH.RandomWalkProposal) - return true -end -# FIXME: This won't be hit unless `vals` are all the exactly same concrete type of `AdvancedMH.RandomWalkProposal`! 
-function should_link( - varinfo, sampler, proposal::NamedTuple{names,vals} -) where {names,vals<:NTuple{<:Any,<:AdvancedMH.RandomWalkProposal}} - return true -end - -function maybe_link!!(varinfo, sampler, proposal, model) - return if should_link(varinfo, sampler, proposal) - DynamicPPL.link!!(varinfo, model) + end + return InitFromProposals(proposals) + end + all_vns = Set{VarName}(_to_varname(pair[1]) for pair in vn_proposal_pairs) + linkedrw_vns = Set{VarName}( + _to_varname(vn) for (vn, proposal) in vn_proposal_pairs if proposal isa LinkedRW + ) + link_strategy = if isempty(linkedrw_vns) + DynamicPPL.UnlinkAll() else - varinfo + DynamicPPL.LinkSome(linkedrw_vns, DynamicPPL.UnlinkAll()) end + return MH(init_strategy_constructor, link_strategy, all_vns) end -# Make a proposal if we don't have a covariance proposal matrix (the default). -function propose!!(rng::AbstractRNG, prev_state::MHState, model::Model, spl::MH, proposal) - vi = prev_state.varinfo - # Retrieve distribution and value NamedTuples. - dt, vt = dist_val_tuple(spl, vi) - - # Create a sampler and the previous transition. - mh_sampler = AMH.MetropolisHastings(dt) - prev_trans = AMH.Transition(vt, prev_state.logjoint_internal, false) - - # Make a new transition. - model = DynamicPPL.setleafcontext(model, MHContext(rng)) - densitymodel = AMH.DensityModel(OldLogDensityFunction(model, vi)) - trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) - # trans.params isa NamedTuple - set_namedtuple!(vi, trans.params) - # Here, `trans.lp` is equal to `getlogjoint_internal(vi)`. We don't know - # how to set this back inside vi (without re-evaluating). However, the next - # MH step will require this information to calculate the acceptance - # probability, so we return it together with vi. - return MHState(vi, trans.lp) -end - -# Make a proposal if we DO have a covariance proposal matrix. 
-function propose!!( - rng::AbstractRNG, - prev_state::MHState, - model::Model, - spl::MH, - proposal::AdvancedMH.RandomWalkProposal, -) - vi = prev_state.varinfo - # If this is the case, we can just draw directly from the proposal - # matrix. - vals = vi[:] - - # Create a sampler and the previous transition. - mh_sampler = AMH.MetropolisHastings(spl.proposals) - prev_trans = AMH.Transition(vals, prev_state.logjoint_internal, false) - - # Make a new transition. - model = DynamicPPL.setleafcontext(model, MHContext(rng)) - densitymodel = AMH.DensityModel(OldLogDensityFunction(model, vi)) - trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) - # trans.params isa AbstractVector - vi = DynamicPPL.unflatten(vi, trans.params) - # Here, `trans.lp` is equal to `getlogjoint_internal(vi)`. We don't know - # how to set this back inside vi (without re-evaluating). However, the next - # MH step will require this information to calculate the acceptance - # probability, so we return it together with vi. - return MHState(vi, trans.lp) -end - -function Turing.Inference.initialstep( - rng::AbstractRNG, +function AbstractMCMC.step( + rng::Random.AbstractRNG, model::DynamicPPL.Model, - spl::MH, - vi::AbstractVarInfo; + spl::MH; + initial_params::DynamicPPL.AbstractInitStrategy, discard_sample=false, kwargs..., ) - # If we're doing random walk with a covariance matrix, - # just link everything before sampling. - vi = maybe_link!!(vi, spl, spl.proposals, model) - - sample = if discard_sample - nothing - else - DynamicPPL.ParamsWithStats(vi, model) + # Generate and return initial parameters. We need to use VAIMAcc because that will + # generate the VNT for us that provides the values (as opposed to `vi.values` which + # stores `AbstractTransformedValues`). + # + # TODO(penelopeysm): This in fact could very well be OnlyAccsVarInfo. Indeed, if you + # only run MH, OnlyAccsVarInfo already works right now. The problem is that using MH + # inside Gibbs needs a full VarInfo. 
+ # + # see e.g. + # @model f() = x ~ Beta(2, 2) + # sample(f(), MH(:x => LinkedRW(0.4)), 100_000; progress=false) + # with full VarInfo: + # 2.302728 seconds (18.81 M allocations: 782.125 MiB, 9.00% gc time) + # with OnlyAccsVarInfo: + # 1.196674 seconds (18.51 M allocations: 722.256 MiB, 5.11% gc time) + vi = DynamicPPL.VarInfo() + vi = DynamicPPL.setacc!!(vi, DynamicPPL.RawValueAccumulator(false)) + vi = DynamicPPL.setacc!!(vi, MHLinkedValuesAccumulator()) + vi = DynamicPPL.setacc!!(vi, MHUnspecifiedPriorsAccumulator(spl.vns_with_proposal)) + _, vi = DynamicPPL.init!!(rng, model, vi, initial_params, spl.transform_strategy) + + # Since our initial parameters are sampled with `initial_params`, which could be + # anything, it's possible that the initial parameters are outside the support of the + # proposal. That will mess up the sampling because when calculating the proposal density + # ratio, we will get -Inf for the forward proposal density (i.e., log(g(x|x'))), because + # `log(g(x))` is already -Inf regardless of what `x'` is. We insert a check for this + # here. + initial_raw_values = DynamicPPL.get_raw_values(vi) + initial_linked_values = DynamicPPL.getacc(vi, Val(MH_ACC_NAME)).values + init_strategy = spl.init_strategy_constructor(initial_raw_values, initial_linked_values) + initial_unspecified_priors = DynamicPPL.getacc(vi, Val(MH_PRIOR_ACC_NAME)).values + initial_log_proposal_density = log_proposal_density( + vi, init_strategy, initial_unspecified_priors + ) + if initial_log_proposal_density == -Inf + io = IOContext(IOBuffer(), :color => true) + show(io, "text/plain", initial_raw_values) + init_str = String(take!(io.io)) + error( + "The initial parameters have zero probability density under the proposal" * + " distribution (for example, an initial value of `x=2.0` for a" * + " proposal `@varname(x) => Uniform(0, 1)`. This will cause the" * + " sampler to get stuck at the initial parameters. Consider specifying" * + " different initial parameters (e.g. 
via `InitFromParams`) or using a" * + " different proposal distribution." * + " Your initial values were:\n\n$init_str\n", + ) end - state = MHState(vi, DynamicPPL.getlogjoint_internal(vi)) - return sample, state + + transition = + discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi, (; accepted=true)) + return transition, vi end function AbstractMCMC.step( - rng::AbstractRNG, + rng::Random.AbstractRNG, model::DynamicPPL.Model, spl::MH, - state::MHState; + old_vi::DynamicPPL.AbstractVarInfo; discard_sample=false, kwargs..., ) - # Cases: - # 1. A covariance proposal matrix - # 2. A bunch of NamedTuples that specify the proposal space - new_state = propose!!(rng, state, model, spl, spl.proposals) - - sample = if discard_sample - nothing + old_lp = DynamicPPL.getlogjoint_internal(old_vi) + # The initialisation strategy that we use to generate a proposal depends on the + # state from the previous step. We need to extract the raw values and linked values + # that were used in the previous step. + old_raw_values = DynamicPPL.get_raw_values(old_vi) + old_linked_values = DynamicPPL.getacc(old_vi, Val(MH_ACC_NAME)).values + old_unspecified_priors = DynamicPPL.getacc(old_vi, Val(MH_PRIOR_ACC_NAME)).values + + init_strategy_given_old = spl.init_strategy_constructor( + old_raw_values, old_linked_values + ) + + # Evaluate the model with a new proposal. + new_vi = DynamicPPL.VarInfo() + new_vi = DynamicPPL.setacc!!(new_vi, DynamicPPL.RawValueAccumulator(false)) + new_vi = DynamicPPL.setacc!!(new_vi, MHLinkedValuesAccumulator()) + new_vi = DynamicPPL.setacc!!( + new_vi, MHUnspecifiedPriorsAccumulator(spl.vns_with_proposal) + ) + _, new_vi = DynamicPPL.init!!( + rng, model, new_vi, init_strategy_given_old, spl.transform_strategy + ) + new_lp = DynamicPPL.getlogjoint_internal(new_vi) + # We need to reconstruct the initialisation strategy for the 'reverse' transition + # i.e. from new_vi to old_vi. That allows us to calculate the proposal density + # ratio. 
+ new_raw_values = DynamicPPL.get_raw_values(new_vi) + new_linked_values = DynamicPPL.getacc(new_vi, Val(MH_ACC_NAME)).values + new_unspecified_priors = DynamicPPL.getacc(new_vi, Val(MH_PRIOR_ACC_NAME)).values + + init_strategy_given_new = spl.init_strategy_constructor( + new_raw_values, new_linked_values + ) + + # Calculate the log-acceptance probability. + log_a = ( + new_lp - old_lp + + log_proposal_density(old_vi, init_strategy_given_new, old_unspecified_priors) - + log_proposal_density(new_vi, init_strategy_given_old, new_unspecified_priors) + ) + + # Decide whether to accept. + accepted, vi = if -Random.randexp(rng) < log_a + true, new_vi else - DynamicPPL.ParamsWithStats(new_state.varinfo, model) + false, old_vi end - return sample, new_state + transition = + discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi, (; accepted=accepted)) + return transition, vi end -struct MHContext{R<:AbstractRNG} <: DynamicPPL.AbstractContext - rng::R +""" + log_proposal_density( + old_vi::DynamicPPL.AbstractVarInfo, + init_strategy_given_new::DynamicPPL.AbstractInitStrategy, + old_unspecified_priors::DynamicPPL.VarNamedTuple + ) + +Calculate the ratio `g(x|x')` where `g` is the proposal distribution used to generate +`x` (represented by `old_vi`), given the new state `x'`. + +If the arguments are switched (i.e., `new_vi` is passed as the first argument, and +`init_strategy_given_old` as the second), the function calculates `g(x'|x)`. + +The log-density of the proposal distribution is calculated by summing up the contributions +from: + +- any variables that have an explicit proposal in `init_strategy_given_new` (i.e., those + in `spl.vns_with_proposal`), which can be either static or conditional proposals; and +- any variables that do not have an explicit proposal, for which we defer to its prior + distribution. 
+""" +function log_proposal_density( + vi::DynamicPPL.AbstractVarInfo, ::DynamicPPL.InitFromPrior, ::DynamicPPL.VarNamedTuple +) + # All samples were drawn from the prior -- in this case g(x|x') = g(x) = prior + # probability of x. + return DynamicPPL.getlogprior(vi) +end +function log_proposal_density( + vi::DynamicPPL.AbstractVarInfo, + strategy::InitFromProposals, + unspecified_priors::DynamicPPL.VarNamedTuple, +) + # In this case, the proposal distribution might indeed be conditional, so we need to + # 'run' the initialisation strategies both ways. Luckily, we don't need to run the model + # itself, since all the information we need is in the proposals. That is the reason why + # we have to cache the priors in the InitFromProposals struct -- if any variables were + # not given an explicit proposal (in `strategy.proposals`) we need to know what their + # prior was. + vals = DynamicPPL.get_raw_values(vi) + g = 0.0 + for (vn, (is_linkedrw, proposal)) in pairs(strategy.proposals) + if is_linkedrw + # LinkedRW proposals end up here, but they are symmetric proposals, so we can + # skip their contribution. + continue + else + # proposal isa Distribution + g += logpdf(proposal, vals[vn]) + end + end + for (vn, prior) in pairs(unspecified_priors) + g += logpdf(prior, vals[vn]) + end + return g end -function DynamicPPL.tilde_assume!!( - context::MHContext, right::Distribution, vn::VarName, vi::AbstractVarInfo +# Accumulator to store linked values; but only the ones that have a LinkedRW proposal. Since +# model evaluation should have happened with `s.transform_strategy`, any variables that are +# marked by `s.transform_strategy` as being linked should generate a LinkedVectorValue here. 
+const MH_ACC_NAME = :MHLinkedValues +struct StoreLinkedValues end +function (s::StoreLinkedValues)(val, tval::DynamicPPL.LinkedVectorValue, logjac, vn, dist) + return DynamicPPL.get_internal_value(tval) +end +function (s::StoreLinkedValues)( + val, ::DynamicPPL.AbstractTransformedValue, logjac, vn, dist ) - # Allow MH to sample new variables from the prior if it's not already present in the - # VarInfo. - dispatch_ctx = if haskey(vi, vn) - DynamicPPL.DefaultContext() + return DynamicPPL.DoNotAccumulate() +end +function MHLinkedValuesAccumulator() + return DynamicPPL.VNTAccumulator{MH_ACC_NAME}(StoreLinkedValues()) +end + +# Accumulator to store priors for any variables that were not given an explicit proposal. +# This is needed to compute the log-proposal density correctly. +const MH_PRIOR_ACC_NAME = :MHUnspecifiedPriors +struct StoreUnspecifiedPriors + vns_with_proposal::Set{VarName} +end +function (s::StoreUnspecifiedPriors)(val, tval, logjac, vn, dist::Distribution) + return if vn in s.vns_with_proposal + DynamicPPL.DoNotAccumulate() else - DynamicPPL.InitContext(context.rng, DynamicPPL.InitFromPrior()) + dist end - return DynamicPPL.tilde_assume!!(dispatch_ctx, right, vn, vi) end -function DynamicPPL.tilde_observe!!( - ::MHContext, right::Distribution, left, vn::Union{VarName,Nothing}, vi::AbstractVarInfo -) - return DynamicPPL.tilde_observe!!(DefaultContext(), right, left, vn, vi) +function MHUnspecifiedPriorsAccumulator(vns_with_proposal) + return DynamicPPL.VNTAccumulator{MH_PRIOR_ACC_NAME}( + StoreUnspecifiedPriors(vns_with_proposal) + ) +end + +# RWMH can be delegated to AdvancedMH. The type bound is intentionally lax because we just +# let the MvNormal constructor handle it. 
+function MH(cov_matrix::Any) + return externalsampler(AdvancedMH.RWMH(MvNormal(cov_matrix)); unconstrained=true) end diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 09251ad36f..f02f87b81e 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -2,6 +2,8 @@ ### Particle Filtering and Particle MCMC Samplers. ### +using Accessors: Accessors + function error_if_threadsafe_eval(model::DynamicPPL.Model) if DynamicPPL.requires_threadsafe(model) throw( @@ -19,10 +21,15 @@ struct ParticleMCMCContext{R<:AbstractRNG} <: DynamicPPL.AbstractContext rng::R end -struct TracedModel{V<:AbstractVarInfo,M<:Model,T<:Tuple,NT<:NamedTuple} <: - AdvancedPS.AbstractTuringLibtaskModel +mutable struct TracedModel{M<:Model,T<:Tuple,NT<:NamedTuple} <: + AdvancedPS.AbstractGenericModel model::M - varinfo::V + # TODO(penelopeysm): I don't like that this is an abstract type. However, the problem is + # that the type of VarInfo can change during execution, especially with PG-inside-Gibbs + # when you have to muck with merging VarInfos from different sub-conditioned models. I + # do think that this can be solved. I would recommend that if you have time try to + # change this back into a type parameter, see what breaks in CI, and try to fix it. + varinfo::AbstractVarInfo resample::Bool fargs::T kwargs::NT @@ -121,7 +128,7 @@ function AbstractMCMC.sample( progress=PROGRESS[], kwargs..., ) - check_model && _check_model(model, sampler) + check_model && Turing._check_model(model, sampler) error_if_threadsafe_eval(model) check_model_kwargs(model) # need to add on the `nparticles` keyword argument for `initialstep` to make use of @@ -176,6 +183,7 @@ function Turing.Inference.initialstep( # Create a new set of particles. 
particles = AdvancedPS.ParticleContainer( + # her [AdvancedPS.Trace(model, vi, AdvancedPS.TracedRNG(), true) for _ in 1:nparticles], AdvancedPS.TracedRNG(), rng, @@ -376,12 +384,7 @@ end Get the varinfo stored in the 'taped globals' of a `Libtask.TapedTask`. """ function get_trace_local_varinfo() - trace = try - Libtask.get_taped_globals(Any).other - catch e - e == KeyError(:task_variable) ? nothing : rethrow(e) - end - trace === nothing && error("No trace found in Libtask globals; this should not happen.") + trace = Libtask.get_taped_globals(Any).other return trace.model.f.varinfo::AbstractVarInfo end @@ -395,22 +398,14 @@ example, in SMC, this is true for all particles; in PG, this is true for all par except the reference particle, whose trajectory must be reproduced exactly. """ function get_trace_local_resampled() - trace = try - Libtask.get_taped_globals(Any).other - catch e - e == KeyError(:task_variable) ? nothing : rethrow(e) - end - trace === nothing && error("No trace found in Libtask globals; this should not happen.") + trace = Libtask.get_taped_globals(Any).other return trace.model.f.resample::Bool end """ get_trace_local_rng() -Get the `Trace` local rng if one exists. - -If executed within a `TapedTask`, return the `rng` stored in the "taped globals" of the -task, otherwise return `vi`. +Get the RNG stored in the 'taped globals' of a `Libtask.TapedTask`, if one exists. """ function get_trace_local_rng() return Libtask.get_taped_globals(Any).rng @@ -426,15 +421,12 @@ Returns `nothing`. 
""" function set_trace_local_varinfo(vi::AbstractVarInfo) trace = Libtask.get_taped_globals(Any).other - trace === nothing && error("No trace found in Libtask globals; this should not happen.") - model = trace.model - model = Accessors.@set model.f.varinfo = vi - trace.model = model + trace.model.f.varinfo = vi return nothing end function DynamicPPL.tilde_assume!!( - ctx::ParticleMCMCContext, dist::Distribution, vn::VarName, vi::AbstractVarInfo + ::ParticleMCMCContext, dist::Distribution, vn::VarName, template::Any, ::AbstractVarInfo ) # Get all the info we need from the trace, namely, the stored VarInfo, and whether # we need to sample a new value or use the existing one. @@ -443,11 +435,11 @@ function DynamicPPL.tilde_assume!!( resample = get_trace_local_resampled() # Modify the varinfo as appropriate. dispatch_ctx = if ~haskey(vi, vn) || resample - DynamicPPL.InitContext(trng, DynamicPPL.InitFromPrior()) + DynamicPPL.InitContext(trng, DynamicPPL.InitFromPrior(), DynamicPPL.UnlinkAll()) else DynamicPPL.DefaultContext() end - x, vi = DynamicPPL.tilde_assume!!(dispatch_ctx, dist, vn, vi) + x, vi = DynamicPPL.tilde_assume!!(dispatch_ctx, dist, vn, template, vi) # Set the varinfo back in the trace. set_trace_local_varinfo(vi) return x, vi @@ -501,7 +493,7 @@ function DynamicPPL.acclogp(acc1::ProduceLogLikelihoodAccumulator, val) end function DynamicPPL.accumulate_assume!!( - acc::ProduceLogLikelihoodAccumulator, val, logjac, vn, right + acc::ProduceLogLikelihoodAccumulator, val, tval, logjac, vn, right, template ) return acc end @@ -534,6 +526,6 @@ Libtask.@might_produce(DynamicPPL.tilde_observe!!) # Could tilde_assume!! have tighter type bounds on the arguments, namely a GibbsContext? # That's the only thing that makes tilde_assume calls result in tilde_observe calls. Libtask.@might_produce(DynamicPPL.tilde_assume!!) -Libtask.@might_produce(DynamicPPL.evaluate!!) +Libtask.@might_produce(DynamicPPL._evaluate!!) Libtask.@might_produce(DynamicPPL.init!!) 
Libtask.might_produce(::Type{<:Tuple{<:DynamicPPL.Model,Vararg}}) = true diff --git a/src/mcmc/prior.jl b/src/mcmc/prior.jl index a0665affc6..44a004086d 100644 --- a/src/mcmc/prior.jl +++ b/src/mcmc/prior.jl @@ -14,12 +14,14 @@ function AbstractMCMC.step( kwargs..., ) accs = DynamicPPL.AccumulatorTuple(( - DynamicPPL.ValuesAsInModelAccumulator(true), + DynamicPPL.RawValueAccumulator(true), DynamicPPL.LogPriorAccumulator(), DynamicPPL.LogLikelihoodAccumulator(), )) vi = DynamicPPL.OnlyAccsVarInfo(accs) - _, vi = DynamicPPL.init!!(rng, model, vi, DynamicPPL.InitFromPrior()) + _, vi = DynamicPPL.init!!( + rng, model, vi, DynamicPPL.InitFromPrior(), DynamicPPL.UnlinkAll() + ) transition = discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi) return transition, nothing end diff --git a/src/mcmc/repeat_sampler.jl b/src/mcmc/repeat_sampler.jl index 976b361c77..494f455b0d 100644 --- a/src/mcmc/repeat_sampler.jl +++ b/src/mcmc/repeat_sampler.jl @@ -119,7 +119,7 @@ function AbstractMCMC.sample( progress=PROGRESS[], kwargs..., ) - check_model && _check_model(model, sampler) + check_model && Turing._check_model(model, sampler) return AbstractMCMC.mcmcsample( rng, model, @@ -145,7 +145,7 @@ function AbstractMCMC.sample( progress=PROGRESS[], kwargs..., ) - check_model && _check_model(model, sampler) + check_model && Turing._check_model(model, sampler) return AbstractMCMC.mcmcsample( rng, model, diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index f4535bac89..74f68b66e6 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -2,7 +2,7 @@ module Optimisation using ..Turing using AbstractPPL: AbstractPPL, VarName -using DynamicPPL: DynamicPPL, VarInfo, LogDensityFunction +using DynamicPPL: DynamicPPL, VarInfo, LogDensityFunction, VarNamedTuple using DocStringExtensions: TYPEDFIELDS using LogDensityProblems: LogDensityProblems using Optimization: Optimization @@ -11,7 +11,6 @@ using Random: Random using SciMLBase: 
SciMLBase using ADTypes: ADTypes using StatsBase: StatsBase -using Accessors: Accessors using Printf: Printf using ForwardDiff: ForwardDiff using StatsAPI: StatsAPI @@ -56,7 +55,7 @@ end """ ModeResult{ E<:ModeEstimator, - P<:AbstractDict{<:VarName,<:Any} + P<:DynamicPPL.VarNamedTuple, LP<:Real, L<:DynamicPPL.LogDensityFunction, O<:Any, @@ -69,11 +68,7 @@ A wrapper struct to store various results from a MAP or MLE estimation. $(TYPEDFIELDS) """ struct ModeResult{ - E<:ModeEstimator, - P<:AbstractDict{<:AbstractPPL.VarName,<:Any}, - LP<:Real, - L<:LogDensityFunction, - O, + E<:ModeEstimator,P<:DynamicPPL.VarNamedTuple,LP<:Real,L<:DynamicPPL.LogDensityFunction,O } <: StatsBase.StatisticalModel "The type of mode estimation (MAP or MLE)." estimator::E @@ -135,8 +130,8 @@ function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) println(io, " ├ estimator : $(typeof(m.estimator))") println(io, " ├ lp : $(m.lp)") entries = length(m.params) == 1 ? "entry" : "entries" - println(io, " ├ params : $(typeof(m.params)) with $(length(m.params)) $(entries)") - for (i, (vn, val)) in enumerate(m.params) + println(io, " ├ params : VarNamedTuple with $(length(m.params)) $(entries)") + for (i, (vn, val)) in enumerate(pairs(m.params)) tree_char = i == length(m.params) ? 
"└" : "├" println(io, " │ $(tree_char) $vn => $(val)") end @@ -159,13 +154,20 @@ function DynamicPPL.InitFromParams( return DynamicPPL.InitFromParams(m.params, fallback) end -struct ConstraintCheckAccumulator <: AbstractAccumulator - lb::NTOrVNDict # Must be in unlinked space - ub::NTOrVNDict # Must be in unlinked space +struct ConstraintCheckAccumulator{Vlb<:VarNamedTuple,Vub<:VarNamedTuple} <: + AbstractAccumulator + lb::Vlb # Must be in unlinked space + ub::Vub # Must be in unlinked space end DynamicPPL.accumulator_name(::ConstraintCheckAccumulator) = :OptimConstraintCheck function DynamicPPL.accumulate_assume!!( - acc::ConstraintCheckAccumulator, val::Any, ::Any, vn::VarName, dist::Distribution + acc::ConstraintCheckAccumulator, + val::Any, + tval::Any, + logjac::Any, + vn::VarName, + dist::Distribution, + template::Any, ) # `val`, `acc.lb`, and `acc.ub` are all in unlinked space. lb = get_constraints(acc.lb, vn) @@ -320,17 +322,16 @@ function estimate_mode( solver=LBFGS(); link::Bool=true, initial_params=DynamicPPL.InitFromPrior(), - lb::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), - ub::Union{NamedTuple,AbstractDict{<:VarName,<:Any}}=(;), + lb::Union{NamedTuple,AbstractDict{<:VarName,<:Any},VarNamedTuple}=VarNamedTuple(), + ub::Union{NamedTuple,AbstractDict{<:VarName,<:Any},VarNamedTuple}=VarNamedTuple(), adtype=ADTypes.AutoForwardDiff(), check_model::Bool=true, check_constraints_at_runtime::Bool=true, solve_kwargs..., ) - if check_model - new_model = DynamicPPL.setleafcontext(model, DynamicPPL.InitContext()) - DynamicPPL.check_model(new_model, VarInfo(); error_on_failure=true) - end + check_model && Turing._check_model(model) + lb = Turing._to_varnamedtuple(lb) + ub = Turing._to_varnamedtuple(ub) # Generate a LogDensityFunction first. 
We do this first because we want to use the # info stored in the LDF to generate the initial parameters and constraints in the diff --git a/src/optimisation/init.jl b/src/optimisation/init.jl index 3f65a9d5b1..1d81a0130e 100644 --- a/src/optimisation/init.jl +++ b/src/optimisation/init.jl @@ -1,28 +1,27 @@ using DynamicPPL: AbstractInitStrategy, AbstractAccumulator using Distributions -# TODO(penelopeysm): Eventually replace with VNT -const NTOrVNDict = Union{NamedTuple,AbstractDict{<:VarName,<:Any}} -function get_constraints(bounds::NTOrVNDict, vn::VarName) - return if AbstractPPL.hasvalue(bounds, vn) - return AbstractPPL.getvalue(bounds, vn) - else - nothing - end -end - """ InitWithConstraintCheck(lb, ub, actual_strategy) <: AbstractInitStrategy Initialise parameters with `actual_strategy`, but check that the initialised parameters satisfy any bounds in `lb` and `ub`. """ -struct InitWithConstraintCheck{Tlb<:NTOrVNDict,Tub<:NTOrVNDict} <: AbstractInitStrategy +struct InitWithConstraintCheck{Tlb<:VarNamedTuple,Tub<:VarNamedTuple} <: + AbstractInitStrategy lb::Tlb ub::Tub actual_strategy::AbstractInitStrategy end +function get_constraints(constraints::VarNamedTuple, vn::VarName) + if haskey(constraints, vn) + return constraints[vn] + else + return nothing + end +end + const MAX_ATTEMPTS = 1000 """ @@ -119,8 +118,10 @@ function DynamicPPL.init( # The inner `init` might (for whatever reason) return linked or otherwise # transformed values. We need to transform them back into to unlinked space, # so that we can check the constraints properly. 
- maybe_transformed_val, transform = DynamicPPL.init(rng, vn, dist, c.actual_strategy) - proposed_val = transform(maybe_transformed_val) + maybe_transformed_val = DynamicPPL.init(rng, vn, dist, c.actual_strategy) + proposed_val = DynamicPPL.get_transform(maybe_transformed_val)( + DynamicPPL.get_internal_value(maybe_transformed_val) + ) attempts = 1 while !satisfies_constraints(lb, ub, proposed_val, dist) if attempts >= MAX_ATTEMPTS @@ -130,11 +131,13 @@ function DynamicPPL.init( ), ) end - maybe_transformed_val, transform = DynamicPPL.init(rng, vn, dist, c.actual_strategy) - proposed_val = transform(maybe_transformed_val) + maybe_transformed_val = DynamicPPL.init(rng, vn, dist, c.actual_strategy) + proposed_val = DynamicPPL.get_transform(maybe_transformed_val)( + DynamicPPL.get_internal_value(maybe_transformed_val) + ) attempts += 1 end - return (proposed_val, DynamicPPL.typed_identity) + return DynamicPPL.UntransformedValue(proposed_val) end can_have_linked_constraints(::Distribution) = false @@ -153,13 +156,15 @@ end can_have_linked_constraints(::Dirichlet) = false can_have_linked_constraints(::LKJCholesky) = false -struct ConstraintAccumulator <: AbstractAccumulator +struct ConstraintAccumulator{ + T<:DynamicPPL.AbstractTransformStrategy,Vlb<:VarNamedTuple,Vub<:VarNamedTuple +} <: AbstractAccumulator "Whether to store constraints in linked space or not." - link::Bool + transform_strategy::T "A mapping of VarNames to lower bounds in untransformed space." - lb::NTOrVNDict + lb::Vlb "A mapping of VarNames to upper bounds in untransformed space." - ub::NTOrVNDict + ub::Vub "The initial values for the optimisation in linked space (if link=true) or unlinked space (if link=false)." init_vecs::Dict{VarName,AbstractVector} @@ -169,8 +174,10 @@ struct ConstraintAccumulator <: AbstractAccumulator "The upper bound vectors for the optimisation in linked space (if link=true) or unlinked space (if link=false)." 
ub_vecs::Dict{VarName,AbstractVector} - function ConstraintAccumulator(link::Bool, lb::NTOrVNDict, ub::NTOrVNDict) - return new( + function ConstraintAccumulator( + link::DynamicPPL.AbstractTransformStrategy, lb::VarNamedTuple, ub::VarNamedTuple + ) + return new{typeof(link),typeof(lb),typeof(ub)}( link, lb, ub, @@ -183,7 +190,13 @@ end const CONSTRAINT_ACC_NAME = :OptimConstraints DynamicPPL.accumulator_name(::ConstraintAccumulator) = CONSTRAINT_ACC_NAME function DynamicPPL.accumulate_assume!!( - acc::ConstraintAccumulator, val::Any, ::Any, vn::VarName, dist::Distribution + acc::ConstraintAccumulator, + val::Any, + tval::Any, + logjac::Any, + vn::VarName, + dist::Distribution, + template::Any, ) # First check if we have any incompatible constraints + linking. 'Incompatible', here, # means that the constraints as defined in the unlinked space do not map to box @@ -192,18 +205,28 @@ function DynamicPPL.accumulate_assume!!( # e.g. Dirichlet. lb = get_constraints(acc.lb, vn) ub = get_constraints(acc.ub, vn) - if (lb !== nothing || ub !== nothing) && acc.link && !can_have_linked_constraints(dist) + should_be_linked = + DynamicPPL.target_transform(acc.transform_strategy, vn) isa DynamicPPL.DynamicLink + if (lb !== nothing || ub !== nothing) && + should_be_linked && + !can_have_linked_constraints(dist) throw( ArgumentError( "Cannot use constraints for variable $(vn) with distribution $(typeof(dist)) when performing linked optimisation; this is because the constraints cannot be cleanly mapped to linked space. 
If you need to use constraints for this variable, please set `link=false` when optimising, or manually perform optimisation with your own LogDensityFunction.", ), ) end - transform = if acc.link - DynamicPPL.to_linked_vec_transform(dist) - else - DynamicPPL.to_vec_transform(dist) - end + transform = + if DynamicPPL.target_transform(acc.transform_strategy, vn) isa + DynamicPPL.DynamicLink + DynamicPPL.to_linked_vec_transform(dist) + elseif DynamicPPL.target_transform(acc.transform_strategy, vn) isa DynamicPPL.Unlink + DynamicPPL.to_vec_transform(dist) + else + error( + "don't know how to handle transform strategy $(acc.transform_strategy) for variable $(vn)", + ) + end # Transform the value and store it. vectorised_val = transform(val) acc.init_vecs[vn] = vectorised_val @@ -227,20 +250,20 @@ function DynamicPPL.accumulate_observe!!( return acc end function DynamicPPL.reset(acc::ConstraintAccumulator) - return ConstraintAccumulator(acc.link, acc.lb, acc.ub) + return ConstraintAccumulator(acc.transform_strategy, acc.lb, acc.ub) end function Base.copy(acc::ConstraintAccumulator) # ConstraintAccumulator should not ever modify `acc.lb` or `acc.ub` (and when # constructing it inside `make_optim_bounds_and_init` we make sure to deepcopy any user # input), so there is no chance that `lb` or `ub` could ever be mutated once they're # inside the accumulator. Hence we don't need to copy them. 
- return ConstraintAccumulator(acc.link, acc.lb, acc.ub) + return ConstraintAccumulator(acc.transform_strategy, acc.lb, acc.ub) end function DynamicPPL.split(acc::ConstraintAccumulator) - return ConstraintAccumulator(acc.link, acc.lb, acc.ub) + return ConstraintAccumulator(acc.transform_strategy, acc.lb, acc.ub) end function DynamicPPL.combine(acc1::ConstraintAccumulator, acc2::ConstraintAccumulator) - combined = ConstraintAccumulator(acc1.link, acc1.lb, acc1.ub) + combined = ConstraintAccumulator(acc1.transform_strategy, acc1.lb, acc1.ub) combined.init_vecs = merge(acc1.init_vecs, acc2.init_vecs) combined.lb_vecs = merge(acc1.lb_vecs, acc2.lb_vecs) combined.ub_vecs = merge(acc1.ub_vecs, acc2.ub_vecs) @@ -261,34 +284,35 @@ end """ make_optim_bounds_and_init( rng::Random.AbstractRNG, - ldf::LogDensityFunction{Tlink}, + ldf::LogDensityFunction, initial_params::AbstractInitStrategy, - lb::NTOrVNDict, - ub::NTOrVNDict, - ) where {Tlink} + lb::VarNamedTuple, + ub::VarNamedTuple, + ) Generate a tuple of `(lb_vec, ub_vec, init_vec)` which are suitable for directly passing to Optimization.jl. All three vectors returned will be in the unlinked or linked space -depending on the value of `link`. +depending on `ldf.transform_strategy`, which in turn is defined by the value of `link` passed +to `mode_estimate`. The `lb` and `ub` arguments, as well as any `initial_params` provided as `InitFromParams`, are expected to be in the unlinked space. """ function make_optim_bounds_and_init( rng::Random.AbstractRNG, - ldf::LogDensityFunction{Tlink}, + ldf::LogDensityFunction, initial_params::AbstractInitStrategy, - lb::NTOrVNDict, - ub::NTOrVNDict, -) where {Tlink} + lb::VarNamedTuple, + ub::VarNamedTuple, +) # Initialise a VarInfo with parameters that satisfy the constraints. - ctx = DynamicPPL.InitContext(rng, InitWithConstraintCheck(lb, ub, initial_params)) - new_model = DynamicPPL.setleafcontext(ldf.model, ctx) - # Run the model. 
+ # ConstraintAccumulator only needs the raw value so we can use UnlinkAll() as the + # transform strategy for this + init_strategy = InitWithConstraintCheck(lb, ub, initial_params) vi = DynamicPPL.OnlyAccsVarInfo(( - ConstraintAccumulator(Tlink, deepcopy(lb), deepcopy(ub)), + ConstraintAccumulator(ldf.transform_strategy, deepcopy(lb), deepcopy(ub)), )) - _, vi = DynamicPPL.evaluate!!(new_model, vi) + _, vi = DynamicPPL.init!!(rng, ldf.model, vi, init_strategy, DynamicPPL.UnlinkAll()) # Now extract the accumulator, and construct the vectorised constraints using the # ranges stored in the LDF. constraint_acc = DynamicPPL.getacc(vi, Val(CONSTRAINT_ACC_NAME)) diff --git a/src/variational/Variational.jl b/src/variational/Variational.jl index 2949e6902c..0ff18483f8 100644 --- a/src/variational/Variational.jl +++ b/src/variational/Variational.jl @@ -88,12 +88,11 @@ function q_initialize_scale( ) prob = DynamicPPL.LogDensityFunction(model) ℓπ = Base.Fix1(LogDensityProblems.logdensity, prob) - varinfo = DynamicPPL.VarInfo(model) n_trial = 0 while true q = AdvancedVI.MvLocationScale(location, scale, basedist) - b = Bijectors.bijector(model; varinfo=varinfo) + b = Bijectors.bijector(model) q_trans = Bijectors.transformed(q, Bijectors.inverse(b)) energy = mean(ℓπ, eachcol(rand(rng, q_trans, num_samples))) @@ -187,7 +186,7 @@ function q_locationscale( end end q = AdvancedVI.MvLocationScale(μ, L, basedist) - b = Bijectors.bijector(model; varinfo=varinfo) + b = Bijectors.bijector(model) return Bijectors.transformed(q, Bijectors.inverse(b)) end diff --git a/test/Project.toml b/test/Project.toml index c93498e47c..ed520f1300 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -41,7 +41,7 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] ADTypes = "1" AbstractMCMC = "5.9" -AbstractPPL = "0.11, 0.12, 0.13" +AbstractPPL = "0.14" AdvancedMH = "0.8.9" AdvancedPS = "0.7.2" AdvancedVI = "0.6" @@ -54,7 +54,7 @@ DifferentiationInterface = "0.7" Distributions = "0.25" 
DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.39.11" +DynamicPPL = "0.40" FiniteDifferences = "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10, 1" HypothesisTests = "0.11" @@ -77,3 +77,6 @@ StatsBase = "0.33, 0.34" StatsFuns = "0.9.5, 1" TimerOutputs = "0.5" julia = "1.10" + +[sources] +DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "breaking"} diff --git a/test/ad.jl b/test/ad.jl index 0d4f55a643..5218fbf7d0 100644 --- a/test/ad.jl +++ b/test/ad.jl @@ -7,9 +7,10 @@ using DynamicPPL.TestUtils.AD: run_ad using Random: Random using StableRNGs: StableRNG using Test -using ..Models: gdemo_default import ForwardDiff, ReverseDiff, Mooncake +gdemo_default = DynamicPPL.TestUtils.demo_assume_observe_literal() + """Element types that are always valid for a VarInfo regardless of ADType.""" const always_valid_eltypes = (AbstractFloat, AbstractIrrational, Integer, Rational) @@ -148,6 +149,7 @@ function check_adtype(context::ADTypeCheckContext, vi::DynamicPPL.AbstractVarInf param_eltype = DynamicPPL.get_param_eltype(vi, context) valids = valid_eltypes(context) if !(any(param_eltype .<: valids)) + @show context throw(IncompatibleADTypeError(param_eltype, adtype(context))) end end @@ -157,9 +159,15 @@ end # child context. 
function DynamicPPL.tilde_assume!!( - context::ADTypeCheckContext, right::Distribution, vn::VarName, vi::AbstractVarInfo + context::ADTypeCheckContext, + right::Distribution, + vn::VarName, + template::Any, + vi::AbstractVarInfo, ) - value, vi = DynamicPPL.tilde_assume!!(DynamicPPL.childcontext(context), right, vn, vi) + value, vi = DynamicPPL.tilde_assume!!( + DynamicPPL.childcontext(context), right, vn, template, vi + ) check_adtype(context, vi) return value, vi end diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml index 5069b35d89..2a7f3c47f3 100644 --- a/test/integration/enzyme/Project.toml +++ b/test/integration/enzyme/Project.toml @@ -9,3 +9,4 @@ Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [sources] Turing = {path = "../../../"} +DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "breaking"} diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index 99c38616de..3b8a80c999 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -29,7 +29,6 @@ using Turing samplers = ( HMC(0.1, 7), PG(10), - IS(), MH(), Gibbs(:s => PG(3), :m => HMC(0.4, 8)), Gibbs(:s => HMC(0.1, 5), :m => ESS()), @@ -391,19 +390,11 @@ using Turing return x end - is = IS() smc = SMC() pg = PG(10) N = 1_000 - # For IS, we need to calculate the logevidence ourselves - res_is = sample(StableRNG(seed), test(), is, N) - @test all(isone, res_is[:x]) - # below is more numerically stable than log(mean(exp.(res_is[:loglikelihood]))) - is_logevidence = logsumexp(res_is[:loglikelihood]) - log(N) - @test is_logevidence ≈ 2 * log(0.5) - # For SMC, the chain stores the collective logevidence of the sampled trajectories # as a statistic (which is the same for all 'iterations'). So we can just pick the # first one. 
@@ -634,7 +625,7 @@ using Turing end # Can't test with HMC/NUTS because some AD backends error; see # https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/802 - @test sample(e(), IS(), 100) isa MCMCChains.Chains + @test sample(e(), Prior(), 100) isa MCMCChains.Chains end end diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 957d33acfb..3774707f7f 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -10,7 +10,7 @@ using Turing # Set up a model for which check_model errors. @model f() = x ~ Normal() model = f() - Turing.Inference._check_model(::typeof(model)) = error("nope") + Turing._check_model(::typeof(model)) = error("nope") # Make sure that default sampling does throw the error. @test_throws "nope" sample(model, NUTS(), 100) @test_throws "nope" sample(model, NUTS(), MCMCThreads(), 100, 2) @@ -54,9 +54,10 @@ end model = coinflip() lptrue = logpdf(Binomial(25, 0.2), 10) let inits = InitFromParams((; p=0.2)) - chain = sample(model, spl, 1; initial_params=inits, progress=false) - @test chain[1].metadata.p.vals == [0.2] - @test DynamicPPL.getlogjoint(chain[1]) == lptrue + varinfos = sample(model, spl, 1; initial_params=inits, progress=false) + varinfo = only(varinfos) + @test varinfo[@varname(p)] == 0.2 + @test DynamicPPL.getlogjoint(varinfo) == lptrue # parallel sampling chains = sample( @@ -69,8 +70,9 @@ end progress=false, ) for c in chains - @test c[1].metadata.p.vals == [0.2] - @test DynamicPPL.getlogjoint(c[1]) == lptrue + varinfo = only(c) + @test varinfo[@varname(p)] == 0.2 + @test DynamicPPL.getlogjoint(varinfo) == lptrue end end @@ -96,9 +98,10 @@ end Dict(@varname(s) => 4, @varname(m) => -1), ) chain = sample(model, spl, 1; initial_params=inits, progress=false) - @test chain[1].metadata.s.vals == [4] - @test chain[1].metadata.m.vals == [-1] - @test DynamicPPL.getlogjoint(chain[1]) == lptrue + varinfo = only(chain) + @test varinfo[@varname(s)] == 4 + @test varinfo[@varname(m)] == -1 + @test 
DynamicPPL.getlogjoint(varinfo) == lptrue # parallel sampling chains = sample( @@ -111,9 +114,10 @@ end progress=false, ) for c in chains - @test c[1].metadata.s.vals == [4] - @test c[1].metadata.m.vals == [-1] - @test DynamicPPL.getlogjoint(c[1]) == lptrue + varinfo = only(c) + @test varinfo[@varname(s)] == 4 + @test varinfo[@varname(m)] == -1 + @test DynamicPPL.getlogjoint(varinfo) == lptrue end end @@ -129,8 +133,9 @@ end Dict(@varname(m) => -1), ) chain = sample(model, spl, 1; initial_params=inits, progress=false) - @test !ismissing(chain[1].metadata.s.vals[1]) - @test chain[1].metadata.m.vals == [-1] + varinfo = only(chain) + @test !ismissing(varinfo[@varname(s)]) + @test varinfo[@varname(m)] == -1 # parallel sampling chains = sample( @@ -143,8 +148,9 @@ end progress=false, ) for c in chains - @test !ismissing(c[1].metadata.s.vals[1]) - @test c[1].metadata.m.vals == [-1] + varinfo = only(c) + @test !ismissing(varinfo[@varname(s)]) + @test varinfo[@varname(m)] == -1 end end end diff --git a/test/mcmc/external_sampler.jl b/test/mcmc/external_sampler.jl index bc557dea1d..38f8f90142 100644 --- a/test/mcmc/external_sampler.jl +++ b/test/mcmc/external_sampler.jl @@ -151,7 +151,7 @@ end Base.length(d::ModelDistribution) = length(d.varinfo[:]) function Distributions._logpdf(d::ModelDistribution, x::AbstractVector) - return logprior(d.model, DynamicPPL.unflatten(d.varinfo, x)) + return logprior(d.model, DynamicPPL.unflatten!!(d.varinfo, x)) end function Distributions._rand!( rng::Random.AbstractRNG, d::ModelDistribution, x::AbstractVector{<:Real} @@ -170,9 +170,9 @@ function initialize_mh_with_prior_proposal(model) end function test_initial_params(model, sampler; kwargs...) - # Generate some parameters. - dict = DynamicPPL.values_as(DynamicPPL.VarInfo(model), Dict) - init_strategy = DynamicPPL.InitFromParams(dict) + # Generate some parameters. Doesn't really matter what. 
+ vnt = rand(model) + init_strategy = DynamicPPL.InitFromParams(vnt) # Execute the transition with two different RNGs and check that the resulting # parameter values are the same. This ensures that the `initial_params` are diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index a15b5dc74b..7353170dcf 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -97,9 +97,9 @@ end end end - # Check that evaluate!! and the result it returns are type stable. + # Check that evaluate_nowarn!! and the result it returns are type stable. conditioned_model = DynamicPPL.contextualize(model, ctx) - _, post_eval_varinfo = @inferred DynamicPPL.evaluate!!( + _, post_eval_varinfo = @inferred DynamicPPL.evaluate_nowarn!!( conditioned_model, local_varinfo ) for k in keys(post_eval_varinfo) @@ -116,7 +116,6 @@ end (@varname(s), @varname(m), @varname(x)), (NUTS(), NUTS()) ) # Invalid samplers - @test_throws ArgumentError Gibbs(@varname(s) => IS()) @test_throws ArgumentError Gibbs(@varname(s) => Emcee(10, 2.0)) @test_throws ArgumentError Gibbs( @varname(s) => SGHMC(; learning_rate=0.01, momentum_decay=0.1) @@ -499,12 +498,11 @@ end @model function dynamic_bernoulli_normal(y_obs=2.0) b ~ Bernoulli(0.3) + θ = zeros(2) if b == 0 - θ = Vector{Float64}(undef, 1) θ[1] ~ Normal(0.0, 1.0) y_obs ~ Normal(θ[1], 0.5) else - θ = Vector{Float64}(undef, 2) θ[1] ~ Normal(0.0, 1.0) θ[2] ~ Normal(0.0, 1.0) y_obs ~ Normal(θ[1] + θ[2], 0.5) @@ -514,11 +512,7 @@ end # Run the sampler - focus on testing that it works rather than exact convergence model = dynamic_bernoulli_normal(2.0) chn = sample( - StableRNG(42), - model, - Gibbs(:b => MH(), :θ => HMC(0.1, 10)), - 1000; - discard_initial=500, + StableRNG(42), model, Gibbs(:b => MH(), :θ => MH()), 1000; discard_initial=500 ) # Test that sampling completes without error @@ -550,100 +544,6 @@ end end end - @testset "PG with variable number of observations" begin - # When sampling from a model with Particle Gibbs, it is mandatory for - # the number of 
observations to be the same in all particles, since the - # observations trigger particle resampling. - # - # Up until Turing v0.39, `x ~ dist` statements where `x` was the - # responsibility of a different (non-PG) Gibbs subsampler used to not - # count as an observation. Instead, the log-probability `logpdf(dist, x)` - # would be manually added to the VarInfo's `logp` field and included in the - # weighting for the _following_ observation. - # - # In Turing v0.40, this is now changed: `x ~ dist` uses tilde_observe!! - # which thus triggers resampling. Thus, for example, the following model - # does not work any more: - # - # @model function f() - # a ~ Poisson(2.0) - # x = Vector{Float64}(undef, a) - # for i in eachindex(x) - # x[i] ~ Normal() - # end - # end - # sample(f(), Gibbs(:a => PG(10), :x => MH()), 1000) - # - # because the number of observations in each particle depends on the value - # of `a`. - # - # This testset checks that ways of working around such a situation. - - function test_dynamic_bernoulli(chain) - means = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 2.0) - stds = Dict(:b => 0.5, "x[1]" => 1.0, "x[2]" => 1.0) - for vn in keys(means) - @test isapprox(mean(skipmissing(chain[:, vn, 1])), means[vn]; atol=0.15) - @test isapprox(std(skipmissing(chain[:, vn, 1])), stds[vn]; atol=0.15) - end - end - - # TODO(DPPL0.37/penelopeysm): decide what to do with these tests - @testset "Coalescing multiple observations into one" begin - # Instead of observing x[1] and x[2] separately, we lump them into a - # single distribution. 
- @model function dynamic_bernoulli() - b ~ Bernoulli() - if b - dists = [Normal(1.0)] - else - dists = [Normal(1.0), Normal(2.0)] - end - return x ~ product_distribution(dists) - end - model = dynamic_bernoulli() - # This currently fails because if the global varinfo has `x` with length 2, - # and the particle sampler has `b = true`, it attempts to calculate the - # log-likelihood of a length-2 vector with respect to a length-1 - # distribution. - @test_throws DimensionMismatch chain = sample( - StableRNG(468), - model, - Gibbs(:b => PG(10), :x => ESS()), - 2000; - discard_initial=100, - ) - # test_dynamic_bernoulli(chain) - end - - @testset "Inserting @addlogprob!" begin - # On top of observing x[i], we also add in extra 'observations' - @model function dynamic_bernoulli_2() - b ~ Bernoulli() - x_length = b ? 1 : 2 - x = Vector{Float64}(undef, x_length) - for i in 1:x_length - x[i] ~ Normal(i, 1.0) - end - if length(x) == 1 - # This value is the expectation value of logpdf(Normal(), x) where x ~ Normal(). 
- # See discussion in - # https://github.com/TuringLang/Turing.jl/pull/2629#discussion_r2237323817 - @addlogprob!(-1.418849) - end - end - model = dynamic_bernoulli_2() - chain = sample( - StableRNG(468), - model, - Gibbs(:b => PG(20), :x => ESS()), - 2000; - discard_initial=100, - ) - test_dynamic_bernoulli(chain) - end - end - @testset "Demo model" begin @testset verbose = true "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS vns = DynamicPPL.TestUtils.varnames(model) diff --git a/test/mcmc/gibbs_conditional.jl b/test/mcmc/gibbs_conditional.jl index 07d676df1e..22b04c30ba 100644 --- a/test/mcmc/gibbs_conditional.jl +++ b/test/mcmc/gibbs_conditional.jl @@ -252,7 +252,12 @@ using Turing d1 = Normal(c[@varname(a[1])], 1) d2 = Normal(c[@varname(a[2])], 1) d3 = Normal(c[@varname(a[3])], 1) - return Dict(@varname(b[1]) => d1, @varname(b[2]) => d2, @varname(b[3]) => d3) + return @vnt begin + @template b = zeros(3) + b[1] := d1 + b[2] := d2 + b[3] := d3 + end end sampler = Gibbs( @@ -265,9 +270,15 @@ using Turing @test mean(chain, Symbol("b[2]")) ≈ 10.0 atol = 0.05 @test mean(chain, Symbol("b[3]")) ≈ 20.0 atol = 0.05 - m_condfix = fix( - condition(m, Dict(@varname(a[1]) => 100.0)), Dict(@varname(a[2]) => 200.0) - ) + condvals = @vnt begin + @template a = zeros(3) + a[1] := 100.0 + end + fixvals = @vnt begin + @template a = zeros(3) + a[2] := 200.0 + end + m_condfix = fix(condition(m, condvals), fixvals) sampler = Gibbs( (@varname(b[1]), @varname(b[2]), @varname(b[3])) => GibbsConditional(conditionals_b), diff --git a/test/mcmc/is.jl b/test/mcmc/is.jl deleted file mode 100644 index d9f635fc6f..0000000000 --- a/test/mcmc/is.jl +++ /dev/null @@ -1,74 +0,0 @@ -module ISTests - -using DynamicPPL: logpdf -using Random: Random -using StableRNGs: StableRNG -using StatsFuns: logsumexp -using Test: @test, @testset -using Turing - -@testset "is.jl" begin - @testset "numerical accuracy" begin - function reference(n) - rng = StableRNG(468) - as = Vector{Float64}(undef, n) - 
bs = Vector{Float64}(undef, n) - - for i in 1:n - as[i] = rand(rng, Normal(4, 5)) - bs[i] = rand(rng, Normal(as[i], 1)) - end - return (as=as, bs=bs) - end - - @model function normal() - a ~ Normal(4, 5) - 3 ~ Normal(a, 2) - b ~ Normal(a, 1) - 1.5 ~ Normal(b, 2) - return a, b - end - - alg = IS() - N = 1000 - model = normal() - chain = sample(StableRNG(468), model, alg, N) - ref = reference(N) - - # Note that in general, mean(chain) will differ from mean(ref). This is because the - # sampling process introduces extra calls to rand(), etc. which changes the output. - # These tests therefore are only meant to check that the results are qualitatively - # similar to the reference implementation of IS, and hence the atol is set to - # something fairly large. - @test isapprox(mean(chain[:a]), mean(ref.as); atol=0.1) - @test isapprox(mean(chain[:b]), mean(ref.bs); atol=0.1) - - function expected_loglikelihoods(as, bs) - return logpdf.(Normal.(as, 2), 3) .+ logpdf.(Normal.(bs, 2), 1.5) - end - @test isapprox(chain[:loglikelihood], expected_loglikelihoods(chain[:a], chain[:b])) - end - - @testset "logevidence" begin - @model function test() - a ~ Normal(0, 1) - x ~ Bernoulli(1) - b ~ Gamma(2, 3) - 1 ~ Bernoulli(x / 2) - c ~ Beta() - 0 ~ Bernoulli(x / 2) - return x - end - - N = 1_000 - chains = sample(test(), IS(), N) - - @test all(isone, chains[:x]) - # The below is equivalent to log(mean(exp.(chains[:loglikelihood]))), but more - # numerically stable - logevidence = logsumexp(chains[:loglikelihood]) - log(N) - @test logevidence ≈ -2 * log(2) - end -end - -end diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 7c19f022b3..5a1182d6ce 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -7,14 +7,14 @@ using DynamicPPL: DynamicPPL using LinearAlgebra: I using Random: Random using StableRNGs: StableRNG -using Test: @test, @testset +using Test: @test, @testset, @test_throws using Turing using Turing.Inference: Inference using ..Models: gdemo_default, MoGtest_default using 
..NumericalTests: check_MoGtest_default, check_gdemo, check_numerical -GKernel(var) = (x) -> Normal(x, sqrt.(var)) +GKernel(variance, vn) = (vnt -> Normal(vnt[vn], sqrt(variance))) @testset "mh.jl" begin @info "Starting MH tests" @@ -22,29 +22,102 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "mh constructor" begin N = 10 - s1 = MH((:s, InverseGamma(2, 3)), (:m, GKernel(3.0))) - s2 = MH(:s => InverseGamma(2, 3), :m => GKernel(3.0)) - s3 = MH() - s4 = MH([1.0 0.1; 0.1 1.0]) + s1 = MH(:s => InverseGamma(2, 3), :m => GKernel(3.0, @varname(m))) + s2 = MH() + s3 = MH([1.0 0.1; 0.1 1.0]) c1 = sample(gdemo_default, s1, N) c2 = sample(gdemo_default, s2, N) c3 = sample(gdemo_default, s3, N) + + s4 = Gibbs(:m => MH(), :s => MH()) c4 = sample(gdemo_default, s4, N) + end - s5 = Gibbs(:m => MH(), :s => MH()) - c5 = sample(gdemo_default, s5, N) + @testset "basic accuracy tests" begin + @testset "linking and Jacobian" begin + # This model has no likelihood, it's mainly here to test that linking and + # Jacobians work fine. 
+ @model function f() + x ~ Normal() + return y ~ Beta(2, 2) + end + function test_mean_and_std(spl) + @testset let spl = spl + chn = sample(StableRNG(468), f(), spl, 20_000) + @test mean(chn[:x]) ≈ mean(Normal()) atol = 0.1 + @test std(chn[:x]) ≈ std(Normal()) atol = 0.1 + @test mean(chn[:y]) ≈ mean(Beta(2, 2)) atol = 0.1 + @test std(chn[:y]) ≈ std(Beta(2, 2)) atol = 0.1 + end + end + test_mean_and_std(MH()) + test_mean_and_std(MH(@varname(x) => Normal(1.0))) + test_mean_and_std(MH(@varname(y) => Uniform(0, 1))) + test_mean_and_std(MH(@varname(x) => Normal(1.0), @varname(y) => Uniform(0, 1))) + test_mean_and_std(MH(@varname(x) => LinkedRW(0.5))) + test_mean_and_std(MH(@varname(y) => LinkedRW(0.5))) + # this is a random walk in unlinked space + test_mean_and_std(MH(@varname(y) => vnt -> Normal(vnt[@varname(y)], 0.5))) + test_mean_and_std(MH(@varname(x) => Normal(), @varname(y) => LinkedRW(0.5))) + test_mean_and_std(MH(@varname(x) => LinkedRW(0.5), @varname(y) => Normal())) + # this uses AdvancedMH + test_mean_and_std(MH([1.0 0.1; 0.1 1.0])) + end - # s6 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.RandomWalkProposal)) - # c6 = sample(gdemo_default, s6, N) + @testset "bad proposals" begin + errmsg = "The initial parameters have zero probability density" + + @model f() = x ~ Normal() + # Here we give `x` a constrained proposal. Any samples of `x` that fall outside + # of it will get a proposal density of -Inf, so should be rejected + fspl = MH(@varname(x) => Uniform(-1, 1)) + # We now start the chain outside the proposal region. The point of this test is + # to make sure that we throw a sensible error. + @test_throws errmsg sample(f(), fspl, 2; initial_params=InitFromParams((; x=2))) + + # Same here, except that now it's the proposal that's bad, not the initial + # parameters. 
+ @model g() = x ~ Beta(2, 2) + gspl = MH(@varname(x) => Uniform(-2, -1)) + @test_throws errmsg sample(g(), gspl, 2; initial_params=InitFromPrior()) + end - # NOTE: Broken because MH doesn't really follow the `logdensity` interface, but calls - # it with `NamedTuple` instead of `AbstractVector`. - # s7 = externalsampler(MH(gdemo_default, proposal_type=AdvancedMH.StaticProposal)) - # c7 = sample(gdemo_default, s7, N) + @testset "with unspecified priors that depend on other variables" begin + @model function f() + a ~ Normal() + x ~ Normal(0.0) + y ~ Normal(x) + return 2.0 ~ Normal(y) + end + # If we don't specify a proposal for `y`, it will be sampled from `Normal(x)`. + # However, we need to be careful here since the value of `x` varies! This testset is + # essentially a test to check that `MHUnspecifiedPriorAccumulator` is doing + # the right thing, i.e., it correctly accumulates the prior for the evaluation that + # we're interested in. + chn = sample(StableRNG(468), f(), MH(@varname(a) => Normal()), 10000) + @test mean(chn[:a]) ≈ 0.0 atol = 0.05 + @test mean(chn[:x]) ≈ 2 / 3 atol = 0.05 + @test mean(chn[:y]) ≈ 4 / 3 atol = 0.05 + + # Note that if we additionally don't specify a proposal for `a`, then it will defer + # to InitFromPrior for all variables, and its prior *is* Normal(). So we can do + # this: + chn2 = sample(StableRNG(468), f(), MH(), 10000) + # and it should actually give us exactly the same results as above. + # + # This test *does* depend on internal implementation details of MH and is a bit + # brittle. If it fails, you should check that the values are still approximately + # correct (i.e. 0, 2/3, and 4/3). If they are still correct, then it probably just + # means that the internal implementation details have changed, and this test can + # just be updated (or removed). 
+ @test mean(chn2[:a]) ≈ mean(chn[:a]) + @test mean(chn2[:x]) ≈ mean(chn[:x]) + @test mean(chn2[:y]) ≈ mean(chn[:y]) + end end - @testset "mh inference" begin + @testset "with demo models" begin # Set the initial parameters, because if we get unlucky with the initial state, # these chains are too short to converge to reasonable numbers. discard_initial = 1_000 @@ -59,7 +132,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) end @testset "gdemo_default with custom proposals" begin - alg = MH((:s, InverseGamma(2, 3)), (:m, GKernel(1.0))) + alg = MH(:s => InverseGamma(2, 3), :m => GKernel(1.0, @varname(m))) chain = sample( StableRNG(seed), gdemo_default, alg, 10_000; discard_initial, initial_params ) @@ -77,12 +150,10 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "MoGtest_default with Gibbs" begin gibbs = Gibbs( (@varname(z1), @varname(z2), @varname(z3), @varname(z4)) => CSMC(15), - @varname(mu1) => MH((:mu1, GKernel(1))), - @varname(mu2) => MH((:mu2, GKernel(1))), + @varname(mu1) => MH(:mu1 => GKernel(1, @varname(mu1))), + @varname(mu2) => MH(:mu2 => GKernel(1, @varname(mu2))), ) - initial_params = InitFromParams(( - mu1=1.0, mu2=1.0, z1=0.0, z2=0.0, z3=1.0, z4=1.0 - )) + initial_params = InitFromParams((mu1=1.0, mu2=1.0, z1=0, z2=0, z3=1, z4=1)) chain = sample( StableRNG(seed), MoGtest_default, @@ -95,64 +166,11 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) end end - # Test MH shape passing. 
- @testset "shape" begin - @model function M(mu, sigma, observable) - z ~ MvNormal(mu, sigma) - - m = Array{Float64}(undef, 1, 2) - m[1] ~ Normal(0, 1) - m[2] ~ InverseGamma(2, 1) - s ~ InverseGamma(2, 1) - - observable ~ Bernoulli(cdf(Normal(), z' * z)) - - 1.5 ~ Normal(m[1], m[2]) - -1.5 ~ Normal(m[1], m[2]) - - 1.5 ~ Normal(m[1], s) - return 2.0 ~ Normal(m[1], s) - end - - model = M(zeros(2), I, 1) - sampler = MH() - - dt, vt = Inference.dist_val_tuple(sampler, DynamicPPL.VarInfo(model)) - - @test dt[:z] isa AdvancedMH.StaticProposal{false,<:MvNormal} - @test dt[:m] isa - AdvancedMH.StaticProposal{false,Vector{ContinuousUnivariateDistribution}} - @test dt[:m].proposal[1] isa Normal && dt[:m].proposal[2] isa InverseGamma - @test dt[:s] isa AdvancedMH.StaticProposal{false,<:InverseGamma} - - @test vt[:z] isa Vector{Float64} && length(vt[:z]) == 2 - @test vt[:m] isa Vector{Float64} && length(vt[:m]) == 2 - @test vt[:s] isa Float64 - - chain = sample(model, MH(), 10) - - @test chain isa MCMCChains.Chains - end - - @testset "proposal matrix" begin + @testset "with proposal matrix" begin mat = [1.0 -0.05; -0.05 1.0] - - prop1 = mat # Matrix only constructor - prop2 = AdvancedMH.RandomWalkProposal(MvNormal(mat)) # Explicit proposal constructor - - spl1 = MH(prop1) - spl2 = MH(prop2) - - # Test that the two constructors are equivalent. - @test spl1.proposals.proposal.μ == spl2.proposals.proposal.μ - @test spl1.proposals.proposal.Σ.mat == spl2.proposals.proposal.Σ.mat - - # Test inference. 
+ spl1 = MH(mat) chain1 = sample(StableRNG(seed), gdemo_default, spl1, 2_000) - chain2 = sample(StableRNG(seed), gdemo_default, spl2, 2_000) - check_gdemo(chain1) - check_gdemo(chain2) end @testset "gibbs MH proposal matrix" begin @@ -178,7 +196,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) # with small-valued VC matrix to check if we only see very small steps vc_μ = convert(Array, 1e-4 * I(2)) vc_σ = convert(Array, 1e-4 * I(2)) - alg_small = Gibbs(:μ => MH((:μ, vc_μ)), :σ => MH((:σ, vc_σ))) + alg_small = Gibbs(:μ => MH(vc_μ), :σ => MH(vc_σ)) alg_big = MH() chn_small = sample(StableRNG(seed), mod, alg_small, 1_000) chn_big = sample(StableRNG(seed), mod, alg_big, 1_000) @@ -223,55 +241,6 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) end end end - - @testset "MH link/invlink" begin - vi_base = DynamicPPL.VarInfo(gdemo_default) - - # Don't link when no proposals are given since we're using priors - # as proposals. - vi = deepcopy(vi_base) - spl = MH() - vi = Turing.Inference.maybe_link!!(vi, spl, spl.proposals, gdemo_default) - @test !DynamicPPL.is_transformed(vi) - - # Link if proposal is `AdvancedHM.RandomWalkProposal` - vi = deepcopy(vi_base) - d = length(vi_base[:]) - spl = MH(AdvancedMH.RandomWalkProposal(MvNormal(zeros(d), I))) - vi = Turing.Inference.maybe_link!!(vi, spl, spl.proposals, gdemo_default) - @test DynamicPPL.is_transformed(vi) - - # Link if ALL proposals are `AdvancedHM.RandomWalkProposal`. - vi = deepcopy(vi_base) - spl = MH(:s => AdvancedMH.RandomWalkProposal(Normal())) - vi = Turing.Inference.maybe_link!!(vi, spl, spl.proposals, gdemo_default) - @test DynamicPPL.is_transformed(vi) - - # Don't link if at least one proposal is NOT `RandomWalkProposal`. - # TODO: make it so that only those that are using `RandomWalkProposal` - # are linked! I.e. resolve https://github.com/TuringLang/Turing.jl/issues/1583. 
- # https://github.com/TuringLang/Turing.jl/pull/1582#issuecomment-817148192 - vi = deepcopy(vi_base) - spl = MH( - :m => AdvancedMH.StaticProposal(Normal()), - :s => AdvancedMH.RandomWalkProposal(Normal()), - ) - vi = Turing.Inference.maybe_link!!(vi, spl, spl.proposals, gdemo_default) - @test !DynamicPPL.is_transformed(vi) - end - - @testset "`filldist` proposal (issue #2180)" begin - @model demo_filldist_issue2180() = x ~ MvNormal(zeros(3), I) - chain = sample( - StableRNG(seed), - demo_filldist_issue2180(), - MH(AdvancedMH.RandomWalkProposal(filldist(Normal(), 3))), - 10_000, - ) - check_numerical( - chain, [Symbol("x[1]"), Symbol("x[2]"), Symbol("x[3]")], [0, 0, 0]; atol=0.2 - ) - end end end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index 15de4df305..dcc1f74326 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -35,7 +35,7 @@ function check_optimisation_result( # Check that `result.params` contains all the keys in `true_values` @test Set(keys(result.params)) == Set(keys(true_values)) # Check that their values are close - for (vn, val) in result.params + for (vn, val) in pairs(result.params) @test isapprox(val, true_values[vn], atol=0.01) end # Check logp and retcode @@ -122,7 +122,11 @@ end [([2.0, 2.0], nothing), (nothing, [-1.0, -1.0]), ([0.3, 0.3], [0.1, 0.1])] # unit test the function @test_throws ArgumentError make_optim_bounds_and_init( - Random.default_rng(), ldf, InitFromPrior(), (x=lb,), (x=ub,) + Random.default_rng(), + ldf, + InitFromPrior(), + VarNamedTuple(; x=lb), + VarNamedTuple(; x=ub), ) # check that the high-level function also errors @test_throws ArgumentError maximum_likelihood(diric(); lb=(x=lb,), ub=(x=ub,)) @@ -139,7 +143,11 @@ end ub = (x=1.0,) bad_init = (x=10.0,) @test_throws ArgumentError make_optim_bounds_and_init( - Random.default_rng(), ldf, InitFromParams(bad_init), lb, ub + Random.default_rng(), + ldf, + InitFromParams(bad_init), + 
VarNamedTuple(lb), + VarNamedTuple(ub), ) @test_throws ArgumentError maximum_likelihood( normal_model(); initial_params=InitFromParams(bad_init), lb=lb, ub=ub @@ -171,7 +179,11 @@ end @testset "unlinked" begin ldf = LogDensityFunction(model) lb_vec, ub_vec, init_vec = make_optim_bounds_and_init( - Random.default_rng(), ldf, InitFromPrior(), lb, ub + Random.default_rng(), + ldf, + InitFromPrior(), + VarNamedTuple(lb), + VarNamedTuple(ub), ) @test lb_vec == maybe_to_vec(lb.x) @test ub_vec == maybe_to_vec(ub.x) @@ -183,7 +195,11 @@ end vi = DynamicPPL.link!!(DynamicPPL.VarInfo(model), model) ldf = LogDensityFunction(model, DynamicPPL.getlogjoint, vi) lb_vec, ub_vec, init_vec = make_optim_bounds_and_init( - Random.default_rng(), ldf, InitFromPrior(), lb, ub + Random.default_rng(), + ldf, + InitFromPrior(), + VarNamedTuple(lb), + VarNamedTuple(ub), ) b = Bijectors.bijector(dist) @test lb_vec ≈ maybe_to_vec(b(lb.x)) @@ -201,8 +217,8 @@ end vi = DynamicPPL.link!!(DynamicPPL.VarInfo(model), model) ldf = LogDensityFunction(model, DynamicPPL.getlogjoint, vi) - lb = (x=rand(dist),) - ub = (;) + lb = VarNamedTuple(; x=rand(dist)) + ub = VarNamedTuple() @test_throws ArgumentError make_optim_bounds_and_init( Random.default_rng(), ldf, InitFromPrior(), lb, ub @@ -554,7 +570,7 @@ end val = AbstractPPL.getvalue(true_optima, vn) for vn_leaf in AbstractPPL.varname_leaves(vn, val) expected = AbstractPPL.getvalue(true_optima, vn_leaf) - actual = AbstractPPL.getvalue(result.params, vn_leaf) + actual = result.params[vn_leaf] @test expected ≈ actual atol = 0.05 end end @@ -578,6 +594,7 @@ end DynamicPPL.TestUtils.demo_dot_assume_observe_index, DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal, DynamicPPL.TestUtils.demo_assume_matrix_observe_matrix_index, + DynamicPPL.TestUtils.demo_nested_colons, ] @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS true_optima = DynamicPPL.TestUtils.likelihood_optima(model) @@ -601,7 +618,7 @@ end val = 
AbstractPPL.getvalue(true_optima, vn) for vn_leaf in AbstractPPL.varname_leaves(vn, val) expected = AbstractPPL.getvalue(true_optima, vn_leaf) - actual = AbstractPPL.getvalue(result.params, vn_leaf) + actual = result.params[vn_leaf] if model.f in allowed_incorrect_mle @test isfinite(actual) else @@ -734,7 +751,7 @@ end end m = Turing.Optimisation.ModeResult( MLE(), - Dict{AbstractPPL.VarName,Float64}(@varname(x) => 0.0, @varname(y) => 0.0), + DynamicPPL.VarNamedTuple((; x=0.0, y=0.0)), 0.0, false, DynamicPPL.LogDensityFunction(saddle_model(), DynamicPPL.getloglikelihood), diff --git a/test/runtests.jl b/test/runtests.jl index a4a60d8e29..ea29c5310a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -47,7 +47,6 @@ end @timeit_include("mcmc/particle_mcmc.jl") @timeit_include("mcmc/emcee.jl") @timeit_include("mcmc/ess.jl") - @timeit_include("mcmc/is.jl") end @timeit TIMEROUTPUT "inference" begin diff --git a/test/stdlib/RandomMeasures.jl b/test/stdlib/RandomMeasures.jl index cf4ef90ada..289c2a7615 100644 --- a/test/stdlib/RandomMeasures.jl +++ b/test/stdlib/RandomMeasures.jl @@ -23,10 +23,11 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess # Latent assignment. z = zeros(Int, length(x)) - # Locations of the infinitely many clusters. - μ = zeros(Float64, 0) + # Locations of the infinitely many clusters. µ[i] represents the location + # of the cluster number z[i]. + μ = zeros(Float64, length(x)) - for i in 1:length(x) + for i in eachindex(x) # Number of clusters. K = maximum(z) @@ -37,14 +38,12 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess # Create a new cluster? if z[i] > K - push!(μ, 0.0) - # Draw location of new cluster. - μ[z[i]] ~ H + μ[i] ~ H end # Draw observation. 
- x[i] ~ Normal(μ[z[i]], 1.0) + x[i] ~ Normal(μ[i], 1.0) end end diff --git a/test/test_utils/sampler.jl b/test/test_utils/sampler.jl index 46f740cf30..f2a15047e1 100644 --- a/test/test_utils/sampler.jl +++ b/test/test_utils/sampler.jl @@ -81,13 +81,20 @@ function test_sampler_analytical( kwargs..., ) @testset "$(sampler_name) on $(nameof(model))" for model in models + # TODO(penelopeysm): Test demo_nested_colons again when FlexiChains is in. + # The subparams[:, 1, :] thing just completely breaks MCMCChains. + if model.f == DynamicPPL.TestUtils.demo_nested_colons + @info "Skipping test_sampler_analytical for demo_nested_colons due to MCMCChains limitations." + continue + end chain = AbstractMCMC.sample(model, sampler, args...; kwargs...) target_values = DynamicPPL.TestUtils.posterior_mean(model) for vn in filter(varnames_filter, DynamicPPL.TestUtils.varnames(model)) # We want to compare elementwise which can be achieved by # extracting the leaves of the `VarName` and the corresponding value. - for vn_leaf in AbstractPPL.varname_leaves(vn, get(target_values, vn)) - target_value = get(target_values, vn_leaf) + for vn_leaf in + AbstractPPL.varname_leaves(vn, AbstractPPL.getvalue(target_values, vn)) + target_value = AbstractPPL.getvalue(target_values, vn_leaf) chain_mean_value = mean(chain[Symbol(vn_leaf)]) @test chain_mean_value ≈ target_value atol = atol rtol = rtol end From fd3ab947d48bbbbb7101d580d6d0376aa04b77ef Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 10 Feb 2026 16:13:30 +0000 Subject: [PATCH 08/48] Changelog --- HISTORY.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 04828362f2..16cee9c35c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,40 @@ # 0.43.0 +## DynamicPPL 0.40 and `VarNamedTuple` + +DynamicPPL v0.40 is a major overhaul of the internal data structure. 
+Most notably, cases where we might once have used `Dict{VarName}` or `NamedTuple` have all been replaced with a single data structure, called `VarNamedTuple`. + +This provides substantial benefits in terms of robustness and performance. + +However, it does place some constraints on Turing models. +Specifically: + + - Arrays whose elements are random variables should have a constant size and type. For example, the following will no longer work: + + ```julia + x = Float64[] + for i in 1:10 + push!(x, 0.0) + x[i] ~ Normal() + end + ``` + + This only applies to arrays on the left-hand side of tilde-statements. In general, there are no restrictions on the code that you can use outside of tilde-statements. + + - Likewise, arrays of random variables should ideally have a constant size from iteration to iteration. That means a model like this will fail sometimes (*but* see below): + + ```julia + n ~ Poisson(2.0) + x = Vector{Float64}(undef, n) + for i in 1:n + x[i] ~ Normal() + end + ``` + + *Technically speaking*: Inference (e.g. MCMC sampling) on this model will still work, but if you want to use `returned` or `predict`, both of the following conditions must hold true: (1) you must use FlexiChains.jl; (2) all elements of `x` must be random variables, i.e., you cannot have a mixture of `x[i]`'s being random variables and observed. + - TODO + ## Optimisation interface Turing.jl's optimisation interface has been completely overhauled in this release. @@ -43,6 +78,17 @@ Documentation on this matter will be forthcoming. The `IS` sampler has been removed (its behaviour was in fact exactly the same as `Prior`). To see an example of importance sampling (via `Prior()` and then subsequent reweighting), see e.g. [this issue](https://github.com/TuringLang/Turing.jl/issues/2767). +## `MH` sampler + +The interface of the MH sampler is slightly different. 
+It no longer accepts `AdvancedMH` proposals, and is now more flexible: you can specify proposals for individual `VarName`s (not just top-level symbols), and any unspecified `VarName`s will be drawn from the prior, instead of being silently ignored. +It is also faster than before (by around 30% on simple models). + +## `GibbsConditional` + +When defining a conditional posterior, instead of being provided with a Dict of values, the function must now take a `VarNamedTuple` containing the values. +Note that indexing into a `VarNamedTuple` is very similar to indexing into a `Dict`; however, it is more flexible since you can use syntax such as `x[1:2]` even if `x[1]` and `x[2]` are separate variables in the model. + # 0.42.8 Add support for `TensorBoardLogger.jl` via `AbstractMCMC.mcmc_callback`. From 68ccbf344f242e93abb1a31d8f6ccb121e0c8a51 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 10 Feb 2026 17:25:26 +0000 Subject: [PATCH 09/48] fixes for densify --- test/mcmc/gibbs.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index 7353170dcf..f7de4925d2 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -23,9 +23,13 @@ using Turing.Inference: AdvancedHMC, AdvancedMH using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess function check_transition_varnames(transition::DynamicPPL.ParamsWithStats, parent_varnames) - # Varnames in `transition` should be subsumed by those in `parent_varnames`. + # Varnames in `transition` should be subsumed by those in `parent_varnames`; but + # sometimes `densify!!` can introduce varnames that are joined together (e.g. s[1] and + # s[2] becomes s), so we need to specialcase that.... ugly. 
for vn in keys(transition.params) - @test any(Base.Fix2(DynamicPPL.subsumes, vn), parent_varnames) + @test vn == @varname(m) || + vn == @varname(s) || + any(Base.Fix2(DynamicPPL.subsumes, vn), parent_varnames) end end From f9ee09fec329a4566adb6bfaf859ccef0942ac1a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 12 Feb 2026 01:24:17 +0000 Subject: [PATCH 10/48] fix gibbs tests --- test/mcmc/gibbs.jl | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index f7de4925d2..4aba670e45 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -23,13 +23,8 @@ using Turing.Inference: AdvancedHMC, AdvancedMH using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess function check_transition_varnames(transition::DynamicPPL.ParamsWithStats, parent_varnames) - # Varnames in `transition` should be subsumed by those in `parent_varnames`; but - # sometimes `densify!!` can introduce varnames that are joined together (e.g. s[1] and - # s[2] becomes s), so we need to specialcase that.... ugly. 
for vn in keys(transition.params) - @test vn == @varname(m) || - vn == @varname(s) || - any(Base.Fix2(DynamicPPL.subsumes, vn), parent_varnames) + @test any(Base.Fix2(DynamicPPL.subsumes, vn), parent_varnames) end end @@ -550,7 +545,7 @@ end @testset "Demo model" begin @testset verbose = true "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS - vns = DynamicPPL.TestUtils.varnames(model) + vns = (@varname(m), @varname(s)) samplers = [ Gibbs(@varname(s) => NUTS(), @varname(m) => NUTS()), Gibbs(@varname(s) => NUTS(), @varname(m) => HMC(0.01, 4)), @@ -733,14 +728,7 @@ end rng = Random.default_rng() model = MoGtest_default_z_vector spl = Gibbs(@varname(z) => CSMC(15), @varname(mu1) => ESS(), @varname(mu2) => ESS()) - vns = ( - @varname(z[1]), - @varname(z[2]), - @varname(z[3]), - @varname(z[4]), - @varname(mu1), - @varname(mu2) - ) + vns = (@varname(z), @varname(mu1), @varname(mu2)) # `step` transition, state = AbstractMCMC.step(rng, model, spl) check_transition_varnames(transition, vns) From d79e8b7fd9c0aaa98b5746002a181d4ba0aa479a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 17:32:17 +0000 Subject: [PATCH 11/48] Silence Aqua on `Libtask.might_produce` --- test/Aqua.jl | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/test/Aqua.jl b/test/Aqua.jl index e159cae9ca..3793f1ee11 100644 --- a/test/Aqua.jl +++ b/test/Aqua.jl @@ -3,9 +3,25 @@ module AquaTests using Aqua: Aqua using Turing -# We test ambiguities separately because it catches a lot of problems -# in dependencies but we test it for Turing. -Aqua.test_ambiguities([Turing]) +# We test ambiguities separately because it catches a lot of problems in dependencies but we +# test it for Turing. +# +# `Libtask.might_produce` is excluded because the `@might_produce` macro generates a lot of +# ambiguities that will never happen in practice. 
+# +# Specifically, when you write `@might_produce f` for a function `f` that has methods that +# take keyword arguments, we have to generate a `might_produce` method for +# `Type{<:Tuple{<:Function,...,typeof(f)}}`. There is no way to circumvent this: see +# https://github.com/TuringLang/Libtask.jl/issues/197. This in turn will cause method +# ambiguities with any other function, say `g`, for which +# `::Type{<:Tuple{typeof(g),Vararg}}` is marked as produceable. +# +# To avoid the method ambiguities, we *could* manually spell out `might_produce` methods for +# each method of `g` manually instead of using Vararg, but that would be both very verbose +# and fragile. It would also not provide any real benefit since those ambiguities are not +# meaningful in practice (in particular, to trigger this we would need to call `g(..., f)`, +# which is incredibly unlikely). +Aqua.test_ambiguities([Turing]; exclude=[Libtask.might_produce]) Aqua.test_all(Turing; ambiguities=false) end From 992efff502914212f4a2530bae4c3d7a19a1ae90 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 17:36:10 +0000 Subject: [PATCH 12/48] Remove size from LinkedVectorValue --- src/mcmc/mh.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index d92360edd5..cf67d44263 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -179,8 +179,7 @@ function DynamicPPL.init( if is_linkedrw transform = DynamicPPL.from_linked_vec_transform(prior) linked_vec = rand(rng, dist) - sz = hasmethod(size, Tuple{typeof(prior)}) ? size(prior) : () - return DynamicPPL.LinkedVectorValue(linked_vec, transform, sz) + return DynamicPPL.LinkedVectorValue(linked_vec, transform) else # Static or conditional proposal in untransformed space. 
return DynamicPPL.UntransformedValue(rand(rng, dist)) From ee4c4f24ae7bbeca2b63b7a770483def479ea9c3 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 17:47:21 +0000 Subject: [PATCH 13/48] Fix typo --- src/mcmc/mh.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index cf67d44263..33da7834ad 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -46,7 +46,7 @@ This implies the use of static proposals for each variable. If a variable is not its prior distribution is used as the proposal. ```julia -# Use a static proposal for s² (which happens to be the same as the prior) and a static +# Use a static proposal for s (which happens to be the same as the prior) and a static # proposal for m (note that this isn't a random walk proposal). spl = MH( # This happens to be the same as the prior From 8442ce2945c1f6071394d658cd8e33865847a3ba Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 17:48:04 +0000 Subject: [PATCH 14/48] Add template to tilde_observe!! --- src/mcmc/gibbs.jl | 2 +- src/mcmc/particle_mcmc.jl | 3 ++- test/ad.jl | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index 4c419540c2..6e7f796e78 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -183,7 +183,7 @@ function DynamicPPL.tilde_assume!!( # Note that tilde_observe!! will trigger resampling in particle methods # for variables that are handled by other Gibbs component samplers. val = get_conditioned_gibbs(context, vn) - DynamicPPL.tilde_observe!!(child_context, right, val, vn, vi) + DynamicPPL.tilde_observe!!(child_context, right, val, vn, template, vi) else # If the varname has not been conditioned on, nor is it a target variable, its # presumably a new variable that should be sampled from its prior. 
We need to add diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index f02f87b81e..42717701f8 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -450,10 +450,11 @@ function DynamicPPL.tilde_observe!!( right::Distribution, left, vn::Union{VarName,Nothing}, + template::Any, vi::AbstractVarInfo, ) vi = get_trace_local_varinfo() - left, vi = DynamicPPL.tilde_observe!!(DefaultContext(), right, left, vn, vi) + left, vi = DynamicPPL.tilde_observe!!(DefaultContext(), right, left, vn, template, vi) set_trace_local_varinfo(vi) return left, vi end diff --git a/test/ad.jl b/test/ad.jl index 5218fbf7d0..5e5f367470 100644 --- a/test/ad.jl +++ b/test/ad.jl @@ -177,10 +177,11 @@ function DynamicPPL.tilde_observe!!( right::Distribution, left, vn::Union{VarName,Nothing}, + template::Any, vi::AbstractVarInfo, ) left, vi = DynamicPPL.tilde_observe!!( - DynamicPPL.childcontext(context), right, left, vn, vi + DynamicPPL.childcontext(context), right, left, vn, template, vi ) check_adtype(context, vi) return left, vi From c162b0ad21c3e72ce70c4ed558016d1cc3f54c54 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 17:49:35 +0000 Subject: [PATCH 15/48] Add isnan check to MH initialisation --- src/mcmc/mh.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 33da7834ad..fb3ff2ac86 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -283,14 +283,19 @@ function AbstractMCMC.step( initial_log_proposal_density = log_proposal_density( vi, init_strategy, initial_unspecified_priors ) - if initial_log_proposal_density == -Inf + if initial_log_proposal_density == -Inf || isnan(initial_log_proposal_density) io = IOContext(IOBuffer(), :color => true) show(io, "text/plain", initial_raw_values) init_str = String(take!(io.io)) + prob_dens_string = if initial_log_proposal_density == -Inf + "zero" + else + "NaN" + end error( - "The initial parameters have zero probability density under 
the proposal" * - " distribution (for example, an initial value of `x=2.0` for a" * - " proposal `@varname(x) => Uniform(0, 1)`. This will cause the" * + "The initial parameters have a $prob_dens_string probability density under" * + " the proposal distribution (for example, an initial value of `x=2.0`" * + " for a proposal `@varname(x) => Uniform(0, 1)`. This will cause the" * " sampler to get stuck at the initial parameters. Consider specifying" * " different initial parameters (e.g. via `InitFromParams`) or using a" * " different proposal distribution." * From 4ea8d8dfb51afb5f38696047fb696a2b4aec0883 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 23 Feb 2026 18:19:16 +0000 Subject: [PATCH 16/48] Add template to accumulate_observe!! --- src/mcmc/particle_mcmc.jl | 2 +- src/optimisation/Optimisation.jl | 2 +- src/optimisation/init.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 42717701f8..ad87aeec95 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -499,7 +499,7 @@ function DynamicPPL.accumulate_assume!!( return acc end function DynamicPPL.accumulate_observe!!( - acc::ProduceLogLikelihoodAccumulator, right, left, vn + acc::ProduceLogLikelihoodAccumulator, right, left, vn, template ) return DynamicPPL.acclogp(acc, Distributions.loglikelihood(right, left)) end diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 74f68b66e6..a9ac2906fe 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -183,7 +183,7 @@ function DynamicPPL.accumulate_assume!!( return acc end function DynamicPPL.accumulate_observe!!( - acc::ConstraintCheckAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing} + acc::ConstraintCheckAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any ) return acc end diff --git a/src/optimisation/init.jl b/src/optimisation/init.jl index 1d81a0130e..2438a8f093 
100644 --- a/src/optimisation/init.jl +++ b/src/optimisation/init.jl @@ -245,7 +245,7 @@ function DynamicPPL.accumulate_assume!!( return acc end function DynamicPPL.accumulate_observe!!( - acc::ConstraintAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing} + acc::ConstraintAccumulator, ::Distribution, ::Any, ::Union{VarName,Nothing}, ::Any ) return acc end From ad197110540a63cce2576cf81b0008da478be1c6 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 24 Feb 2026 15:54:02 +0000 Subject: [PATCH 17/48] Tell users about the proposals that are being used in MH (#2774) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This (I think) is the best way to protect against unexpected behaviour when the user specifies proposals in a different format from the VarNames in the model. For example ```julia using Turing @model function f() x = zeros(2) x[1] ~ Normal() x[2] ~ Normal() end spl = MH(@varname(x) => MvNormal(zeros(2), I)) chain = sample(f(), spl, 100) ``` produces ```julia julia> chain = sample(f(), spl, 100) ┌ Info: When sampling with MH, the following proposals will be used at each step. └ This output can be disabled by passing `verbose=false` to `sample()`. [ Info: varname x[1]: no proposal specified, drawing from prior [ Info: varname x[2]: no proposal specified, drawing from prior Sampling 100%|█████████████████████████████████████████████████████████████████████████| Time: 0:00:00 ``` I think in general it's not possible to figure out on behalf of the user whether that's the behaviour they wanted or not. The user just has to manually check that against what they expected. 
--- src/mcmc/mh.jl | 37 ++++++++++++++++++++++++++++--------- test/mcmc/mh.jl | 29 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index fb3ff2ac86..2db9554000 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -89,9 +89,10 @@ spl = MH( ``` **Note that when using conditional proposals, the values obtained by indexing into the -`VarNamedTuple` are always in unlinked space.** Sometimes, you may want to define a random-walk -proposal in linked space. For this, you can use `LinkedRW` as a proposal, which takes a -covariance matrix as an argument: +`VarNamedTuple` are always in untransformed space, which are constrained to the support of +the distribution.** Sometimes, you may want to define a random-walk proposal in +unconstrained (i.e. 'linked') space. For this, you can use `LinkedRW` as a proposal, which +takes a covariance matrix as an argument: ```julia using LinearAlgebra: Diagonal @@ -103,10 +104,11 @@ spl = MH( In the above example, `LinkedRW(Diagonal([0.25]))` defines a random-walk proposal for `m` in linked space. This is in fact the same as the conditional proposal above, because `m` is -already unconstrained, and so linked space and unlinked space are the same for this -variable. However, `s` is constrained to be positive, and so using a `LinkedRW` proposal for -`s` would be different from using a normal proposal in unlinked space (`LinkedRW` will -ensure that the proposals for `s` always remain positive in unlinked space). +already unconstrained, and so the unconstraining transformation is the identity. + +However, `s` is constrained to be positive, and so using a `LinkedRW` proposal for `s` would +be different from using a normal proposal in untransformed space (`LinkedRW` will ensure +that the proposals for `s` always remain positive in untransformed space). 
```julia spl = MH( @@ -156,7 +158,7 @@ struct LinkedRW{C} end """ - InitFromProposals(proposals::VarNamedTuple) + InitFromProposals(proposals::VarNamedTuple, verbose::Bool) An initialisation strategy that samples variables from user-defined proposal distributions. If a proposal distribution is not found in `proposals`, then we defer to sampling from the @@ -169,11 +171,14 @@ struct InitFromProposals{V<:DynamicPPL.VarNamedTuple} <: DynamicPPL.AbstractInit return a `LinkedVectorValue`); or in untransformed space (false, i.e., the strategy should return an `UntransformedValue`)." proposals::V + "Whether to print the proposals as they are being sampled" + verbose::Bool end function DynamicPPL.init( rng::Random.AbstractRNG, vn::VarName, prior::Distribution, strategy::InitFromProposals ) if haskey(strategy.proposals, vn) + strategy.verbose && @info "varname $vn: proposal $(strategy.proposals[vn][2])" # this is the proposal that the user wanted is_linkedrw, dist = strategy.proposals[vn] if is_linkedrw @@ -185,6 +190,7 @@ function DynamicPPL.init( return DynamicPPL.UntransformedValue(rand(rng, dist)) end else + strategy.verbose && @info "varname $vn: no proposal specified, drawing from prior" # No proposal was specified for this variable, so we sample from the prior. return DynamicPPL.UntransformedValue(rand(rng, prior)) end @@ -227,7 +233,7 @@ function MH(pair1::SymOrVNPair, pairs::Vararg{SymOrVNPair}) proposals, proposal_dist, vn, raw_vals.data[AbstractPPL.getsym(vn)] ) end - return InitFromProposals(proposals) + return InitFromProposals(proposals, false) end all_vns = Set{VarName}(_to_varname(pair[1]) for pair in vn_proposal_pairs) linkedrw_vns = Set{VarName}( @@ -247,6 +253,7 @@ function AbstractMCMC.step( spl::MH; initial_params::DynamicPPL.AbstractInitStrategy, discard_sample=false, + verbose=true, kwargs..., ) # Generate and return initial parameters. 
We need to use VAIMAcc because that will @@ -303,6 +310,17 @@ function AbstractMCMC.step( ) end + # We evaluate the model once with the sampler's init strategy and print all the + # proposals that were used. This helps the user detect cases where the proposals are + # silently ignored (e.g. because the VarName in the proposal doesn't match the VarName + # in the model). + if verbose && init_strategy isa InitFromProposals + @info "When sampling with MH, the following proposals will be used at each step.\nThis output can be disabled by passing `verbose=false` to `sample()`." + verbose_init_strategy = InitFromProposals(init_strategy.proposals, true) + oavi = DynamicPPL.OnlyAccsVarInfo(()) # No need to accumulate anything + DynamicPPL.init!!(rng, model, oavi, verbose_init_strategy, DynamicPPL.UnlinkAll()) + end + transition = discard_sample ? nothing : DynamicPPL.ParamsWithStats(vi, (; accepted=true)) return transition, vi @@ -356,6 +374,7 @@ function AbstractMCMC.step( log_proposal_density(old_vi, init_strategy_given_new, old_unspecified_priors) - log_proposal_density(new_vi, init_strategy_given_old, new_unspecified_priors) ) + isnan(log_a) && @warn "MH log-acceptance probability is NaN; sample will be rejected" # Decide whether to accept. 
accepted, vi = if -Random.randexp(rng) < log_a diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 5a1182d6ce..bc60b32520 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -5,6 +5,7 @@ using Distributions: Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, Normal, sample using DynamicPPL: DynamicPPL using LinearAlgebra: I +using Logging: Logging using Random: Random using StableRNGs: StableRNG using Test: @test, @testset, @test_throws @@ -117,6 +118,34 @@ GKernel(variance, vn) = (vnt -> Normal(vnt[vn], sqrt(variance))) end end + @testset "info statements about proposals" begin + @model function f() + x = zeros(2) + x[1] ~ Normal() + return x[2] ~ Normal() + end + + spl = MH(@varname(x[1]) => Normal(), @varname(x[2]) => Normal()) + @test_logs (:info, r"varname x\[1\]: proposal .*Normal") ( + :info, r"varname x\[2\]: proposal .*Normal" + ) match_mode = :any sample(f(), spl, 2; progress=false) + + spl = MH(@varname(x) => MvNormal(zeros(2), I)) + @test_logs (:info, r"varname x\[1\]: no proposal specified") ( + :info, r"varname x\[2\]: no proposal specified" + ) match_mode = :any sample(f(), spl, 2; progress=false) + + spl = MH(@varname(x.a) => Normal(), @varname(x[2]) => Normal()) + @test_logs (:info, r"varname x\[1\]: no proposal specified") ( + :info, r"varname x\[2\]: proposal .*Normal" + ) match_mode = :any sample(f(), spl, 2; progress=false) + + # Check that verbose=false disables it + @test_logs min_level = Logging.Info sample( + f(), spl, 2; progress=false, verbose=false + ) + end + @testset "with demo models" begin # Set the initial parameters, because if we get unlucky with the initial state, # these chains are too short to converge to reasonable numbers. 
From 7e1f9bfb28d162b9cc160fc50c37faa08519ff8d Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 24 Feb 2026 17:11:38 +0000 Subject: [PATCH 18/48] Add Logging test dep --- test/Project.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 5cd42680a2..e722e2a110 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,6 +21,7 @@ HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" @@ -38,6 +39,9 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +[sources] +DynamicPPL = {rev = "breaking", url = "https://github.com/TuringLang/DynamicPPL.jl"} + [compat] ADTypes = "1" AbstractMCMC = "5.13" @@ -77,6 +81,3 @@ StatsBase = "0.33, 0.34" StatsFuns = "0.9.5, 1" TimerOutputs = "0.5" julia = "1.10" - -[sources] -DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "breaking"} From 66e28d9fb1b67e2a9d9c7f1d47efda5fb26f44b1 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 24 Feb 2026 18:03:55 +0000 Subject: [PATCH 19/48] Fix test import --- test/Aqua.jl | 1 + test/Project.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/test/Aqua.jl b/test/Aqua.jl index 3793f1ee11..403f8e746d 100644 --- a/test/Aqua.jl +++ b/test/Aqua.jl @@ -1,6 +1,7 @@ module AquaTests using Aqua: Aqua +using Libtask: Libtask using Turing # We test ambiguities separately because it catches a lot of problems in dependencies but we diff --git a/test/Project.toml b/test/Project.toml index e722e2a110..b1b2c4a2f2 100644 --- 
a/test/Project.toml +++ b/test/Project.toml @@ -18,6 +18,7 @@ DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" +Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" From 948f09cdbde75e77d3be0a2a38934a27911c4676 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 24 Feb 2026 18:59:46 +0000 Subject: [PATCH 20/48] More imports... --- test/mcmc/mh.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index bc60b32520..273bbc72c8 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -8,7 +8,7 @@ using LinearAlgebra: I using Logging: Logging using Random: Random using StableRNGs: StableRNG -using Test: @test, @testset, @test_throws +using Test: @test, @testset, @test_throws, @test_logs using Turing using Turing.Inference: Inference From dde2fc13077322545cc3bbd8065878af106b38c3 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 24 Feb 2026 20:26:16 +0000 Subject: [PATCH 21/48] DPPL is released --- Project.toml | 3 --- test/Project.toml | 3 --- 2 files changed, 6 deletions(-) diff --git a/Project.toml b/Project.toml index aae6045498..5c57f37bcf 100644 --- a/Project.toml +++ b/Project.toml @@ -42,9 +42,6 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [weakdeps] DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" -[sources] -DynamicPPL = {rev = "breaking", url = "https://github.com/TuringLang/DynamicPPL.jl"} - [extensions] TuringDynamicHMCExt = "DynamicHMC" diff --git a/test/Project.toml b/test/Project.toml index b1b2c4a2f2..1e768916d2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -40,9 +40,6 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = 
"8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -[sources] -DynamicPPL = {rev = "breaking", url = "https://github.com/TuringLang/DynamicPPL.jl"} - [compat] ADTypes = "1" AbstractMCMC = "5.13" From 4c3d67c6201c4e9b778673a3171716d95807ef13 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 25 Feb 2026 12:09:48 +0000 Subject: [PATCH 22/48] Fix tests --- src/mcmc/mh.jl | 4 ++-- test/mcmc/mh.jl | 17 ++++------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 2db9554000..971fbb5b12 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -297,10 +297,10 @@ function AbstractMCMC.step( prob_dens_string = if initial_log_proposal_density == -Inf "zero" else - "NaN" + "a NaN" end error( - "The initial parameters have a $prob_dens_string probability density under" * + "The initial parameters have $prob_dens_string probability density under" * " the proposal distribution (for example, an initial value of `x=2.0`" * " for a proposal `@varname(x) => Uniform(0, 1)`. This will cause the" * " sampler to get stuck at the initial parameters. Consider specifying" * diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 273bbc72c8..9a3e3be34f 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -101,20 +101,11 @@ GKernel(variance, vn) = (vnt -> Normal(vnt[vn], sqrt(variance))) @test mean(chn[:x]) ≈ 2 / 3 atol = 0.05 @test mean(chn[:y]) ≈ 4 / 3 atol = 0.05 - # Note that if we additionally don't specify a proposal for `a`, then it will defer - # to InitFromPrior for all variables, and its prior *is* Normal(). So we can do - # this: + # This should work too. chn2 = sample(StableRNG(468), f(), MH(), 10000) - # and it should actually give us exactly the same results as above. - # - # This test *does* depend on internal implementation details of MH and is a bit - # brittle. If it fails, you should check that the values are still approximately - # correct (i.e. 0, 2/3, and 4/3). 
If they are still correct, then it probably just - # means that the internal implementation details have changed, and this test can - # just be updated (or removed). - @test mean(chn2[:a]) ≈ mean(chn[:a]) - @test mean(chn2[:x]) ≈ mean(chn[:x]) - @test mean(chn2[:y]) ≈ mean(chn[:y]) + @test mean(chn[:a]) ≈ 0.0 atol = 0.05 + @test mean(chn[:x]) ≈ 2 / 3 atol = 0.05 + @test mean(chn[:y]) ≈ 4 / 3 atol = 0.05 end end From 92b9c63fceb6afd044e0edec31077858b7864df1 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 25 Feb 2026 13:38:06 +0000 Subject: [PATCH 23/48] fix Enzyme project env --- test/integration/enzyme/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/integration/enzyme/Project.toml b/test/integration/enzyme/Project.toml index 2a7f3c47f3..5069b35d89 100644 --- a/test/integration/enzyme/Project.toml +++ b/test/integration/enzyme/Project.toml @@ -9,4 +9,3 @@ Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [sources] Turing = {path = "../../../"} -DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "breaking"} From 6001b236a871b8df8b17e3bc4d99c42304b8cc27 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 28 Feb 2026 14:05:16 +0000 Subject: [PATCH 24/48] fix LinkedRW printing --- src/mcmc/mh.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 971fbb5b12..a6990e596e 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -178,9 +178,15 @@ function DynamicPPL.init( rng::Random.AbstractRNG, vn::VarName, prior::Distribution, strategy::InitFromProposals ) if haskey(strategy.proposals, vn) - strategy.verbose && @info "varname $vn: proposal $(strategy.proposals[vn][2])" # this is the proposal that the user wanted is_linkedrw, dist = strategy.proposals[vn] + if strategy.verbose + if is_linkedrw + @info "varname $vn: proposal is a LinkedRW with covariance matrix $(dist.Σ)" + else + @info "varname $vn: proposal $(strategy.proposals[vn][2])" + end + end if is_linkedrw 
transform = DynamicPPL.from_linked_vec_transform(prior) linked_vec = rand(rng, dist) From 0cdbfe5722b2e8bbdd5ddd7c5d0601f24cf70d1c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 15:08:56 +0000 Subject: [PATCH 25/48] Implement checks for discrete variables with HMC --- src/Turing.jl | 2 +- src/{init_strategy.jl => common.jl} | 26 ++++++++++++++++++++------ src/mcmc/hmc.jl | 1 + test/mcmc/hmc.jl | 11 +++++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) rename src/{init_strategy.jl => common.jl} (62%) diff --git a/src/Turing.jl b/src/Turing.jl index afe9f6269a..197cc0eeea 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -41,7 +41,7 @@ end # Random probability measures. include("stdlib/distributions.jl") include("stdlib/RandomMeasures.jl") -include("init_strategy.jl") +include("common.jl") include("mcmc/Inference.jl") # inference algorithms using .Inference include("variational/Variational.jl") diff --git a/src/init_strategy.jl b/src/common.jl similarity index 62% rename from src/init_strategy.jl rename to src/common.jl index cf66e341db..e3a380f35e 100644 --- a/src/init_strategy.jl +++ b/src/common.jl @@ -27,13 +27,27 @@ function _convert_initial_params(@nospecialize(_::Any)) throw(ArgumentError(errmsg)) end -# TODO: Implement additional checks for certain samplers, e.g. -# HMC not supporting discrete parameters. -function _check_model(model::DynamicPPL.Model) - return DynamicPPL.check_model(model; error_on_failure=true) +allow_discrete_variables(sampler::AbstractMCMC.AbstractSampler) = true +function _check_model(model::DynamicPPL.Model, fail_if_discrete::Bool) + result = DynamicPPL.check_model( + model; error_on_failure=false, fail_if_discrete=fail_if_discrete + ) + if !result + throw( + ArgumentError( + "The model $(model.f) has one or more issues that may cause inference to fail. 
Please see the warnings above for details.\n\nIf you think that this is a false positive, you can disable this by passing the `check_model=false` keyword argument to `sample` or the mode estimation functions. Please also consider opening an issue.\n", + ), + ) + end +end +function _check_model(model::DynamicPPL.Model, sampler::AbstractMCMC.AbstractSampler) + # This is hit by MCMC + return _check_model(model, !allow_discrete_variables(sampler)) end -function _check_model(model::DynamicPPL.Model, ::AbstractMCMC.AbstractSampler) - return _check_model(model) +function _check_model(model::DynamicPPL.Model) + # Optimisation hits this. TODO: We allow discrete variables now, but that does depend on + # the optimisation algorithm, surely? + return _check_model(model, false) end # Similar to InitFromParams, this is just for convenience diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index 84b9c628ff..8be1b48784 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -1,6 +1,7 @@ abstract type Hamiltonian <: AbstractSampler end abstract type StaticHamiltonian <: Hamiltonian end abstract type AdaptiveHamiltonian <: Hamiltonian end +Turing.allow_discrete_variables(sampler::Hamiltonian) = false ### ### Sampler states diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl index c6b5af2162..b8875a689b 100644 --- a/test/mcmc/hmc.jl +++ b/test/mcmc/hmc.jl @@ -323,6 +323,17 @@ using Turing failing_model(), NUTS(), 10; progress=false ) end + + @testset "check_model fails with discrete variables" begin + @model function discrete_model() + return x ~ Categorical([0.5, 0.5]) + end + + for spl in (HMC(0.1, 10), NUTS()) + @test_throws ArgumentError Turing._check_model(discrete_model(), spl) + @test_throws ArgumentError sample(discrete_model(), spl, 10) + end + end end end From ac4c9df8d2272553b8b43d15966b004a7d15bbcc Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 16:50:58 +0000 Subject: [PATCH 26/48] Fix tests --- test/mcmc/Inference.jl | 4 ++-- test/mcmc/abstractmcmc.jl | 19 
++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index b67c08b2bd..85a94193ee 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -595,7 +595,7 @@ using Turing return x ~ Normal(x, 1) end - @test_throws ErrorException sample( + @test_throws ArgumentError sample( StableRNG(seed), demo_repeated_varname(), NUTS(), 10; check_model=true ) # Make sure that disabling the check also works. @@ -609,7 +609,7 @@ using Turing @model function demo_incorrect_missing(y) return y[1:1] ~ MvNormal(zeros(1), I) end - @test_throws ErrorException sample( + @test_throws ArgumentError sample( StableRNG(seed), demo_incorrect_missing([missing]), NUTS(), 10; check_model=true ) end diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 3774707f7f..d743f25318 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -10,17 +10,18 @@ using Turing # Set up a model for which check_model errors. @model f() = x ~ Normal() model = f() - Turing._check_model(::typeof(model)) = error("nope") + spl = NUTS() + Turing._check_model(::typeof(model), ::typeof(spl)) = error("nope") # Make sure that default sampling does throw the error. - @test_throws "nope" sample(model, NUTS(), 100) - @test_throws "nope" sample(model, NUTS(), MCMCThreads(), 100, 2) - @test_throws "nope" sample(model, NUTS(), MCMCSerial(), 100, 2) - @test_throws "nope" sample(model, NUTS(), MCMCDistributed(), 100, 2) + @test_throws "nope" sample(model, spl, 10) + @test_throws "nope" sample(model, spl, MCMCThreads(), 10, 2) + @test_throws "nope" sample(model, spl, MCMCSerial(), 10, 2) + @test_throws "nope" sample(model, spl, MCMCDistributed(), 10, 2) # Now disable the check and make sure sampling works. 
- @test sample(model, NUTS(), 100; check_model=false) isa Any - @test sample(model, NUTS(), MCMCThreads(), 100, 2; check_model=false) isa Any - @test sample(model, NUTS(), MCMCSerial(), 100, 2; check_model=false) isa Any - @test sample(model, NUTS(), MCMCDistributed(), 100, 2; check_model=false) isa Any + @test sample(model, spl, 10; check_model=false) isa Any + @test sample(model, spl, MCMCThreads(), 10, 2; check_model=false) isa Any + @test sample(model, spl, MCMCSerial(), 10, 2; check_model=false) isa Any + @test sample(model, spl, MCMCDistributed(), 10, 2; check_model=false) isa Any end @testset "Initial parameters" begin From 9fdd23f19d9c50f7ab162528e91a3e5576e8a285 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 16:51:30 +0000 Subject: [PATCH 27/48] Bump min DPPL --- test/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 3343a14b81..002e8a9914 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -56,7 +56,7 @@ DifferentiationInterface = "0.7" Distributions = "0.25" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.40" +DynamicPPL = "0.40.2" FiniteDifferences = "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10, 1" HypothesisTests = "0.11" From 3a834eac4c9d332ec1a4180cf5f64e62290d11c1 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 17:23:30 +0000 Subject: [PATCH 28/48] Improve changelog --- HISTORY.md | 79 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 610ea49e10..7a79ed14b7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,15 +2,30 @@ ## DynamicPPL 0.40 and `VarNamedTuple` -DynamicPPL v0.40 is a major overhaul of the internal data structure. +DynamicPPL v0.40 includes a major overhaul of Turing's internal data structures. 
Most notably, cases where we might once have used `Dict{VarName}` or `NamedTuple` have all been replaced with a single data structure, called `VarNamedTuple`. This provides substantial benefits in terms of robustness and performance. However, it does place some constraints on Turing models. -Specifically: +Specifically, the types of **containers that can include random variables** are now more limited: +if `x[i] ~ dist` is a random variable, then `x` must obey the following criteria: - - Arrays whose elements are random variables should have a constant size and type. For example, the following will no longer work: + - They must be arrays. Dicts and other containers are currently unsupported (we have [an issue to track this](https://github.com/TuringLang/DynamicPPL.jl/issues/1263)). If you really need this functionality, please open an issue and let us know; we can try to make it a priority. + + ```julia + @model function f() + # Allowed + x = Array{Float64}(undef, 1) + x[1] ~ Normal() + + # Forbidden + x = Dict{Int,Float64}() + return x[1] ~ Normal() + end + ``` + + - They must not be resized between calls to `~`. The following is forbidden (you should initialise `x` to the correct size before the loop): ```julia x = Float64[] @@ -19,8 +34,9 @@ Specifically: x[i] ~ Normal() end ``` - - This only applies to arrays on the left-hand side of tilde-statements. In general, there are no restrictions on the code that you can use outside of tilde-statements. + +However, please note that this only applies to **containers that contain random variables on the left-hand side of tilde-statements.** +In general, there are no restrictions on containers of *observed* data, or containers that are not used in tilde-statements. - Likewise, arrays of random variables should ideally have a constant size from iteration to iteration. That means a model like this will fail sometimes (*but* see below): @@ -33,7 +49,9 @@ Specifically: ``` *Technically speaking*: Inference (e.g. 
MCMC sampling) on this model will still work, but if you want to use `returned` or `predict`, both of the following conditions must hold true: (1) you must use FlexiChains.jl; (2) all elements of `x` must be random variables, i.e., you cannot have a mixture of `x[i]`'s being random variables and observed. - - TODO + +`VarNamedTuple` and `@vnt` are now re-exported from Turing directly. +There is a docs page explaining how to use and create `VarNamedTuple`s, which [can be found here](https://turinglang.org/docs/usage/varnamedtuple/). ## Optimisation interface @@ -43,11 +61,11 @@ The aim of this has been to provide users with a more consistent and principled The crux of the issue is that Optimization.jl expects vectorised inputs, whereas Turing models are more high-level: they have named variables which may be scalars, vectors, or in general anything. Prior to this version, Turing's interface required the user to provide the vectorised inputs 'raw', which is both unintuitive and error-prone (especially when considering that optimisation may run in linked or unlinked space). -Going forward, initial parameters for optimisation are specified using `AbstractInitStrategy`. +Going forward, initial parameters for optimisation are specified using `AbstractInitStrategy` (for more information about this, please see [the docs on MCMC sampling](https://turinglang.org/docs/usage/sampling-options/#specifying-initial-parameters)). If specific parameters are provided (via `InitFromParams`), these must be in model space (i.e. untransformed). This directly mimics the interface for MCMC sampling that has been in place since v0.41. -Furthermore, lower and upper bounds (if desired) must be specified as either a NamedTuple or an AbstractDict{<:VarName}. +Furthermore, lower and upper bounds (if desired) can be specified as `VarNamedTuple`s using the `lb` and `ub` keyword arguments. 
Bounds are always provided in model space; Turing will handle the transformation of these bounds to linked space if necessary. Constraints are respected when creating initial parameters for optimisation: if the `AbstractInitStrategy` provided is incompatible with the constraints (for example `InitFromParams((; x = 2.0))` but `x` is constrained to be between `[0, 1]`), an error will be raised. @@ -59,6 +77,9 @@ using Turing maximum_a_posteriori( f(); # All of the following are in unlinked space. + # We use NamedTuples here for simplicity, but you can use + # VarNamedTuple or Dict{<:VarName} as well (internally they + # will be converted to VarNamedTuple). initial_params=InitFromParams((; x=0.3)), lb=(; x=0.1), ub=(; x=0.4), @@ -69,10 +90,28 @@ For more information, please see the docstring of `estimate_mode`. Note that in some cases, the translation of bounds to linked space may not be well-defined. This is especially true for distributions where the samples have elements that are not independent (for example, Dirichlet, or LKJCholesky). -In these cases, Turing will raise an error if bounds are provided. +**In these cases, Turing will raise an error if bounds are provided.** Users who wish to perform optimisation with such constraints should directly use `LogDensityFunction` and Optimization.jl. Documentation on this matter will be forthcoming. +### Other changes to the optimisation interface + + - `estimate_mode`, `maximum_a_posteriori`, and `maximum_likelihood` now accept an optional `rng` first argument for reproducible initialisation. + - New keyword argument `link::Bool=true` controls whether to optimise in linked (transformed) space. + - New keyword argument `check_constraints_at_runtime::Bool=true` enables runtime constraint checking during model evaluation. + - Generic (non-box) constraints via `cons`, `lcons`, `ucons` are no longer supported. Users who need these should use `LogDensityFunction` and Optimization.jl directly. 
+ +### `ModeResult` changes + +The return type from an optimisation procedure, `ModeResult`, has been substantially reworked: + + - `ModeResult.params` is now a `VarNamedTuple` (previously an `AbstractDict{<:VarName}`). Parameters can be accessed via e.g. `m.params[@varname(x)]`. + - The `values::NamedArray` field has been removed. Use `get_vector_params(m)` (newly exported) to obtain `(Vector{VarName}, Vector{values})`. + - `Base.get(m::ModeResult, ...)` has been removed; use `m.params[@varname(x)]` instead. + - `StatsBase.coef` now returns a plain `Vector` (not a `NamedArray`). + - `StatsBase.coefnames` now returns a `Vector{VarName}` (not strings or symbols). + - `StatsBase.informationmatrix`: the `hessian_function` keyword argument has been replaced by `adtype::ADTypes.AbstractADType` (default `AutoForwardDiff()`). Hessian computation uses DifferentiationInterface under the hood. + ## `IS` sampler The `IS` sampler has been removed (its behaviour was in fact exactly the same as `Prior`). @@ -84,6 +123,28 @@ The interface of the MH sampler is slightly different. It no longer accepts `AdvancedMH` proposals, and is now more flexible: you can specify proposals for individual `VarName`s (not just top-level symbols), and any unspecified `VarName`s will be drawn from the prior, instead of being silently ignored. It is also faster than before (by around 30% on simple models). +Additional changes: + + - A new type `LinkedRW` allows specifying random-walk proposals in linked (unconstrained) space, e.g. `MH(@varname(x) => LinkedRW(cov_matrix))`. + + - Callable (conditional) proposals now receive a `VarNamedTuple` of the full parameter state, rather than a single scalar. For example: + + ```julia + # Old + MH(:m => x -> Normal(x, 1)) + # New + MH(@varname(m) => (vnt -> Normal(vnt[@varname(m)], 1))) + ``` + - MH now reports whether each proposal was `accepted` in the chain stats. 
+ - At the start of sampling, MH logs `@info` messages showing which proposal is used for each variable (disable with `verbose=false`). This helps detect misspecified proposals. + - MH validates initial parameters against the proposal distribution; if they have zero or NaN probability, a clear error is thrown. + +## HMC / NUTS + +HMC-family samplers now check for discrete variables before sampling begins. +If a model contains discrete variables (e.g. `x ~ Categorical(...)`) and an HMC sampler is used, an `ArgumentError` is thrown immediately. +Previously, this would silently proceed. + ## `GibbsConditional` When defining a conditional posterior, instead of being provided with a Dict of values, the function must now take a `VarNamedTuple` containing the values. From 772330af19ff769abbcf484cda8019a826ad75bf Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 17:32:18 +0000 Subject: [PATCH 29/48] Rename get_vector_params -> vector_names_and_params --- HISTORY.md | 2 +- docs/make.jl | 1 + docs/src/api.md | 14 +++++++------- docs/src/optim.md | 1 + src/Turing.jl | 2 +- src/optimisation/Optimisation.jl | 2 +- src/optimisation/stats.jl | 14 +++++++------- 7 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 docs/src/optim.md diff --git a/HISTORY.md b/HISTORY.md index 7a79ed14b7..a7c6c9787a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -106,7 +106,7 @@ Documentation on this matter will be forthcoming. The return type from an optimisation procedure, `ModeResult`, has been substantially reworked: - `ModeResult.params` is now a `VarNamedTuple` (previously an `AbstractDict{<:VarName}`). Parameters can be accessed via e.g. `m.params[@varname(x)]`. - - The `values::NamedArray` field has been removed. Use `get_vector_params(m)` (newly exported) to obtain `(Vector{VarName}, Vector{values})`. + - The `values::NamedArray` field has been removed. Use `vector_names_and_params(m)` (newly exported) to obtain `(Vector{VarName}, Vector{values})`. 
- `Base.get(m::ModeResult, ...)` has been removed; use `m.params[@varname(x)]` instead. - `StatsBase.coef` now returns a plain `Vector` (not a `NamedArray`). - `StatsBase.coefnames` now returns a `Vector{VarName}` (not strings or symbols). diff --git a/docs/make.jl b/docs/make.jl index a58663e20e..4f6b096997 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -31,6 +31,7 @@ makedocs(; "Variational " => "api/Variational.md", "RandomMeasures " => "api/RandomMeasures.md", ], + "Optimisation" => "optim.md", ], checkdocs=:exports, doctest=false, diff --git a/docs/src/api.md b/docs/src/api.md index 0d5ec72154..0f4cdde93d 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -168,10 +168,10 @@ LogPoisson See the [mode estimation tutorial](https://turinglang.org/docs/tutorials/docs-17-mode-estimation/) for more information. -| Exported symbol | Documentation | Description | -|:---------------------- |:-------------------------------------------------- |:--------------------------------------------- | -| `maximum_a_posteriori` | [`Turing.Optimisation.maximum_a_posteriori`](@ref) | Find a MAP estimate for a model | -| `maximum_likelihood` | [`Turing.Optimisation.maximum_likelihood`](@ref) | Find a MLE estimate for a model | -| `MAP` | [`Turing.Optimisation.MAP`](@ref) | Type to use with Optim.jl for MAP estimation | -| `MLE` | [`Turing.Optimisation.MLE`](@ref) | Type to use with Optim.jl for MLE estimation | -| `get_vector_params` | [`Turing.Optimisation.get_vector_params`](@ref) | Extract parameter names and values as vectors | +| Exported symbol | Documentation | Description | +|:------------------------- |:----------------------------------------------------- |:--------------------------------------------- | +| `maximum_a_posteriori` | [`Turing.Optimisation.maximum_a_posteriori`](@ref) | Find a MAP estimate for a model | +| `maximum_likelihood` | [`Turing.Optimisation.maximum_likelihood`](@ref) | Find a MLE estimate for a model | +| `MAP` | [`Turing.Optimisation.MAP`](@ref) | 
Type to use with Optim.jl for MAP estimation | +| `MLE` | [`Turing.Optimisation.MLE`](@ref) | Type to use with Optim.jl for MLE estimation | +| `vector_names_and_params` | [`Turing.Optimisation.vector_names_and_params`](@ref) | Extract parameter names and values as vectors | diff --git a/docs/src/optim.md b/docs/src/optim.md new file mode 100644 index 0000000000..b41334f88f --- /dev/null +++ b/docs/src/optim.md @@ -0,0 +1 @@ +# Mode Estimation diff --git a/src/Turing.jl b/src/Turing.jl index 197cc0eeea..6cbf13741a 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -174,7 +174,7 @@ export maximum_likelihood, MAP, MLE, - get_vector_params, + vector_names_and_params, # Chain save/resume loadstate, # kwargs in SMC diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index a9ac2906fe..b396910715 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -17,7 +17,7 @@ using StatsAPI: StatsAPI using Statistics: Statistics using LinearAlgebra: LinearAlgebra -export maximum_a_posteriori, maximum_likelihood, MAP, MLE, get_vector_params +export maximum_a_posteriori, maximum_likelihood, MAP, MLE, vector_names_and_params include("init.jl") diff --git a/src/optimisation/stats.jl b/src/optimisation/stats.jl index b05fa6b5e4..fffffd959c 100644 --- a/src/optimisation/stats.jl +++ b/src/optimisation/stats.jl @@ -1,7 +1,7 @@ import DifferentiationInterface as DI """ - get_vector_params(m::ModeResult) + vector_names_and_params(m::ModeResult) Generates a vectorised form of the optimised parameters stored in the `ModeResult`, along with the corresponding variable names. These parameters correspond to unlinked space. @@ -9,7 +9,7 @@ with the corresponding variable names. These parameters correspond to unlinked s This function returns two vectors: the first contains the variable names, and the second contains the corresponding values. 
""" -function get_vector_params(m::ModeResult) +function vector_names_and_params(m::ModeResult) # This function requires some subtlety. We _could_ simply iterate over keys(m.params) # and values(m.params), apply AbstractPPL.varname_and_value_leaves to each pair, and # then collect them into a vector. *However*, this vector will later have to be used @@ -60,7 +60,7 @@ during the computation of the standard errors will be caught and reported in an "Error notes" column. """ function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true) - vns, estimates = get_vector_params(m) + vns, estimates = vector_names_and_params(m) # Get columns for coeftable. terms = string.(vns) # If numerrors_warnonly is true, and if either the information matrix is singular or has @@ -157,10 +157,10 @@ function StatsBase.informationmatrix( ldf = DynamicPPL.LogDensityFunction(model, logprob_func(m.estimator)) f = Base.Fix1(LogDensityProblems.logdensity, ldf) # Then get the vectorised parameters. - _, x = get_vector_params(m) + _, x = vector_names_and_params(m) # We can include a check here to make sure that f(x) is in fact the log density at x. - # This helps guard against potential bugs where `get_vector_params` returns a + # This helps guard against potential bugs where `vector_names_and_params` returns a # wrongly-ordered parameter vector. 
if !isapprox(f(x), m.lp) error( @@ -172,8 +172,8 @@ function StatsBase.informationmatrix( return -DI.hessian(f, adtype, x) end -StatsBase.coef(m::ModeResult) = last(get_vector_params(m)) -StatsBase.coefnames(m::ModeResult) = first(get_vector_params(m)) +StatsBase.coef(m::ModeResult) = last(vector_names_and_params(m)) +StatsBase.coefnames(m::ModeResult) = first(vector_names_and_params(m)) StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) StatsBase.loglikelihood(m::ModeResult) = m.lp From f87f1fa94c53b05acfdecdd9faf38a2b9e1e7d12 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 18:07:55 +0000 Subject: [PATCH 30/48] Rewrite optimisation docs --- docs/Project.toml | 4 + docs/src/optim.md | 193 +++++++++++++++++++++++++++++++ src/optimisation/Optimisation.jl | 11 +- 3 files changed, 203 insertions(+), 5 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index fd26d25df9..377c0dbd4e 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,8 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656" +Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" +OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" diff --git a/docs/src/optim.md b/docs/src/optim.md index b41334f88f..d7629006b8 100644 --- a/docs/src/optim.md +++ b/docs/src/optim.md @@ -1 +1,194 @@ # Mode Estimation + +After defining a statistical model, in addition to sampling from its distributions, one may be interested in finding the parameter values that maximise (for instance) the posterior density, or the likelihood. +This is called mode estimation. 
+ +Turing provides support for two mode estimation techniques, [maximum likelihood estimation](https://en.wikipedia.org/wiki/Maximum_likelihood_estimation) (MLE) and [maximum a posteriori](https://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation) (MAP) estimation. + +We begin by defining a simple model to work with: + +```@example 1 +using Turing + +@model function normal_model(y) + x ~ Normal() + y ~ Normal(x) + return nothing +end +``` + +Once the model is defined, we can construct a model instance as we normally would: + +```@example 1 +model = normal_model(2.0) +``` + +In its simplest form, finding the maximum a posteriori or maximum likelihood parameters is just a function call: + +```@example 1 +# Generate a MLE estimate. +mle_estimate = maximum_likelihood(model) +``` + +```@example 1 +# Generate a MAP estimate. +map_estimate = maximum_a_posteriori(model) +``` + +The estimates are returned as instances of the `ModeResult` type. +It has the fields `params` (a `VarNamedTuple` mapping `VarName`s to the parameter values found) and `lp` for the log probability at the optimum. +For more information, please see the docstring of `ModeResult`. + +You can access individual parameter values by indexing into the `params` field with `VarName`s: + +```@example 1 +map_estimate.params[@varname(x)] +``` + +If you need a vectorised form of the parameters, you can use `vector_names_and_params`, which return a tuple of two vectors: one of `VarName`s and one of the corresponding parameter values. 
+ +```@example 1 +vector_names_and_params(map_estimate) +``` + +The `optim_result` field (which is not printed by default) contains the original result from the underlying optimisation solver, which is useful for diagnosing convergence issues and accessing solver-specific information: + +```@example 1 +map_estimate.optim_result +``` + +## Controlling the optimisation process + +### Solvers + +Under the hood, `maximum_likelihood` and `maximum_a_posteriori` use the [Optimization.jl](https://github.com/SciML/Optimization.jl) package, which provides a unified interface to many other optimisation packages. +By default Turing uses the [LBFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method from [Optim.jl](https://docs.sciml.ai/Optimization/stable/optimization_packages/optim/) to find the mode estimate, but we can change that to any other solver by passing it as the second argument: + +```@example 1 +using OptimizationOptimJL: NelderMead + +maximum_likelihood(model, NelderMead()) +``` + +Optimization.jl supports [many more solvers](https://docs.sciml.ai/Optimization/stable/); please see its documentation for details. + +### Initial parameters + +We can help the optimisation by giving it a starting point we know is close to the final solution. +Initial parameters are specified using `InitFromParams`, and must be provided in model space (i.e. untransformed): + +```@example 1 +params = VarNamedTuple(; x=0.5) +maximum_likelihood(model; initial_params=InitFromParams(params)) +``` + +The default initialisation strategy is `InitFromPrior()`, which draws initial values from the prior. + +### AD backend + +You can also specify an automatic differentiation method using the `adtype` keyword argument: + +```@example 1 +import Mooncake + +maximum_likelihood(model; adtype=AutoMooncake()) +``` + +### Linked vs unlinked optimisation + +By default, Turing transforms model parameters to an unconstrained space before optimising (`link=true`). 
+This avoids discontinuities where the log-density drops to `-Inf` outside the support of a distribution. +Note that the returned parameter values are always in the original (untransformed) space, regardless of the `link` setting. + +If you want to optimise in the original parameter space instead, set `link=false`. + +```@example 1 +maximum_a_posteriori(model; link=false) +``` + +This is usually only useful under very specific circumstances, namely when your model contains distributions for which the mapping from model space to linked space is dependent on another parameter's value. + +### Box constraints + +You can provide lower and upper bounds on parameters using the `lb` and `ub` keywords respectively. +Bounds are specified as a `VarNamedTuple` and, just like initial values, must be provided in model space (i.e. untransformed): + +```@example 1 +lb = VarNamedTuple(; x=0.0) +ub = VarNamedTuple(; x=0.2) +maximum_likelihood(model; lb=lb, ub=ub) +``` + +Turing will internally translate these bounds to unconstrained space if `link=true`; as a user you should not need to worry at all about the details of this transformation. + +In this case we only have one parameter, but if there are multiple parameters and you only want to constrain some of them, you can provide bounds for the parameters you want to constrain and omit the others. + +Note that for some distributions (e.g. `Dirichlet`, `LKJCholesky`), the mapping from model-space bounds to linked-space bounds is not well-defined. +In these cases, Turing will raise an error. +If you need constrained optimisation for such variables, either set `link=false` or use `LogDensityFunction` with Optimization.jl directly. + +::: {.callout-note} +Generic (non-box) constraints are not supported by Turing's optimisation interface. +For these, please use `LogDensityFunction` and Optimization.jl directly. +::: + +### Solver options + +Any extra keyword arguments are passed through to `Optimization.solve`. 
+Some commonly useful ones are `maxiters`, `abstol`, and `reltol`: + +```@example 1 +params = VarNamedTuple(; x=-4.0) +badly_converged_mle = maximum_likelihood( + model, NelderMead(); initial_params=InitFromParams(params), maxiters=10, reltol=1e-9 +) +``` + +### Reproducibility + +To get reproducible results, pass an `rng` as the first argument: + +```@example 1 +using Random: Xoshiro +maximum_a_posteriori(Xoshiro(468), model) +``` + +This controls the random number generator used for parameter initialisation; the actual optimisation process is deterministic. + +For more details and a full list of keyword arguments, see the docstring of `Turing.Optimisation.estimate_mode`. + +## Analysing your mode estimate + +Turing extends several methods from `StatsBase` that can be used to analyse your mode estimation results. +Methods implemented include `vcov`, `informationmatrix`, `coeftable`, `coef`, and `coefnames`. + +For example, let's examine our MLE estimate from above using `coeftable`: + +```@example 1 +using StatsBase: coeftable +coeftable(mle_estimate) +``` + +Standard errors are calculated from the Fisher information matrix (inverse Hessian of the log likelihood or log joint). +Note that standard errors calculated in this way may not always be appropriate for MAP estimates, so please be cautious in interpreting them. + +The Hessian is computed using automatic differentiation. +By default, `ForwardDiff` is used, but if you are feeling brave you can specify a different backend via the `adtype` keyword argument to `informationmatrix`. +(Note that AD backend support for second-order derivatives is more limited than for first-order derivatives, so not all backends will work here.) 
+ +```@example 1 +using StatsBase: informationmatrix +import ReverseDiff + +informationmatrix(mle_estimate; adtype=AutoReverseDiff()) +``` + +## Sampling with the MAP/MLE as initial states + +You can begin sampling your chain from an MLE/MAP estimate by wrapping it in `InitFromParams` and providing it to the `sample` function with the keyword `initial_params`. +For example, here is how to sample from the full posterior using the MAP estimate as the starting point: + +```@example 1 +map_estimate = maximum_a_posteriori(model) +chain = sample(model, NUTS(), 1_000; initial_params=InitFromParams(map_estimate)) +``` diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index b396910715..a8c51256d3 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -127,15 +127,16 @@ end function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) printstyled(io, "ModeResult\n"; bold=true) # typeof avoids the parentheses in the printed output - println(io, " ├ estimator : $(typeof(m.estimator))") - println(io, " ├ lp : $(m.lp)") + println(io, " ├ estimator : $(typeof(m.estimator))") + println(io, " ├ lp : $(m.lp)") entries = length(m.params) == 1 ? "entry" : "entries" - println(io, " ├ params : VarNamedTuple with $(length(m.params)) $(entries)") + println(io, " ├ params : VarNamedTuple with $(length(m.params)) $(entries)") for (i, (vn, val)) in enumerate(pairs(m.params)) tree_char = i == length(m.params) ? 
"└" : "├" - println(io, " │ $(tree_char) $vn => $(val)") + println(io, " │ $(tree_char) $vn => $(val)") end - print(io, " └ linked : $(m.linked)") + println(io, " │ linked : $(m.linked)") + print(io, " └ (2 more fields: optim_result, ldf)") return nothing end From b8f7a2e3bd0e527415a97919c79d3a188476986f Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 18:45:33 +0000 Subject: [PATCH 31/48] use VectorBijectors in optimisation --- Project.toml | 7 ++++-- docs/Project.toml | 2 ++ docs/src/optim.md | 15 +++++++++++-- src/optimisation/stats.jl | 47 ++++++++++++++++++++++----------------- test/Project.toml | 7 ++++-- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/Project.toml b/Project.toml index 2173ff6e55..2e1a32b10f 100644 --- a/Project.toml +++ b/Project.toml @@ -39,6 +39,9 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +[sources] +Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec"} + [weakdeps] DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" @@ -55,7 +58,7 @@ AdvancedMH = "0.8.9" AdvancedPS = "0.7.2" AdvancedVI = "0.6" BangBang = "0.4.2" -Bijectors = "0.14, 0.15" +Bijectors = "0.15.17" Compat = "4.15.0" DataStructures = "0.18, 0.19" DifferentiationInterface = "0.7" @@ -63,7 +66,7 @@ Distributions = "0.25.77" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" -DynamicPPL = "0.40" +DynamicPPL = "0.40.3" EllipticalSliceSampling = "0.5, 1, 2" ForwardDiff = "0.10.3, 1" Libtask = "0.9.14" diff --git a/docs/Project.toml b/docs/Project.toml index 377c0dbd4e..8cf000eca9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,8 @@ [deps] +Bijectors = "76274a88-744f-5084-9051-94815aaf08c4" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Mooncake = 
"da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" diff --git a/docs/src/optim.md b/docs/src/optim.md index d7629006b8..88495c3680 100644 --- a/docs/src/optim.md +++ b/docs/src/optim.md @@ -46,6 +46,7 @@ map_estimate.params[@varname(x)] ``` If you need a vectorised form of the parameters, you can use `vector_names_and_params`, which return a tuple of two vectors: one of `VarName`s and one of the corresponding parameter values. +(Note that these values are *always* returned in untransformed space.) ```@example 1 vector_names_and_params(map_estimate) @@ -98,7 +99,17 @@ maximum_likelihood(model; adtype=AutoMooncake()) By default, Turing transforms model parameters to an unconstrained space before optimising (`link=true`). This avoids discontinuities where the log-density drops to `-Inf` outside the support of a distribution. -Note that the returned parameter values are always in the original (untransformed) space, regardless of the `link` setting. +Note that the parameter values returned are always in the original (untransformed) space, regardless of the `link` setting. + +::: {.callout-note} + +## What does 'unconstrained' really mean? + +Note that the transformation to unconstrained space refers to the support of the *original* distribution prior to any optimisation constraints being applied. +For example, a parameter `x ~ Beta(2, 2)` will be transformed from the original space of `(0, 1)` to the unconstrained space of `(-Inf, Inf)` (via the logit transform). +However, it is possible that the optimisation still proceeds in a constrained space, if constraints on the parameter are specified via `lb` or `ub`. +For example, if we specify `lb=0.0` and `ub=0.2` for the same parameter, then the optimisation will proceed in the constrained space of `(-Inf, logit(0.2))`. +::: If you want to optimise in the original parameter space instead, set `link=false`. 
@@ -106,7 +117,7 @@ If you want to optimise in the original parameter space instead, set `link=false maximum_a_posteriori(model; link=false) ``` -This is usually only useful under very specific circumstances, namely when your model contains distributions for which the mapping from model space to linked space is dependent on another parameter's value. +This is usually only useful under very specific circumstances, namely when your model contains distributions for which the mapping from model space to unconstrained space is dependent on another parameter's value. ### Box constraints diff --git a/src/optimisation/stats.jl b/src/optimisation/stats.jl index fffffd959c..cacb511361 100644 --- a/src/optimisation/stats.jl +++ b/src/optimisation/stats.jl @@ -1,4 +1,5 @@ import DifferentiationInterface as DI +import Bijectors.VectorBijectors: optic_vec """ vector_names_and_params(m::ModeResult) @@ -20,32 +21,36 @@ function vector_names_and_params(m::ModeResult) # To ensure that this is always the case, we will have to create a LogDensityFunction # and then use its stored ranges to extract the parameters in the correct order. This # LDF will have to be created in unlinked space. - # - # TODO(penelopeysm): This needs to be done in a "better" way using DynamicPPL - # functionality. Note that, if https://github.com/TuringLang/DynamicPPL.jl/pull/1178 - # goes through, the value vector can just be generated using - # `first(DynamicPPL.rand_with_logdensity(m.ldf, InitFromParams(m.params)))`. The - # varnames, though, are a hot mess. See the TODO below. ldf = LogDensityFunction(m.ldf.model) vns = Vector{VarName}(undef, LogDensityProblems.dimension(ldf)) - vals = Vector{Any}(undef, LogDensityProblems.dimension(ldf)) - for vn in keys(m.params) - range = if AbstractPPL.getoptic(vn) === identity - ldf._iden_varname_ranges[AbstractPPL.getsym(vn)].range - else - ldf._varname_ranges[vn].range + + # Evaluate the model to get the vectorised parameters in the right order. 
+ accs = DynamicPPL.OnlyAccsVarInfo( + DynamicPPL.PriorDistributionAccumulator(), DynamicPPL.VectorParamAccumulator(ldf) + ) + _, accs = DynamicPPL.init!!( + ldf.model, accs, InitFromParams(m.params), DynamicPPL.UnlinkAll() + ) + vector_params = DynamicPPL.get_vector_params(accs) + + # Figure out the VarNames. + priors = DynamicPPL.get_priors(accs) + vector_varnames = Vector{VarName}(undef, length(vector_params)) + for (vn, dist) in pairs(priors) + range = ldf._varname_ranges[vn].range + optics = optic_vec(dist) + # Really shouldn't happen, but catch in case optic_vec isn't properly defined + if any(isnothing, optics) + error( + "The sub-optics for the distribution $dist are not defined. This is a bug in Turing; please file an issue at https://github.com/TuringLang/Turing.jl/issues.", + ) end - # TODO(penelopeysm): This assumes that tovec and varname_leaves return the same - # number of sub-elements in the same order --- which is NOT true for things like - # Cholesky factors --- so this is a bug! We COULD use `varname_and_value_leaves` but - # that would just be a different kind of bug because LDF uses tovec for - # vectorisation!! 
https://github.com/TuringLang/Turing.jl/issues/2734 - val = m.params[vn] - vns[range] = collect(AbstractPPL.varname_leaves(vn, val)) - vals[range] = DynamicPPL.tovec(val) + vns = map(optic -> AbstractPPL.append_optic(vn, optic), optics) + vector_varnames[range] = vns end + # Concretise - return [x for x in vns], [x for x in vals] + return [x for x in vector_varnames], [x for x in vector_params] end # Various StatsBase methods for ModeResult diff --git a/test/Project.toml b/test/Project.toml index 002e8a9914..bdf79f2602 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -40,6 +40,9 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +[sources] +Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec"} + [compat] ADTypes = "1" AbstractMCMC = "5.13" @@ -49,14 +52,14 @@ AdvancedPS = "0.7.2" AdvancedVI = "0.6" Aqua = "0.8" BangBang = "0.4" -Bijectors = "0.14, 0.15" +Bijectors = "0.15.17" Clustering = "0.14, 0.15" Combinatorics = "1" DifferentiationInterface = "0.7" Distributions = "0.25" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.40.2" +DynamicPPL = "0.40.3" FiniteDifferences = "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10, 1" HypothesisTests = "0.11" From c5b1f72df64eef880901ab0f790ec5b2062077ff Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 2 Mar 2026 18:48:12 +0000 Subject: [PATCH 32/48] use get_priors in ESS --- src/mcmc/ess.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index 05031fcd34..c2f81839a3 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -43,7 +43,7 @@ function AbstractMCMC.step( prior_accname = DynamicPPL.accumulator_name(prior_acc) vi = DynamicPPL.setacc!!(vi, prior_acc) _, vi = DynamicPPL.init!!(rng, model, vi, initial_params, DynamicPPL.UnlinkAll()) - priors = DynamicPPL.getacc(vi, Val(prior_accname)).values + 
priors = DynamicPPL.get_priors(vi) for dist in values(priors) EllipticalSliceSampling.isgaussian(typeof(dist)) || From 6f4a17c6b46828d885aeb1669ef6d4c4107e27d9 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 03:11:21 +0000 Subject: [PATCH 33/48] point to DPPL branch --- Project.toml | 3 ++- docs/Project.toml | 4 ++++ test/Project.toml | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 2e1a32b10f..1067a228da 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,8 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [sources] -Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec"} +Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} +DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} [weakdeps] DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" diff --git a/docs/Project.toml b/docs/Project.toml index 8cf000eca9..b7b75bffe2 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,3 +8,7 @@ OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" + +[sources] +Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} +DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} diff --git a/test/Project.toml b/test/Project.toml index bdf79f2602..0902fa7c1d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -41,7 +41,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [sources] -Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec"} +Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} +DynamicPPL = {url = 
"https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} [compat] ADTypes = "1" From ff574e9a4c168c3dbae799e6f98a8ab799797374 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 03:16:18 +0000 Subject: [PATCH 34/48] Squish more bugs --- docs/src/optim.md | 9 ++++++++- src/optimisation/Optimisation.jl | 3 ++- src/optimisation/init.jl | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/src/optim.md b/docs/src/optim.md index 88495c3680..ea2ae48956 100644 --- a/docs/src/optim.md +++ b/docs/src/optim.md @@ -98,7 +98,14 @@ maximum_likelihood(model; adtype=AutoMooncake()) ### Linked vs unlinked optimisation By default, Turing transforms model parameters to an unconstrained space before optimising (`link=true`). -This avoids discontinuities where the log-density drops to `-Inf` outside the support of a distribution. +There are two reasons why one might want to do this: + + 1. This avoids discontinuities where the log-density drops to `-Inf` outside the support of a distribution. + 2. But more importantly, this avoids situations where the original sample contains values that depend on each other. + For example, in a `Dirichlet` distribution, the parameters must sum to 1. + That means that if we do not perform linking, these parameters cannot be varied completely independently, which can lead to numerical issues. + In contrast, when linking is performed, the parameters are transformed into a (shorter) vector of parameters that are completely unconstrained and independent. + Note that the parameter values returned are always in the original (untransformed) space, regardless of the `link` setting. 
::: {.callout-note} diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index a8c51256d3..5a925b214f 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -2,6 +2,7 @@ module Optimisation using ..Turing using AbstractPPL: AbstractPPL, VarName +using Bijectors: Bijectors using DynamicPPL: DynamicPPL, VarInfo, LogDensityFunction, VarNamedTuple using DocStringExtensions: TYPEDFIELDS using LogDensityProblems: LogDensityProblems @@ -245,7 +246,7 @@ supplied in unlinked space, but the optimisation is run by default in linked spa Sometimes it is possible to translate constraints from unlinked space to linked space: for example, for `x ~ Beta(2, 2)`, lower bounds in unlinked space can be translated to lower bounds in linked space via the logit transform (specificallly, by calling -`to_linked_vec_transform(Beta(2, 2))`. +`Bijectors.VectorBijectors.to_linked_vec(Beta(2, 2))`. However, if a user supplies a constraint on a Dirichlet variable, there is no well-defined mapping of unlinked constraints to linked space. 
In such cases, Turing will throw an error diff --git a/src/optimisation/init.jl b/src/optimisation/init.jl index 2438a8f093..a88ffccf56 100644 --- a/src/optimisation/init.jl +++ b/src/optimisation/init.jl @@ -219,9 +219,9 @@ function DynamicPPL.accumulate_assume!!( transform = if DynamicPPL.target_transform(acc.transform_strategy, vn) isa DynamicPPL.DynamicLink - DynamicPPL.to_linked_vec_transform(dist) + Bijectors.VectorBijectors.to_linked_vec(dist) elseif DynamicPPL.target_transform(acc.transform_strategy, vn) isa DynamicPPL.Unlink - DynamicPPL.to_vec_transform(dist) + Bijectors.VectorBijectors.to_vec(dist) else error( "don't know how to handle transform strategy $(acc.transform_strategy) for variable $(vn)", From 2c2474293c4114f61407adbc8cf9bd1a4a3b3305 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 03:18:20 +0000 Subject: [PATCH 35/48] fix more things --- src/mcmc/mh.jl | 2 +- test/optimisation/Optimisation.jl | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index a6990e596e..073b45721d 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -188,7 +188,7 @@ function DynamicPPL.init( end end if is_linkedrw - transform = DynamicPPL.from_linked_vec_transform(prior) + transform = Bijectors.VectorBijectors.from_linked_vec(prior) linked_vec = rand(rng, dist) return DynamicPPL.LinkedVectorValue(linked_vec, transform) else diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index dcc1f74326..b8d418046e 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -167,14 +167,14 @@ end (x=[0.5, 0.5],), product_distribution([Beta(2, 2), Beta(2, 2)]), ), - # TODO(penelopeysm): Broken because DynamicPPL.to_vec_transform(dist) fails. - # This needs an upstream fix. 
- # ((x=(a=0.1, b=0.1),), (x=(a=0.5, b=0.5),), product_distribution((a=Beta(2, 2), b=Beta(2, 2)))), + ( + (x=(a=0.1, b=0.1),), + (x=(a=0.5, b=0.5),), + product_distribution((a=Beta(2, 2), b=Beta(2, 2))), + ), ) @model f() = x ~ dist model = f() - maybe_to_vec(x::AbstractVector) = x - maybe_to_vec(x) = [x] @testset "unlinked" begin ldf = LogDensityFunction(model) @@ -185,8 +185,8 @@ end VarNamedTuple(lb), VarNamedTuple(ub), ) - @test lb_vec == maybe_to_vec(lb.x) - @test ub_vec == maybe_to_vec(ub.x) + @test lb_vec == Bijectors.VectorBijectors.to_vec(dist)(lb.x) + @test ub_vec == Bijectors.VectorBijectors.to_vec(dist)(ub.x) @test all(init_vec .>= lb_vec) @test all(init_vec .<= ub_vec) end @@ -201,9 +201,8 @@ end VarNamedTuple(lb), VarNamedTuple(ub), ) - b = Bijectors.bijector(dist) - @test lb_vec ≈ maybe_to_vec(b(lb.x)) - @test ub_vec ≈ maybe_to_vec(b(ub.x)) + @test lb_vec ≈ Bijectors.VectorBijectors.to_linked_vec(dist)(lb.x) + @test ub_vec ≈ Bijectors.VectorBijectors.to_linked_vec(dist)(ub.x) @test all(init_vec .>= lb_vec) @test all(init_vec .<= ub_vec) end From de8655f31522d46499eb7eccccbac5d681168851 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 20:23:20 +0000 Subject: [PATCH 36/48] fix tests --- Project.toml | 6 +----- docs/Project.toml | 4 ---- src/mcmc/ess.jl | 2 +- test/Project.toml | 6 +----- test/mcmc/particle_mcmc.jl | 4 ++-- test/optimisation/Optimisation.jl | 4 ++-- 6 files changed, 7 insertions(+), 19 deletions(-) diff --git a/Project.toml b/Project.toml index 1067a228da..6e0202c91f 100644 --- a/Project.toml +++ b/Project.toml @@ -39,10 +39,6 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -[sources] -Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} -DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} - [weakdeps] DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" 
@@ -67,7 +63,7 @@ Distributions = "0.25.77" DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" -DynamicPPL = "0.40.3" +DynamicPPL = "0.40.6" EllipticalSliceSampling = "0.5, 1, 2" ForwardDiff = "0.10.3, 1" Libtask = "0.9.14" diff --git a/docs/Project.toml b/docs/Project.toml index b7b75bffe2..8cf000eca9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,7 +8,3 @@ OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" - -[sources] -Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} -DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index c2f81839a3..98ff3844b8 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -103,7 +103,7 @@ struct ESSPrior{M<:Model,V<:AbstractVarInfo,T} EllipticalSliceSampling.isgaussian(typeof(prior_dist)) || error( "[ESS] only supports Gaussian prior distributions, but found $(typeof(prior_dist))", ) - DynamicPPL.tovec(mean(prior_dist)) + vec(mean(prior_dist)) end return new{typeof(model),typeof(varinfo),typeof(μ)}(model, varinfo, μ) end diff --git a/test/Project.toml b/test/Project.toml index 0902fa7c1d..359f495dc6 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -40,10 +40,6 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -[sources] -Bijectors = {url = "https://github.com/TuringLang/Bijectors.jl", rev = "py/vec2"} -DynamicPPL = {url = "https://github.com/TuringLang/DynamicPPL.jl", rev = "py/vec"} - [compat] ADTypes = "1" AbstractMCMC = "5.13" @@ -60,7 +56,7 @@ DifferentiationInterface = "0.7" Distributions = "0.25" DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" -DynamicPPL = "0.40.3" +DynamicPPL = "0.40.6" FiniteDifferences 
= "0.10.8, 0.11, 0.12" ForwardDiff = "0.10.12 - 0.10.32, 0.10, 1" HypothesisTests = "0.11" diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl index 4c13d59bff..d7411f9a68 100644 --- a/test/mcmc/particle_mcmc.jl +++ b/test/mcmc/particle_mcmc.jl @@ -204,14 +204,14 @@ end end m1 = outer_kwarg1() chn1 = sample(StableRNG(468), m1, PG(10), 1000) - @test mean(chn1[Symbol("a.x")]) ≈ 2.5 atol = 0.2 + @test mean(chn1[Symbol("a.x")]) ≈ 2.5 atol = 0.3 @model function outer_kwarg2(n) return a ~ to_submodel(inner_kwarg(5.0; n=n)) end m2 = outer_kwarg2(10.0) chn2 = sample(StableRNG(468), m2, PG(10), 1000) - @test mean(chn2[Symbol("a.x")]) ≈ 7.5 atol = 0.2 + @test mean(chn2[Symbol("a.x")]) ≈ 7.5 atol = 0.3 end @testset "refuses to run threadsafe eval" begin diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index b8d418046e..ffccb862b1 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -239,8 +239,8 @@ end return x ~ Beta() end m = invalid_model() - @test_throws ErrorException maximum_likelihood(m) - @test_throws ErrorException maximum_a_posteriori(m) + @test_throws ArgumentError maximum_likelihood(m) + @test_throws ArgumentError maximum_a_posteriori(m) end @testset "gdemo" begin From 78b5aa34b989bd41d4b95c648ff1a4c6e3fb3856 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 20:30:02 +0000 Subject: [PATCH 37/48] Add a test for the LKJCholesky optimisation --- test/optimisation/Optimisation.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index ffccb862b1..f878fe39a7 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -708,6 +708,21 @@ end @test result.params[@varname(x)] ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 end + @testset "vector_names_and_params with LKJCholesky" begin + # In the past this used to be inconsistent because the names would have 
length 6, + # but the params would have length 9 (because the Cholesky factor of a 3x3 matrix + # has 6 free parameters, but is represented as a 3x3 matrix). See + # https://github.com/TuringLang/Turing.jl/issues/2734. This was largely fixed by + # adoption of Bijectors.VectorBijectors, so this is just a regression test to make + # sure it doesn't break again. + @model demo_lkj() = x ~ LKJCholesky(3, 1.0) + model = demo_lkj() + result = maximum_a_posteriori(model) + nms, ps = vector_names_and_params(result) + @test length(nms) == 6 + @test length(ps) == 6 + end + @testset "with :=" begin @model function demo_track() x ~ Normal() From f39110280688fff81f5a614b4a19f683a56eb78f Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 20:52:32 +0000 Subject: [PATCH 38/48] some pMCMC docstrings --- src/mcmc/particle_mcmc.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 2f6b412292..feb64c53f3 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -92,8 +92,8 @@ struct SMC{R} <: ParticleInference end """ -SMC([resampler = AdvancedPS.ResampleWithESSThreshold()]) -SMC([resampler = AdvancedPS.resample_systematic, ]threshold) + SMC([resampler = AdvancedPS.ResampleWithESSThreshold()]) + SMC([resampler = AdvancedPS.resample_systematic, ]threshold) Create a sequential Monte Carlo sampler of type [`SMC`](@ref). @@ -356,7 +356,8 @@ end """ get_trace_local_varinfo() -Get the varinfo stored in the 'taped globals' of a `Libtask.TapedTask`. +Get the varinfo stored in the 'taped globals' of a `Libtask.TapedTask`. This function +is meant to be called from *inside* the TapedTask itself. """ function get_trace_local_varinfo() trace = Libtask.get_taped_globals(Any).other @@ -371,6 +372,8 @@ Get the `resample` flag stored in the 'taped globals' of a `Libtask.TapedTask`. This indicates whether new variable values should be sampled from the prior or not. 
For example, in SMC, this is true for all particles; in PG, this is true for all particles except the reference particle, whose trajectory must be reproduced exactly. + +This function is meant to be called from *inside* the TapedTask itself. """ function get_trace_local_resampled() trace = Libtask.get_taped_globals(Any).other @@ -381,6 +384,8 @@ end get_trace_local_rng() Get the RNG stored in the 'taped globals' of a `Libtask.TapedTask`, if one exists. + +This function is meant to be called from *inside* the TapedTask itself. """ function get_trace_local_rng() return Libtask.get_taped_globals(Any).rng @@ -393,6 +398,8 @@ Set the `varinfo` stored in Libtask's taped globals. The 'other' taped global in is expected to be an `AdvancedPS.Trace`. Returns `nothing`. + +This function is meant to be called from *inside* the TapedTask itself. """ function set_trace_local_varinfo(vi::AbstractVarInfo) trace = Libtask.get_taped_globals(Any).other From 51f55586bc818534b394ef4f7c3141097ac1c625 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 21:00:44 +0000 Subject: [PATCH 39/48] Clarify abstract type parameter --- src/mcmc/particle_mcmc.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index feb64c53f3..ab2a6119d9 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -26,9 +26,14 @@ mutable struct TracedModel{M<:Model,T<:Tuple,NT<:NamedTuple} <: model::M # TODO(penelopeysm): I don't like that this is an abstract type. However, the problem is # that the type of VarInfo can change during execution, especially with PG-inside-Gibbs - # when you have to muck with merging VarInfos from different sub-conditioned models. I - # do think that this can be solved. I would recommend that if you have time try to - # change this back into a type parameter, see what breaks in CI, and try to fix it. 
+ # when you have to muck with merging VarInfos from different sub-conditioned models. + # + # However, I don't think that this is actually a problem in practice. Whenever we do + # Libtask.get_taped_globals, that is already type unstable anyway, so accessing this + # field here is not going to cause extra type instability. This change is associated + # with Turing v0.43, and I benchmarked on v0.42 vs v0.43, and v0.43 is actually faster + # (probably due to underlying changes in DynamicPPL), so I'm not really bothered by + # this. varinfo::AbstractVarInfo resample::Bool fargs::T From 1845c9a5d2cb397389051a54c967fe28bcb64005 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 21:04:48 +0000 Subject: [PATCH 40/48] tidy --- test/ad.jl | 5 +++-- test/ext/dynamichmc.jl | 3 +-- test/mcmc/callbacks.jl | 7 +++++-- test/mcmc/emcee.jl | 5 +---- test/mcmc/gibbs.jl | 29 +++++++++++++++-------------- test/mcmc/sghmc.jl | 9 ++++----- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/test/ad.jl b/test/ad.jl index 5e5f367470..8d87eea3d8 100644 --- a/test/ad.jl +++ b/test/ad.jl @@ -253,8 +253,9 @@ end conditioned_model = Turing.Inference.make_conditional( model, varnames, deepcopy(global_vi) ) - rng = StableRNG(123) - @test run_ad(model, adtype; test=true, benchmark=false) isa Any + @test run_ad( + model, adtype; rng=StableRNG(123), test=true, benchmark=false + ) isa Any end end end diff --git a/test/ext/dynamichmc.jl b/test/ext/dynamichmc.jl index f460bffda1..26d90d78de 100644 --- a/test/ext/dynamichmc.jl +++ b/test/ext/dynamichmc.jl @@ -11,9 +11,8 @@ using StableRNGs: StableRNG using Turing @testset "TuringDynamicHMCExt" begin - rng = StableRNG(100) spl = externalsampler(DynamicHMC.NUTS()) - chn = sample(rng, gdemo_default, spl, 10_000) + chn = sample(StableRNG(100), gdemo_default, spl, 10_000) check_gdemo(chn) end diff --git a/test/mcmc/callbacks.jl b/test/mcmc/callbacks.jl index c62e9d133a..bced5675cd 100644 --- a/test/mcmc/callbacks.jl +++ 
b/test/mcmc/callbacks.jl @@ -8,7 +8,6 @@ using Test, Turing, AbstractMCMC, Random, Distributions, LinearAlgebra end @testset "AbstractMCMC Callbacks Interface" begin - rng = Random.default_rng() model = test_normals() samplers = [ @@ -24,7 +23,10 @@ end for (name, sampler) in samplers @testset "$name" begin t1, s1 = AbstractMCMC.step( - rng, model, sampler; initial_params=Turing.Inference.init_strategy(sampler) + Random.default_rng(), + model, + sampler; + initial_params=Turing.Inference.init_strategy(sampler), ) # ParamsWithStats returns named params (not θ[i]) @@ -46,6 +48,7 @@ end # NUTS second step has full AHMC transition metrics @testset "NUTS Transition Metrics" begin sampler = NUTS(10, 0.65) + rng = Random.default_rng() t1, s1 = AbstractMCMC.step( rng, model, sampler; initial_params=Turing.Inference.init_strategy(sampler) ) diff --git a/test/mcmc/emcee.jl b/test/mcmc/emcee.jl index a13ccbba00..ee786b8584 100644 --- a/test/mcmc/emcee.jl +++ b/test/mcmc/emcee.jl @@ -11,13 +11,10 @@ using Turing @testset "emcee.jl" begin @testset "gdemo" begin - rng = StableRNG(9876) - n_samples = 1000 n_walkers = 250 - spl = Emcee(n_walkers, 2.0) - chain = sample(rng, gdemo_default, spl, n_samples) + chain = sample(StableRNG(9876), gdemo_default, spl, n_samples) check_gdemo(chain) end diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index b623306fe9..34107202da 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -578,9 +578,8 @@ end # Sampler to use for Gibbs components. 
hmc = HMC(0.1, 32) sampler = Gibbs(@varname(s) => hmc, @varname(m) => hmc) - rng = StableRNG(42) chain = sample( - rng, + StableRNG(42), model, sampler, MCMCThreads(), @@ -625,14 +624,13 @@ end end @testset "multiple varnames" begin - rng = Random.default_rng() - @testset "with both `s` and `m` as random" begin model = gdemo(1.5, 2.0) vns = (@varname(s), @varname(m)) spl = Gibbs(vns => MH()) # `step` + rng = Random.default_rng() transition, state = AbstractMCMC.step(rng, model, spl) check_transition_varnames(transition, vns) for _ in 1:5 @@ -641,8 +639,7 @@ end end # `sample` - rng = StableRNG(42) - chain = sample(rng, model, spl, 1_000; progress=false) + chain = sample(StableRNG(42), model, spl, 1_000; progress=false) check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.4) end @@ -652,6 +649,7 @@ end spl = Gibbs(vns => MH()) # `step` + rng = Random.default_rng() transition, state = AbstractMCMC.step(rng, model, spl) check_transition_varnames(transition, vns) for _ in 1:5 @@ -695,7 +693,6 @@ end end @testset "CSMC + ESS" begin - rng = Random.default_rng() model = MoGtest_default spl = Gibbs( (@varname(z1), @varname(z2), @varname(z3), @varname(z4)) => CSMC(15), @@ -711,6 +708,7 @@ end @varname(mu2) ) # `step` + rng = Random.default_rng() transition, state = AbstractMCMC.step(rng, model, spl) check_transition_varnames(transition, vns) for _ in 1:5 @@ -719,17 +717,16 @@ end end # Sample! 
- rng = StableRNG(42) - chain = sample(rng, MoGtest_default, spl, 1000; progress=false) + chain = sample(StableRNG(42), MoGtest_default, spl, 1000; progress=false) check_MoGtest_default(chain; atol=0.2) end @testset "CSMC + ESS (usage of implicit varname)" begin - rng = Random.default_rng() model = MoGtest_default_z_vector spl = Gibbs(@varname(z) => CSMC(15), @varname(mu1) => ESS(), @varname(mu2) => ESS()) vns = (@varname(z), @varname(mu1), @varname(mu2)) # `step` + rng = Random.default_rng() transition, state = AbstractMCMC.step(rng, model, spl) check_transition_varnames(transition, vns) for _ in 1:5 @@ -738,8 +735,7 @@ end end # Sample! - rng = StableRNG(42) - chain = sample(rng, model, spl, 1000; progress=false) + chain = sample(StableRNG(42), model, spl, 1000; progress=false) check_MoGtest_default_z_vector(chain; atol=0.2) end @@ -775,9 +771,14 @@ end ] @testset "$(sampler_inner)" for sampler_inner in samplers_inner sampler = Gibbs(@varname(m1) => sampler_inner, @varname(m2) => sampler_inner) - rng = StableRNG(42) chain = sample( - rng, model, sampler, 1000; discard_initial=1000, thinning=10, n_adapts=0 + StableRNG(42), + model, + sampler, + 1000; + discard_initial=1000, + thinning=10, + n_adapts=0, ) check_numerical(chain, [:m1, :m2], [-0.2, 0.6]; atol=0.1) check_logp_correct(sampler_inner) diff --git a/test/mcmc/sghmc.jl b/test/mcmc/sghmc.jl index e08137109d..c4f1e48c06 100644 --- a/test/mcmc/sghmc.jl +++ b/test/mcmc/sghmc.jl @@ -21,9 +21,8 @@ using Turing end @testset "sghmc inference" begin - rng = StableRNG(123) alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5) - chain = sample(rng, gdemo_default, alg, 10_000) + chain = sample(StableRNG(123), gdemo_default, alg, 10_000) check_gdemo(chain; atol=0.1) end @@ -39,9 +38,9 @@ end end @testset "sgld inference" begin - rng = StableRNG(1) - - chain = sample(rng, gdemo_default, SGLD(; stepsize=PolynomialStepsize(0.5)), 20_000) + chain = sample( + StableRNG(1), gdemo_default, SGLD(; 
stepsize=PolynomialStepsize(0.5)), 20_000 + ) check_gdemo(chain; atol=0.25) # Weight samples by step sizes (cf section 4.2 in the paper by Welling and Teh) From c10ae926eac822a4d452907067c6a7a2ff9d94de Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 21:16:50 +0000 Subject: [PATCH 41/48] fix vec --- src/mcmc/ess.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index 98ff3844b8..0fdaf7219f 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -89,6 +89,9 @@ function AbstractMCMC.step( return transition, TuringESSState(vi, state.priors) end +_vec(x::Real) = [x] +_vec(x::AbstractArray) = vec(x) + # Prior distribution of considered random variable struct ESSPrior{M<:Model,V<:AbstractVarInfo,T} model::M @@ -103,7 +106,7 @@ struct ESSPrior{M<:Model,V<:AbstractVarInfo,T} EllipticalSliceSampling.isgaussian(typeof(prior_dist)) || error( "[ESS] only supports Gaussian prior distributions, but found $(typeof(prior_dist))", ) - vec(mean(prior_dist)) + _vec(mean(prior_dist)) end return new{typeof(model),typeof(varinfo),typeof(μ)}(model, varinfo, μ) end From 8006bb76d48ee644904ab3985759a5b6363a0afe Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 21:31:42 +0000 Subject: [PATCH 42/48] Remove DistributionsAD --- Project.toml | 2 -- docs/make.jl | 1 - docs/src/api.md | 12 ++++++------ src/Turing.jl | 6 +++--- src/mcmc/Inference.jl | 1 - test/Project.toml | 2 -- 6 files changed, 9 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 6e0202c91f..63c620edec 100644 --- a/Project.toml +++ b/Project.toml @@ -17,7 +17,6 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" DynamicPPL = 
"366bfd00-2699-11ea-058f-f148b4cae6d8" EllipticalSliceSampling = "cad2338a-1db2-11e9-3401-43bc07c9ede2" @@ -60,7 +59,6 @@ Compat = "4.15.0" DataStructures = "0.18, 0.19" DifferentiationInterface = "0.7" Distributions = "0.25.77" -DistributionsAD = "0.6" DocStringExtensions = "0.8, 0.9" DynamicHMC = "3.4" DynamicPPL = "0.40.6" diff --git a/docs/make.jl b/docs/make.jl index 4f6b096997..23567cfba2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,7 +11,6 @@ links = InterLinks( "AbstractMCMC" => "https://turinglang.org/AbstractMCMC.jl/stable/", "ADTypes" => "https://sciml.github.io/ADTypes.jl/stable/", "AdvancedVI" => "https://turinglang.org/AdvancedVI.jl/stable/", - "DistributionsAD" => "https://turinglang.org/DistributionsAD.jl/stable/", "OrderedCollections" => "https://juliacollections.github.io/OrderedCollections.jl/stable/", "Distributions" => "https://juliastats.org/Distributions.jl/stable/", ) diff --git a/docs/src/api.md b/docs/src/api.md index 0f4cdde93d..8bc70ad32a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -157,12 +157,12 @@ LogPoisson ### Tools to work with distributions -| Exported symbol | Documentation | Description | -|:--------------- |:-------------------------------------- |:-------------------------------------------------------------- | -| `I` | [`LinearAlgebra.I`](@extref) | Identity matrix | -| `filldist` | [`DistributionsAD.filldist`](@extref) | Create a product distribution from a distribution and integers | -| `arraydist` | [`DistributionsAD.arraydist`](@extref) | Create a product distribution from an array of distributions | -| `NamedDist` | [`DynamicPPL.NamedDist`](@extref) | A distribution that carries the name of the variable | +| Exported symbol | Documentation | Description | +|:--------------- |:--------------------------------- |:-------------------------------------------------------------- | +| `I` | [`LinearAlgebra.I`](@extref) | Identity matrix | +| `filldist` | [`DynamicPPL.filldist`](@extref) | Create a product 
distribution from a distribution and integers | +| `arraydist` | [`DynamicPPL.arraydist`](@extref) | Create a product distribution from an array of distributions | +| `NamedDist` | [`DynamicPPL.NamedDist`](@extref) | A distribution that carries the name of the variable | ### Point estimates diff --git a/src/Turing.jl b/src/Turing.jl index 6cbf13741a..8f0f9089b6 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -1,7 +1,7 @@ module Turing using Reexport, ForwardDiff -using DistributionsAD, Bijectors, StatsFuns, SpecialFunctions +using Bijectors, StatsFuns, SpecialFunctions using Statistics, LinearAlgebra using Libtask @reexport using Distributions, MCMCChains @@ -144,8 +144,8 @@ export LogPoisson, # Tools to work with Distributions I, # LinearAlgebra - filldist, # DistributionsAD - arraydist, # DistributionsAD + filldist, # DynamicPPL + arraydist, # DynamicPPL NamedDist, # DynamicPPL # Predictions - DynamicPPL predict, diff --git a/src/mcmc/Inference.jl b/src/mcmc/Inference.jl index aa8cb9c410..73f0661e61 100644 --- a/src/mcmc/Inference.jl +++ b/src/mcmc/Inference.jl @@ -16,7 +16,6 @@ using DynamicPPL: Model, DefaultContext using Distributions, Libtask, Bijectors -using DistributionsAD: VectorOfMultivariate using LinearAlgebra using ..Turing: PROGRESS, Turing using StatsFuns: logsumexp diff --git a/test/Project.toml b/test/Project.toml index 359f495dc6..dee67a1dcd 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -12,7 +12,6 @@ Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" @@ -54,7 +53,6 @@ Clustering = "0.14, 0.15" Combinatorics = "1" DifferentiationInterface = 
"0.7" Distributions = "0.25" -DistributionsAD = "0.6.3" DynamicHMC = "2.1.6, 3.0" DynamicPPL = "0.40.6" FiniteDifferences = "0.10.8, 0.11, 0.12" From e0aae9f7551726b4e82fd07d0d41f1fefe41ed38 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 3 Mar 2026 21:44:20 +0000 Subject: [PATCH 43/48] fix import --- test/mcmc/Inference.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/mcmc/Inference.jl b/test/mcmc/Inference.jl index 8c6b4108eb..73f83040fc 100644 --- a/test/mcmc/Inference.jl +++ b/test/mcmc/Inference.jl @@ -6,6 +6,7 @@ using Distributions: Bernoulli, Beta, InverseGamma, Normal using Distributions: sample using AbstractMCMC: AbstractMCMC import DynamicPPL +using DynamicPPL: filldist import ForwardDiff using LinearAlgebra: I import MCMCChains From 8b4170f478802e8413fd4603c92c088bd7e56266 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 4 Mar 2026 01:45:31 +0000 Subject: [PATCH 44/48] Fix imports --- src/Turing.jl | 4 +++- test/mcmc/mh.jl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Turing.jl b/src/Turing.jl index 8f0f9089b6..87f09a4533 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -76,7 +76,9 @@ using DynamicPPL: InitFromPrior, InitFromUniform, InitFromParams, - setthreadsafe + setthreadsafe, + filldist, + arraydist using StatsBase: predict using OrderedCollections: OrderedDict using Libtask: might_produce, @might_produce diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 9a3e3be34f..fecd5804ff 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -3,7 +3,7 @@ module MHTests using AdvancedMH: AdvancedMH using Distributions: Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, Normal, sample -using DynamicPPL: DynamicPPL +using DynamicPPL: DynamicPPL, filldist using LinearAlgebra: I using Logging: Logging using Random: Random From 01d5a4a8981022acfde545a59c0a54371a3508e1 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 4 Mar 2026 01:52:14 +0000 Subject: [PATCH 45/48] changelog --- 
HISTORY.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index a7c6c9787a..e848a49672 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -150,6 +150,12 @@ Previously, this would silently proceed. When defining a conditional posterior, instead of being provided with a Dict of values, the function must now take a `VarNamedTuple` containing the values. Note that indexing into a `VarNamedTuple` is very similar to indexing into a `Dict`; however, it is more flexible since you can use syntax such as `x[1:2]` even if `x[1]` and `x[2]` are separate variables in the model. +## `filldist` and `arraydist` + +These two convenience functions are now imported and re-exported from DynamicPPL, rather than DistributionsAD.jl. +They are now just wrappers around `Distributions.product_distribution`, instead of the specialised implementations that were in DistributionsAD.jl. +DistributionsAD.jl is for all intents and purposes deprecated: it is no longer a dependency in the Turing stack. + # 0.42.9 Improve handling of model evaluator functions with Libtask. From 8e0d448ac5ede7257afbc072350fbb3aa810b795 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 4 Mar 2026 02:09:35 +0000 Subject: [PATCH 46/48] Fix CI hang due to depwarn --- src/mcmc/mh.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 073b45721d..935a0a7e15 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -150,12 +150,19 @@ MH() = MH(Returns(DynamicPPL.InitFromPrior()), DynamicPPL.UnlinkAll(), Set{VarNa LinkedRW(cov_matrix) Define a random-walk proposal in linked space with the given covariance matrix. Note that -the covariance matrix must correspond exactly to the size of the variable in linked space. +the size of the covariance matrix must correspond exactly to the size of the variable in +linked space. 
+ + LinkedRW(variance::Real) + +If a `Real` variance is provided, `LinkedRW` will just generate a covariance matrix of +`variance * LinearAlgebra.I`. """ struct LinkedRW{C} "The covariance matrix to use for the random-walk proposal in linked space." cov_matrix::C end +LinkedRW(var::Real) = LinkedRW(var * I) """ InitFromProposals(proposals::VarNamedTuple, verbose::Bool) From 1369bf53b91e868cee8e33aa8e9909770f1152ad Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 4 Mar 2026 02:52:29 +0000 Subject: [PATCH 47/48] increase atol on PG tests --- test/mcmc/particle_mcmc.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl index f19c4ad024..18f4b0a8a3 100644 --- a/test/mcmc/particle_mcmc.jl +++ b/test/mcmc/particle_mcmc.jl @@ -167,11 +167,11 @@ end chain = sample(StableRNG(468), kwarg_demo(5.0), PG(20), 1000) @test chain isa MCMCChains.Chains - @test mean(chain[:x]) ≈ 2.5 atol = 0.2 + @test mean(chain[:x]) ≈ 2.5 atol = 0.3 chain2 = sample(StableRNG(468), kwarg_demo(5.0; n=10.0), PG(20), 1000) @test chain2 isa MCMCChains.Chains - @test mean(chain2[:x]) ≈ 7.5 atol = 0.2 + @test mean(chain2[:x]) ≈ 7.5 atol = 0.3 end @testset "submodels without kwargs" begin From 427fab9ca7d23958dde4ff9ed1ee9668c0f07be7 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Wed, 4 Mar 2026 15:26:04 +0000 Subject: [PATCH 48/48] Update patch notes --- HISTORY.md | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e848a49672..77be324cd3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ However, it does place some constraints on Turing models. Specifically, the types of **containers that can include random variables** are now more limited: if `x[i] ~ dist` is a random variable, then `x` must obey the following criteria: - - They must be arrays. 
Dicts and other containers are currently unsupported (we have [an issue to track this](https://github.com/TuringLang/DynamicPPL.jl/issues/1263)). If you really need this functionality, please open an issue and let us know; we can try to make it a priority. + - They must be `AbstractArray`s. Dicts and other containers are currently unsupported (we have [an issue to track this](https://github.com/TuringLang/DynamicPPL.jl/issues/1263)). If you really need this functionality, please open an issue and let us know; we can try to make it a priority. ```julia @model function f() @@ -36,7 +36,7 @@ if `x[i] ~ dist` is a random variable, then `x` must obey the following criteria ``` However, please note that this only applies to **containers that contain random variables on the left-hand side of tilde-statements.** -In general, there are no restrictions on containers of *observed* data, or containers that are not used in tilde-statements. +In general, there are no restrictions on containers of *observed* data, containers that are not used in tilde-statements, or containers that are *themselves* random variables (e.g. `x ~ MvNormal(...)`). - Likewise, arrays of random variables should ideally have a constant size from iteration to iteration. That means a model like this will fail sometimes (*but* see below): @@ -53,6 +53,31 @@ In general, there are no restrictions on containers of *observed* data, or conta `VarNamedTuple` and `@vnt` are now re-exported from Turing directly. There is a docs page explaining how to use and create `VarNamedTuple`s, which [can be found here](https://turinglang.org/docs/usage/varnamedtuple/). +## Conditioning and fixing + +When providing conditioned or fixed variables to Turing models, we recommend that you use a `VarNamedTuple` to do so. +The main benefit of this is that it correctly captures the structure of arrays of random variables. 
+In the past, this used to depend on whether you specified variables exactly as they were seen in the model: for example, if the model had `x ~ MvNormal(zeros(2), I)`, and you conditioned separately on `x[1]` and `x[2]`, the conditioned variables would be silently ignored. +There were also similar inconsistencies with colons in variable names. + +With a VarNamedTuple-based approach, both should be equivalent: you can do + +```julia +vnt1 = @vnt begin + @template x = zeros(2) + x[1] := 1.0 + x[2] := 2.0 +end +cond_model = model() | vnt1 + +vnt2 = @vnt begin + x := [1.0, 2.0] +end +cond_model = model() | vnt2 +``` + +and both should work correctly. + ## Optimisation interface Turing.jl's optimisation interface has been completely overhauled in this release. @@ -139,6 +164,11 @@ Additional changes: - At the start of sampling, MH logs `@info` messages showing which proposal is used for each variable (disable with `verbose=false`). This helps detect misspecified proposals. - MH validates initial parameters against the proposal distribution; if they have zero or NaN probability, a clear error is thrown. +## Gibbs sampler + +Both the compilation and runtime of Gibbs sampling should now be significantly faster. +(This is largely due to the underlying changes in DynamicPPL's data structures.) + ## HMC / NUTS HMC-family samplers now check for discrete variables before sampling begins.