Skip to content

Commit 27b0096

Browse files
committed
[no ci] More fixes, reexport InitFrom
1 parent 02d1d0e commit 27b0096

File tree

6 files changed

+71
-40
lines changed

6 files changed

+71
-40
lines changed

docs/src/api.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ even though [`Prior()`](@ref) is actually defined in the `Turing.Inference` modu
7575
| `RepeatSampler` | [`Turing.Inference.RepeatSampler`](@ref) | A sampler that runs multiple times on the same variable |
7676
| `externalsampler` | [`Turing.Inference.externalsampler`](@ref) | Wrap an external sampler for use in Turing |
7777

78+
### Initialisation strategies
79+
80+
Turing.jl provides several strategies to initialise parameters for models.
81+
82+
| Exported symbol | Documentation | Description |
83+
|:----------------- |:--------------------------------------- |:--------------------------------------------------------------- |
84+
| `InitFromPrior` | [`DynamicPPL.InitFromPrior`](@extref) | Obtain initial parameters from the prior distribution |
85+
| `InitFromUniform` | [`DynamicPPL.InitFromUniform`](@extref) | Obtain initial parameters by sampling uniformly in linked space |
86+
| `InitFromParams` | [`DynamicPPL.InitFromParams`](@extref) | Manually specify (possibly a subset of) initial parameters |
87+
7888
### Variational inference
7989

8090
See the [docs of AdvancedVI.jl](https://turinglang.org/AdvancedVI.jl/stable/) for detailed usage and the [variational inference tutorial](https://turinglang.org/docs/tutorials/09-variational-inference/) for a basic walkthrough.

src/Turing.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ using DynamicPPL:
7373
conditioned,
7474
to_submodel,
7575
LogDensityFunction,
76-
@addlogprob!
76+
@addlogprob!,
77+
InitFromPrior,
78+
InitFromUniform,
79+
InitFromParams
7780
using StatsBase: predict
7881
using OrderedCollections: OrderedDict
7982

@@ -148,6 +151,10 @@ export
148151
fix,
149152
unfix,
150153
OrderedDict, # OrderedCollections
154+
# Initialisation strategies for models
155+
InitFromPrior,
156+
InitFromUniform,
157+
InitFromParams,
151158
# Point estimates - Turing.Optimisation
152159
# The MAP and MLE exports are only needed for the Optim.jl interface.
153160
maximum_a_posteriori,

src/mcmc/emcee.jl

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,16 @@ struct EmceeState{V<:AbstractVarInfo,S}
3131
states::S
3232
end
3333

34+
# Utility function to retrieve the number of walkers
35+
_get_n_walkers(e::Emcee) = e.ensemble.n_walkers
36+
_get_n_walkers(spl::Sampler{<:Emcee}) = _get_n_walkers(spl.alg)
37+
3438
function AbstractMCMC.step(
3539
rng::Random.AbstractRNG,
3640
model::Model,
3741
spl::Sampler{<:Emcee};
3842
resume_from=nothing,
39-
initial_params=nothing,
43+
initial_params=fill(DynamicPPL.init_strategy(spl), _get_n_walkers(spl)),
4044
kwargs...,
4145
)
4246
if resume_from !== nothing
@@ -45,21 +49,19 @@ function AbstractMCMC.step(
4549
end
4650

4751
# Sample from the prior
48-
n = spl.alg.ensemble.n_walkers
52+
n = _get_n_walkers(spl)
4953
vis = [VarInfo(rng, model) for _ in 1:n]
5054

5155
# Update the parameters if provided.
52-
if initial_params !== nothing
53-
if !(
54-
initial_params isa AbstractVector{<:DynamicPPL.AbstractInitStrategy} &&
55-
length(initial_params) == n
56-
)
57-
err_msg = "initial_params for `Emcee` must be a vector of `DynamicPPL.AbstractInitStrategy`, with length equal to the number of walkers ($n)"
58-
throw(ArgumentError(err_msg))
59-
end
60-
vis = map(vis, initial_params) do vi, strategy
61-
DynamicPPL.init!!(rng, model, vi, strategy)
62-
end
56+
if !(
57+
initial_params isa AbstractVector{<:DynamicPPL.AbstractInitStrategy} &&
58+
length(initial_params) == n
59+
)
60+
err_msg = "initial_params for `Emcee` must be a vector of `DynamicPPL.AbstractInitStrategy`, with length equal to the number of walkers ($n)"
61+
throw(ArgumentError(err_msg))
62+
end
63+
vis = map(vis, initial_params) do vi, strategy
64+
last(DynamicPPL.init!!(rng, model, vi, strategy))
6365
end
6466

6567
# Compute initial transition and states.

src/mcmc/external_sampler.jl

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,25 +117,25 @@ function AbstractMCMC.step(
117117
model::DynamicPPL.Model,
118118
sampler_wrapper::Sampler{<:ExternalSampler};
119119
initial_state=nothing,
120-
initial_params=nothing,
120+
initial_params=DynamicPPL.init_strategy(sampler_wrapper.alg.sampler),
121121
kwargs...,
122122
)
123123
alg = sampler_wrapper.alg
124124
sampler = alg.sampler
125125

126126
# Initialise varinfo with initial params and link the varinfo if needed.
127127
varinfo = DynamicPPL.VarInfo(model)
128+
_, varinfo = DynamicPPL.init!!(rng, model, varinfo, initial_params)
129+
128130
if requires_unconstrained_space(alg)
129-
if initial_params !== nothing
130-
# If we have initial parameters, we need to set the varinfo before linking.
131-
varinfo = DynamicPPL.link(DynamicPPL.unflatten(varinfo, initial_params), model)
132-
# Extract initial parameters in unconstrained space.
133-
initial_params = varinfo[:]
134-
else
135-
varinfo = DynamicPPL.link(varinfo, model)
136-
end
131+
varinfo = DynamicPPL.link(varinfo, model)
137132
end
138133

134+
# We need to extract the vectorised initial_params, because the later call to
135+
# AbstractMCMC.step only sees a `LogDensityModel` which expects `initial_params`
136+
# to be a vector.
137+
initial_params_vector = varinfo[:]
138+
139139
# Construct LogDensityFunction
140140
f = DynamicPPL.LogDensityFunction(
141141
model, DynamicPPL.getlogjoint_internal, varinfo; adtype=alg.adtype
@@ -144,15 +144,19 @@ function AbstractMCMC.step(
144144
# Then just call `AbstractMCMC.step` with the right arguments.
145145
if initial_state === nothing
146146
transition_inner, state_inner = AbstractMCMC.step(
147-
rng, AbstractMCMC.LogDensityModel(f), sampler; initial_params, kwargs...
147+
rng,
148+
AbstractMCMC.LogDensityModel(f),
149+
sampler;
150+
initial_params=initial_params_vector,
151+
kwargs...,
148152
)
149153
else
150154
transition_inner, state_inner = AbstractMCMC.step(
151155
rng,
152156
AbstractMCMC.LogDensityModel(f),
153157
sampler,
154158
initial_state;
155-
initial_params,
159+
initial_params=initial_params_vector,
156160
kwargs...,
157161
)
158162
end

src/mcmc/hmc.jl

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ function find_initial_params(
146146
rng::Random.AbstractRNG,
147147
model::DynamicPPL.Model,
148148
varinfo::DynamicPPL.AbstractVarInfo,
149-
hamiltonian::AHMC.Hamiltonian;
149+
hamiltonian::AHMC.Hamiltonian,
150+
init_strategy::DynamicPPL.AbstractInitStrategy;
150151
max_attempts::Int=1000,
151152
)
152153
varinfo = deepcopy(varinfo) # Don't mutate
@@ -157,10 +158,10 @@ function find_initial_params(
157158
isfinite(z) && return varinfo, z
158159

159160
attempts == 10 &&
160-
@warn "failed to find valid initial parameters in $(attempts) tries; consider providing explicit initial parameters using the `initial_params` keyword"
161+
@warn "failed to find valid initial parameters in $(attempts) tries; consider providing a different initialisation strategy with the `initial_params` keyword"
161162

162163
# Resample and try again.
163-
varinfo = DynamicPPL.init!!(rng, model, varinfo, DynamicPPL.InitFromUniform())
164+
varinfo = DynamicPPL.init!!(rng, model, varinfo, init_strategy)
164165
end
165166

166167
# if we failed to find valid initial parameters, error
@@ -174,7 +175,9 @@ function DynamicPPL.initialstep(
174175
model::AbstractModel,
175176
spl::Sampler{<:Hamiltonian},
176177
vi_original::AbstractVarInfo;
177-
initial_params=nothing,
178+
# the initial_params kwarg is always passed on from sample(), cf. DynamicPPL
179+
# src/sampler.jl, so we don't need to provide a default value here
180+
initial_params::DynamicPPL.AbstractInitStrategy,
178181
nadapts=0,
179182
verbose::Bool=true,
180183
kwargs...,
@@ -195,13 +198,15 @@ function DynamicPPL.initialstep(
195198
lp_grad_func = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ldf)
196199
hamiltonian = AHMC.Hamiltonian(metric, lp_func, lp_grad_func)
197200

198-
# If no initial parameters are provided, resample until the log probability
199-
# and its gradient are finite. Otherwise, just use the existing parameters.
200-
vi, z = if initial_params === nothing
201-
find_initial_params(rng, model, vi, hamiltonian)
202-
else
203-
vi, AHMC.phasepoint(rng, theta, hamiltonian)
204-
end
201+
# Note that there is already one round of 'initialisation' before we reach this step,
202+
# inside DynamicPPL's `AbstractMCMC.step` implementation. That leads to a possible issue
203+
# that this `find_initial_params` function might override the parameters set by the
204+
# user.
205+
# Luckily for us, `find_initial_params` always checks if the logp and its gradient are
206+
# finite. If it is already finite with the params inside the current `vi`, it doesn't
207+
# attempt to find new ones. This means that the parameters passed to `sample()` will be
208+
# respected instead of being overridden here.
209+
vi, z = find_initial_params(rng, model, vi, hamiltonian, initial_params)
205210
theta = vi[:]
206211

207212
# Find good eps if not provided one

test/mcmc/emcee.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,21 @@ using Turing
3434
nwalkers = 250
3535
spl = Emcee(nwalkers, 2.0)
3636

37-
# No initial parameters, with im- and explicit `initial_params=nothing`
3837
Random.seed!(1234)
3938
chain1 = sample(gdemo_default, spl, 1)
4039
Random.seed!(1234)
41-
chain2 = sample(gdemo_default, spl, 1; initial_params=nothing)
40+
chain2 = sample(gdemo_default, spl, 1)
4241
@test Array(chain1) == Array(chain2)
4342

43+
initial_nt = DynamicPPL.InitFromParams((s=2.0, m=1.0))
4444
# Initial parameters have to be specified for every walker
45-
@test_throws ArgumentError sample(gdemo_default, spl, 1; initial_params=[2.0, 1.0])
45+
@test_throws ArgumentError sample(gdemo_default, spl, 1; initial_params=initial_nt)
46+
@test_throws r"must be a vector of" sample(
47+
gdemo_default, spl, 1; initial_params=initial_nt
48+
)
4649

4750
# Initial parameters
48-
chain = sample(gdemo_default, spl, 1; initial_params=fill([2.0, 1.0], nwalkers))
51+
chain = sample(gdemo_default, spl, 1; initial_params=fill(initial_nt, nwalkers))
4952
@test chain[:s] == fill(2.0, 1, nwalkers)
5053
@test chain[:m] == fill(1.0, 1, nwalkers)
5154
end

0 commit comments

Comments
 (0)