TuringLang · torfjelde · Dec 11, 2024 · Dec 4, 2024 · Dec 4, 2024 · Dec 4, 2024
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -8,3 +8,8 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [compat]
 Documenter = "1"
+Distributions = "0.25"
+LinearAlgebra = "1.6"
+LogDensityProblems = "2"
+MCMCChains = "6.0.4"
+Random = "1.6"
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -16,5 +16,5 @@ DensityModel
 ## Samplers
 
 ```@docs
-RAM
+RobustAdaptiveMetropolis
 ```
diff --git a/src/AdvancedMH.jl b/src/AdvancedMH.jl
@@ -3,8 +3,9 @@ module AdvancedMH
 # Import the relevant libraries.
 using AbstractMCMC
 using Distributions
-using LinearAlgebra: I
+using LinearAlgebra: LinearAlgebra, I
 using FillArrays: Zeros
+using DocStringExtensions: FIELDS
 
 using LogDensityProblems: LogDensityProblems
 
@@ -22,7 +23,8 @@ export
     SymmetricRandomWalkProposal,
     Ensemble,
     StretchProposal,
-    MALA
+    MALA,
+    RobustAdaptiveMetropolis
 
 # Reexports
 export sample, MCMCThreads, MCMCDistributed, MCMCSerial
@@ -159,9 +161,6 @@ include("proposal.jl")
 include("mh-core.jl")
 include("emcee.jl")
 include("MALA.jl")
-
 include("RobustAdaptiveMetropolis.jl")
-using .RobustAdaptiveMetropolis
-export RAM
 
 end # module AdvancedMH
diff --git a/src/RobustAdaptiveMetropolis.jl b/src/RobustAdaptiveMetropolis.jl
@@ -1,16 +1,6 @@
-module RobustAdaptiveMetropolis
-
-using Random, LogDensityProblems, LinearAlgebra, AbstractMCMC
-using DocStringExtensions: FIELDS
-
-using AdvancedMH: AdvancedMH
-
-export RAM
-
 # TODO: Should we generalise this arbitrary symmetric proposals?
-# TODO: Implement checking of eigenvalues to avoid degenerate covariance estimates, as in the paper.
 """
-    RAM
+    RobustAdaptiveMetropolis
 
 Robust Adaptive Metropolis-Hastings (RAM).
 
@@ -24,8 +14,8 @@ $(FIELDS)
 
 The following demonstrates how to implement a simple Gaussian model and sample from it using the RAM algorithm.
 
-```jldoctest
-julia> using AdvancedMH, Random, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra
+```jldoctest ram-gaussian; setup=:(using Random; Random.seed!(1234);)
+julia> using AdvancedMH, Distributions, MCMCChains, LogDensityProblems, LinearAlgebra
 
 julia> # Define a Gaussian with zero mean and some covariance.
        struct Gaussian{A}
@@ -53,154 +43,243 @@ julia> # Number of warmup steps, i.e. the number of steps to adapt the covarianc
        # by default in the `sample` call. To include them, pass `discard_initial=0` to `sample`.
        num_warmup = 10_000;
 
-julia> # Set the seed so get some consistency.
-       Random.seed!(1234);
-
 julia> # Sample!
-       chain = sample(model, RAM(), 10_000; chain_type=Chains, num_warmup=10_000, progress=false, initial_params=zeros(2));
+       chain = sample(
+            model,
+            RobustAdaptiveMetropolis(),
+            num_samples;
+            chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)
+        );
+
+julia> isapprox(cov(Array(chain)), model.Σ; rtol = 0.2)
+true
+```
+
+It's also possible to restrict the eigenvalues to avoid either too small or too large values. See p. 13 in [^VIH12].
+
+```jldoctest ram-gaussian
+julia> chain = sample(
+           model,
+           RobustAdaptiveMetropolis(eigenvalue_lower_bound=0.1, eigenvalue_upper_bound=2.0),
+           num_samples;
+           chain_type=Chains, num_warmup, progress=false, initial_params=zeros(2)
+       );
 
 julia> norm(cov(Array(chain)) - [1.0 0.5; 0.5 1.0]) < 0.2
 true
-```
+````
 
 # References
 [^VIH12]: Vihola (2012) Robust adaptive Metropolis algorithm with coerced acceptance rate, Statistics and computing.
 """
-Base.@kwdef struct RAM{T,A<:Union{Nothing,AbstractMatrix{T}}} <: AdvancedMH.MHSampler
-    "target acceptance rate"
-    α::T=0.234
-    "negative exponent of the adaptation decay rate"
-    γ::T=0.6
-    "initial covariance matrix"
-    S::A=nothing
-    "lower bound on eigenvalues of the adapted covariance matrix"
-    eigenvalue_lower_bound::T=0.0
-    "upper bound on eigenvalues of the adapted covariance matrix"
-    eigenvalue_upper_bound::T=Inf
+Base.@kwdef struct RobustAdaptiveMetropolis{T,A<:Union{Nothing,AbstractMatrix{T}}} <:
+                   AdvancedMH.MHSampler
+    "target acceptance rate. Default: 0.234."
+    α::T = 0.234
+    "negative exponent of the adaptation decay rate. Default: `0.6`."
+    γ::T = 0.6
+    "initial lower-triangular Cholesky factor. If specified, should be convertible into a `LowerTriangular`. Default: `nothing`."
+    S::A = nothing
+    "lower bound on eigenvalues of the adapted Cholesky factor. Default: `0.0`."
+    eigenvalue_lower_bound::T = 0.0
+    "upper bound on eigenvalues of the adapted Cholesky factor. Default: `Inf`."
+    eigenvalue_upper_bound::T = Inf
 end
 
-# TODO: Should we record anything like the acceptance rates?
-struct RAMState{T1,L,A,T2,T3}
+"""
+    RobustAdaptiveMetropolisState
+
+State of the Robust Adaptive Metropolis-Hastings (RAM) algorithm.
+
+See also: [`RobustAdaptiveMetropolis`](@ref).
+
+# Fields
+$(FIELDS)
+"""
+struct RobustAdaptiveMetropolisState{T1,L,A,T2,T3}
+    "current realization of the chain."
     x::T1
+    "log density of `x` under the target model."
     logprob::L
+    "current lower-triangular Cholesky factor."
     S::A
+    "log acceptance ratio of the previous iteration (not necessarily of `x`)."
     logα::T2
+    "current step size for adaptation of `S`."
     η::T3
+    "current iteration."
     iteration::Int
+    "whether the previous iteration was accepted."
     isaccept::Bool
 end
 
-AbstractMCMC.getparams(state::RAMState) = state.x
-AbstractMCMC.setparams!!(state::RAMState, x) = RAMState(x, state.logprob, state.S, state.logα, state.η, state.iteration, state.isaccept)
-
-function step_inner(
+AbstractMCMC.getparams(state::RobustAdaptiveMetropolisState) = state.x
+AbstractMCMC.setparams!!(state::RobustAdaptiveMetropolisState, x) =
+    RobustAdaptiveMetropolisState(
+        x,
+        state.logprob,
+        state.S,
+        state.logα,
+        state.η,
+        state.iteration,
+        state.isaccept,
+    )
+
+function ram_step_inner(
     rng::Random.AbstractRNG,
     model::AbstractMCMC.LogDensityModel,
-    sampler::RAM,
-    state::RAMState
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState,
 )
     # This is the initial state.
     f = model.logdensity
     d = LogDensityProblems.dimension(f)
 
     # Sample the proposal.
     x = state.x
-    U = randn(rng, d)
-    x_new = x + state.S * U
+    U = randn(rng, eltype(x), d)
+    x_new = muladd(state.S, U, x)
 
     # Compute the acceptance probability.
     lp = state.logprob
     lp_new = LogDensityProblems.logdensity(f, x_new)
-    logα = min(lp_new - lp, zero(lp))  # `min` because we'll use this for updating
-    # TODO: use `randexp` instead.
-    isaccept = log(rand(rng)) < logα
+    # Technically, the `min` here is unnecessary for sampling according to `min(..., 1)`.
+    # However, `ram_adapt` assumes that `logα` actually represents the log acceptance probability
+    # and is thus bounded at 0. Moreover, users might be interested in inspecting the average
+    # acceptance rate to check that the algorithm achieves something similar to the target rate.
+    # Hence, it's a bit more convenient for the user if we just perform the `min` here
+    # so they can just take an average of (`exp` of) the `logα` values.
+    logα = min(lp_new - lp, zero(lp))
+    isaccept = Random.randexp(rng) > -logα
 
     return x_new, lp_new, U, logα, isaccept
 end
 
-function adapt(sampler::RAM, state::RAMState, logα::Real, U::AbstractVector)
-    # Update `
+function ram_adapt(
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState,
+    logα::Real,
+    U::AbstractVector,
+)
     Δα = exp(logα) - sampler.α
     S = state.S
     # TODO: Make this configurable by defining a more general path.
     η = state.iteration^(-sampler.γ)
-    ΔS = η * abs(Δα) * S * U / norm(U)
+    ΔS = (η * abs(Δα)) * S * U / LinearAlgebra.norm(U)
     # TODO: Maybe do in-place and then have the user extract it with a callback if they really want it.
     S_new = if sign(Δα) == 1
         # One rank update.
-        LinearAlgebra.lowrankupdate(Cholesky(S), ΔS).L
+        LinearAlgebra.lowrankupdate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L
     else
         # One rank downdate.
-        LinearAlgebra.lowrankdowndate(Cholesky(S), ΔS).L
+        LinearAlgebra.lowrankdowndate(LinearAlgebra.Cholesky(S.data, :L, 0), ΔS).L
     end
     return S_new, η
 end
 
 function AbstractMCMC.step(
     rng::Random.AbstractRNG,
     model::AbstractMCMC.LogDensityModel,
-    sampler::RAM;
-    initial_params=nothing,
-    kwargs...
+    sampler::RobustAdaptiveMetropolis;
+    initial_params = nothing,
+    kwargs...,
 )
     # This is the initial state.
     f = model.logdensity
     d = LogDensityProblems.dimension(f)
 
     # Initial parameter state.
-    x = initial_params === nothing ? rand(rng, d) : initial_params
+    T = if initial_params === nothing
+        eltype(sampler.γ)
+    else
+        Base.promote_type(eltype(sampler.γ), eltype(initial_params))
+    end
+    x = if initial_params === nothing
+        randn(rng, T, d)
+    else
+        convert(AbstractVector{T}, initial_params)
+    end
     # Initialize the Cholesky factor of the covariance matrix.
-    S = LowerTriangular(sampler.S === nothing ? diagm(0 => ones(eltype(sampler.γ), d)) : sampler.S)
+    S_data = if sampler.S === nothing
+        LinearAlgebra.diagm(0 => ones(T, d))
+    else
+        # Check the dimensionality of the provided `S`.
+        if size(sampler.S) != (d, d)
+            throw(ArgumentError("The provided `S` has the wrong dimensionality."))
+        end
+        convert(AbstractMatrix{T}, sampler.S)
+    end
+    S = LinearAlgebra.LowerTriangular(S_data)
 
-    # Constuct the initial state.
+    # Construct the initial state.
     lp = LogDensityProblems.logdensity(f, x)
-    state = RAMState(x, lp, S, 0.0, 0, 1, true)
+    state = RobustAdaptiveMetropolisState(x, lp, S, zero(T), 0, 1, true)
 
     return AdvancedMH.Transition(x, lp, true), state
 end
 
 function AbstractMCMC.step(
     rng::Random.AbstractRNG,
     model::AbstractMCMC.LogDensityModel,
-    sampler::RAM,
-    state::RAMState;
-    kwargs...
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState;
+    kwargs...,
 )
     # Take the inner step.
-    x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state)
+    x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state)
     # Accept / reject the proposal.
-    state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, state.S, logα, state.η, state.iteration + 1, isaccept)
-    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new
+    state_new = RobustAdaptiveMetropolisState(
+        isaccept ? x_new : state.x,
+        isaccept ? lp_new : state.logprob,
+        state.S,
+        logα,
+        state.η,
+        state.iteration + 1,
+        isaccept,
+    )
+    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept),
+    state_new
 end
 
 function valid_eigenvalues(S, lower_bound, upper_bound)
     # Short-circuit if the bounds are the default.
     (lower_bound == 0 && upper_bound == Inf) && return true
     # Note that this is just the diagonal when `S` is triangular.
-    eigenvals = LinearAlgebra.eigenvals(S)
-    return all(lower_bound .<= eigenvals .<= upper_bound)
+    eigenvals = LinearAlgebra.eigvals(S)
+    return all(x -> lower_bound <= x <= upper_bound, eigenvals)
 end
 
 function AbstractMCMC.step_warmup(
     rng::Random.AbstractRNG,
     model::AbstractMCMC.LogDensityModel,
-    sampler::RAM,
-    state::RAMState;
-    kwargs...
+    sampler::RobustAdaptiveMetropolis,
+    state::RobustAdaptiveMetropolisState;
+    kwargs...,
 )
     # Take the inner step.
-    x_new, lp_new, U, logα, isaccept = step_inner(rng, model, sampler, state)
+    x_new, lp_new, U, logα, isaccept = ram_step_inner(rng, model, sampler, state)
     # Adapt the proposal.
-    S_new, η = adapt(sampler, state, logα, U)
+    S_new, η = ram_adapt(sampler, state, logα, U)
     # Check that `S_new` has eigenvalues in the desired range.
-    if !valid_eigenvalues(S_new, sampler.eigenvalue_lower_bound, sampler.eigenvalue_upper_bound)
+    if !valid_eigenvalues(
+        S_new,
+        sampler.eigenvalue_lower_bound,
+        sampler.eigenvalue_upper_bound,
+    )
         # In this case, we just keep the old `S` (p. 13 in Vihola, 2012).
         S_new = state.S
     end
 
     # Update state.
-    state_new = RAMState(isaccept ? x_new : state.x, isaccept ? lp_new : state.logprob, S_new, logα, η, state.iteration + 1, isaccept)
-    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept), state_new
-end
-
+    state_new = RobustAdaptiveMetropolisState(
+        isaccept ? x_new : state.x,
+        isaccept ? lp_new : state.logprob,
+        S_new,
+        logα,
+        η,
+        state.iteration + 1,
+        isaccept,
+    )
+    return AdvancedMH.Transition(state_new.x, state_new.logprob, state_new.isaccept),
+    state_new
 end