TuringLang · ErikQQY · May 22, 2025 · May 23, 2025 · May 23, 2025 · May 23, 2025
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -29,7 +29,7 @@ jobs:
         os:
           - ubuntu-latest
           - macOS-latest
-          - windows-latest
+          #- windows-latest
         exclude:
           - version: 'min'
             os: macOS-latest # Apple Silicon

diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
@@ -24,7 +24,7 @@ import StatsBase: sample
 const DEFAULT_FLOAT_TYPE = typeof(float(0))
 
 include("utilities.jl")
-
+export PolynomialStepsize
 # Notations
 # ℓπ: log density of the target distribution
 # θ: position variables / model parameters
@@ -125,7 +125,7 @@ include("sampler.jl")
 export sample
 
 include("constructors.jl")
-export HMCSampler, HMC, NUTS, HMCDA
+export HMCSampler, HMC, NUTS, HMCDA, SGLD
 
 include("abstractmcmc.jl")
 

diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl
@@ -205,6 +205,112 @@ function AbstractMCMC.step(
     return Transition(t.z, tstat), newstate
 end
 
+struct SGLDState{
+    TTrans<:Transition,
+    TMetric<:AbstractMetric,
+    TKernel<:AbstractMCMCKernel,
+    TAdapt<:Adaptation.AbstractAdaptor,
+}
+    "Index of current iteration."
+    i::Int
+    "Current [`Transition`](@ref)."
+    transition::TTrans
+    "Current [`AbstractMetric`](@ref), possibly adapted."
+    metric::TMetric
+    "Current [`AbstractMCMCKernel`](@ref)."
+    κ::TKernel
+    "Current [`AbstractAdaptor`](@ref)."
+    adaptor::TAdapt
+end
+getadaptor(state::SGLDState) = state.adaptor
+getmetric(state::SGLDState) = state.metric
+getintegrator(state::SGLDState) = state.κ.τ.integrator
+
+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    spl::SGLD;
+    initial_params=nothing,
+    kwargs...,
+)
+    # Unpack model
+    logdensity = model.logdensity
+
+    # Define metric
+    metric = make_metric(spl, logdensity)
+
+    # Construct the hamiltonian using the initial metric
+    hamiltonian = Hamiltonian(metric, model)
+
+    # Compute initial sample and state.
+    initial_params = make_initial_params(rng, spl, logdensity, initial_params)
+    ϵ = make_step_size(rng, spl, hamiltonian, initial_params)
+    integrator = make_integrator(spl, ϵ)
+
+    # Make kernel
+    κ = make_kernel(spl, integrator)
+
+    # Make adaptor
+    adaptor = make_adaptor(spl, metric, integrator)
+
+    # Get an initial sample.
+    h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params)
+
+    state = SGLDState(0, t, metric, κ, adaptor)
+
+    return AbstractMCMC.step(rng, model, spl, state; kwargs...)
+end
+
+function AbstractMCMC.step(
+    rng::AbstractRNG,
+    model::AbstractMCMC.LogDensityModel,
+    spl::SGLD,
+    state::SGLDState;
+    n_adapts::Int=0,
+    kwargs...,
+)
+    if haskey(kwargs, :nadapts)
+        throw(
+            ArgumentError(
+                "keyword argument `nadapts` is unsupported. Please use `n_adapts` to specify the number of adaptation steps.",
+            ),
+        )
+    end
+
+    i = state.i + 1
+    t_old = state.transition
+    adaptor = state.adaptor
+    κ = state.κ
+    metric = state.metric
+
+    # Reconstruct hamiltonian.
+    h = Hamiltonian(metric, model)
+
+    # Compute gradient of log density.
+    logdensity_and_gradient = Base.Fix1(
+        LogDensityProblems.logdensity_and_gradient, model.logdensity
+    )
+    θ = t_old.z.θ
+    grad = last(logdensity_and_gradient(θ))
+
+    stepsize = spl.stepsize(i)
+    θ .+= (stepsize / 2) .* grad .+ sqrt(stepsize) .* randn(rng, eltype(θ), length(θ))
+
+    # Make new transition.
+    t = transition(rng, h, κ, t_old.z)
+
+    # Adapt h and spl.
+    tstat = stat(t)
+    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, θ, tstat.acceptance_rate)
+    tstat = merge(tstat, (is_adapt=isadapted,))
+
+    # Compute next sample and state.
+    sample = Transition(t.z, tstat)
+    newstate = SGLDState(i, t, h.metric, κ, adaptor)
+
+    return sample, newstate
+end
+
 ################
 ### Callback ###
 ################
@@ -398,6 +504,10 @@ function make_adaptor(
     return spl.adaptor
 end
 
+function make_adaptor(spl::SGLD, metric::AbstractMetric, integrator::AbstractIntegrator)
+    return NoAdaptation()
+end
+
 #########
 
 function make_kernel(spl::NUTS, integrator::AbstractIntegrator)
@@ -417,3 +527,7 @@ end
 function make_kernel(spl::HMCSampler, integrator::AbstractIntegrator)
     return spl.κ
 end
+
+function make_kernel(spl::SGLD, integrator::AbstractIntegrator)
+    return HMCKernel(Trajectory{EndPointTS}(integrator, FixedNSteps(spl.n_leapfrog)))
+end
diff --git a/src/constructors.jl b/src/constructors.jl
@@ -163,3 +163,38 @@ function HMCDA(δ, λ; integrator=:leapfrog, metric=:diagonal)
 end
 
 sampler_eltype(::HMCDA{T}) where {T} = T
+
+########### Static Hamiltonian Monte Carlo ###########
+
+#############
+### SGLD ###
+#############
+"""
+    SGLD(step_size::S, n_leapfrog::Int, integrator = :leapfrog, metric = :diagonal)
+
+Stochastic gradient Langevin dynamics (SGLD) sampler.
+
+# Fields
+
+$(FIELDS)
+
+# Notes
+
+For more information, please view the following paper:
+ - Max Welling & Yee Whye Teh (2011). Bayesian Learning via Stochastic Gradient Langevin Dynamics. In: Proceedings of the 28th International Conference on Machine Learning (pp. 681–688).
+"""
+struct SGLD{S,I<:Union{Symbol,AbstractIntegrator},M<:Union{Symbol,AbstractMetric}} <:
+       AbstractHMCSampler
+    "Polynomial step size function."
+    stepsize::S
+    "Number of leapfrog steps."
+    n_leapfrog::Int
+    "Choice of integrator, specified either using a `Symbol` or [`AbstractIntegrator`](@ref)"
+    integrator::I
+    "Choice of initial metric;  `Symbol` means it is automatically initialised. The metric type will be preserved during automatic initialisation and adaption."
+    metric::M
+end
+
+function SGLD(stepsize, n_leapfrog; integrator=:leapfrog, metric=:diagonal)
+    return SGLD(stepsize, n_leapfrog, integrator, metric)
+end
diff --git a/src/utilities.jl b/src/utilities.jl
@@ -101,3 +101,36 @@ function randcat(
     C = cumsum(P; dims=1)
     return max.(vec(count(C .< u'; dims=1)) .+ 1, 1)  # prevent numerical issue for Float32
 end
+
+struct PolynomialStepsize{T<:Real}
+    "Constant scale factor of the step size."
+    a::T
+    "Constant offset of the step size."
+    b::T
+    "Decay rate of step size in (0.5, 1]."
+    γ::T
+
+    function PolynomialStepsize{T}(a::T, b::T, γ::T) where {T}
+        0.5 < γ ≤ 1 || error("the decay rate `γ` has to be in (0.5, 1]")
+        return new{T}(a, b, γ)
+    end
+end
+
+"""
+    PolynomialStepsize(a[, b=0, γ=0.55])
+
+Create a polynomially decaying stepsize function.
+
+At iteration `t`, the step size is
+```math
+a / (b + t)^{-γ}.
+```
+"""
+function PolynomialStepsize(a::T, b::T, γ::T) where {T<:Real}
+    return PolynomialStepsize{T}(a, b, γ)
+end
+function PolynomialStepsize(a::Real, b::Real=0, γ::Real=0.55)
+    return PolynomialStepsize(promote(a, b, γ)...)
+end
+
+(f::PolynomialStepsize)(t::Int) = f.a / (t + f.b)^f.γ
diff --git a/test/abstractmcmc.jl b/test/abstractmcmc.jl
@@ -10,6 +10,7 @@ using Statistics: mean
     nuts = NUTS(0.8)
     hmc = HMC(100; integrator=Leapfrog(0.05))
     hmcda = HMCDA(0.8, 0.1)
+    sgld = SGLD(PolynomialStepsize(0.25), 100)
 
     integrator = Leapfrog(1e-3)
     κ = AdvancedHMC.make_kernel(nuts, integrator)
@@ -111,6 +112,29 @@ using Statistics: mean
 
     @test m_est_hmc ≈ [49 / 24, 7 / 6] atol = RNDATOL
 
+    samples_sgld = AbstractMCMC.sample(
+        rng,
+        model,
+        hmc,
+        n_adapts + n_samples;
+        n_adapts=n_adapts,
+        initial_params=θ_init,
+        progress=false,
+        verbose=false,
+    )
+
+    # Transform back to original space.
+    # NOTE: We're not correcting for the `logabsdetjac` here since, but
+    # we're only interested in the mean it doesn't matter.
+    for t in samples_sgld
+        t.z.θ .= invlink_gdemo(t.z.θ)
+    end
+    m_est_hmc = mean(samples_sgld) do t
+        t.z.θ
+    end
+
+    @test m_est_hmc ≈ [49 / 24, 7 / 6] atol = RNDATOL
+
     samples_custom = AbstractMCMC.sample(
         rng,
         model,