diff --git a/.gitignore b/.gitignore index c5ff299b..482e8044 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ *.jl.cov *.jl.*.cov *.jl.mem -/Manifest.toml +Manifest.toml /test/Manifest.toml diff --git a/Project.toml b/Project.toml index 7641cce5..ab782442 100644 --- a/Project.toml +++ b/Project.toml @@ -26,7 +26,7 @@ Random = "<0.0.1, 1" Random123 = "1.3" Requires = "1.0" StatsFuns = "0.9, 1" -SSMProblems = "0.1" +SSMProblems = "0.5" julia = "1.7" [extras] diff --git a/examples/gaussian-process/script.jl b/examples/gaussian-process/script.jl index 1a58bde3..40957858 100644 --- a/examples/gaussian-process/script.jl +++ b/examples/gaussian-process/script.jl @@ -8,74 +8,76 @@ using Distributions using Libtask using SSMProblems -Parameters = @NamedTuple begin - a::Float64 - q::Float64 - kernel +struct GaussianProcessDynamics{T<:Real,KT<:Kernel} <: LatentDynamics{T,T} + proc::GP{ZeroMean{T},KT} + function GaussianProcessDynamics(::Type{T}, kernel::KT) where {T<:Real,KT<:Kernel} + return new{T,KT}(GP(ZeroMean{T}(), kernel)) + end end -mutable struct GPSSM <: SSMProblems.AbstractStateSpaceModel - X::Vector{Float64} - observations::Vector{Float64} - θ::Parameters - - GPSSM(params::Parameters) = new(Vector{Float64}(), params) - GPSSM(y::Vector{Float64}, params::Parameters) = new(Vector{Float64}(), y, params) +struct LinearGaussianDynamics{T<:Real} <: LatentDynamics{T,T} + a::T + b::T + q::T end -seed = 1 -T = 100 -Nₚ = 20 -Nₛ = 250 -a = 0.9 -q = 0.5 - -params = Parameters((a, q, SqExponentialKernel())) +function SSMProblems.distribution(proc::LinearGaussianDynamics{T}) where {T<:Real} + return Normal(zero(T), proc.q) +end -f(θ::Parameters, x, t) = Normal(θ.a * x, θ.q) -h(θ::Parameters) = Normal(0, θ.q) -g(θ::Parameters, x, t) = Normal(0, exp(0.5 * x)^2) +function SSMProblems.distribution(proc::LinearGaussianDynamics, ::Int, state) + return Normal(proc.a * state + proc.b, proc.q) +end -rng = Random.MersenneTwister(seed) +struct StochasticVolatility{T<:Real} <: ObservationProcess{T,T} end -x = zeros(T) -y = similar(x) -x[1] = rand(rng, h(params)) -for t in 1:T - if t < T - x[t + 1] = rand(rng, f(params, x[t], t)) - end - y[t] = rand(rng, g(params, x[t], t)) +function SSMProblems.distribution(::StochasticVolatility{T}, ::Int, state) where {T<:Real} + return Normal(zero(T), exp((1 / 2) * state)) end -function gp_update(model::GPSSM, state, step) - gp = GP(model.θ.kernel) - prior = gp(1:(step - 1)) - post = posterior(prior, model.X[1:(step - 1)]) - μ, σ = mean_and_cov(post, [step]) - return Normal(μ[1], σ[1]) +function LinearGaussianStochasticVolatilityModel(a::T, q::T) where {T<:Real} + dyn = LinearGaussianDynamics(a, zero(T), q) + obs = StochasticVolatility{T}() + return SSMProblems.StateSpaceModel(dyn, obs) end -SSMProblems.transition!!(rng::AbstractRNG, model::GPSSM) = rand(rng, h(model.θ)) -function SSMProblems.transition!!(rng::AbstractRNG, model::GPSSM, state, step) - return rand(rng, gp_update(model, state, step)) +function GaussianProcessStateSpaceModel(::Type{T}, kernel::KT) where {T<:Real,KT<:Kernel} + dyn = GaussianProcessDynamics(T, kernel) + obs = StochasticVolatility{T}() + return SSMProblems.StateSpaceModel(dyn, obs) end -function SSMProblems.emission_logdensity(model::GPSSM, state, step) - return logpdf(g(model.θ, state, step), model.observations[step]) -end -function SSMProblems.transition_logdensity(model::GPSSM, prev_state, current_state, step) - return logpdf(gp_update(model, prev_state, step), current_state) +const GPSSM{T,KT<:Kernel} = SSMProblems.StateSpaceModel{ + T, + 
GaussianProcessDynamics{T,KT}, + StochasticVolatility{T} +}; + +# for non-markovian models, we can redefine dynamics to reference the trajectory +function AdvancedPS.dynamics( + ssm::AdvancedPS.TracedSSM{<:GPSSM{T},T,T}, step::Int +) where {T<:Real} + prior = ssm.model.dyn.proc(1:(step - 1)) + post = posterior(prior, ssm.X[1:(step - 1)]) + μ, σ = mean_and_cov(post, [step]) + return LinearGaussianDynamics(zero(T), μ[1], sqrt(σ[1])) end -AdvancedPS.isdone(::GPSSM, step) = step > T +# Everything is now ready to simulate some data. +rng = MersenneTwister(1234); +true_model = LinearGaussianStochasticVolatilityModel(0.9, 0.5); +_, x, y = sample(rng, true_model, 100); -model = GPSSM(y, params) -pg = AdvancedPS.PGAS(Nₚ) -chains = sample(rng, model, pg, Nₛ) +# Create the model and run the sampler +gpssm = GaussianProcessStateSpaceModel(Float64, SqExponentialKernel()); +model = gpssm(y); +pg = AdvancedPS.PGAS(20); +chains = sample(rng, model, pg, 250; progress=false); +#md nothing #hide -particles = hcat([chain.trajectory.model.X for chain in chains]...) +particles = hcat([chain.trajectory.model.X for chain in chains]...); mean_trajectory = mean(particles; dims=2); +#md nothing #hide scatter(particles; label=false, opacity=0.01, color=:black, xlabel="t", ylabel="state") plot!(x; color=:darkorange, label="Original Trajectory") diff --git a/examples/gaussian-ssm/script.jl b/examples/gaussian-ssm/script.jl index d7a5889c..e8fd9ad3 100644 --- a/examples/gaussian-ssm/script.jl +++ b/examples/gaussian-ssm/script.jl @@ -28,71 +28,46 @@ using SSMProblems # as well as the initial distribution $f_0(x) = \mathcal{N}(0, q^2/(1-a^2))$. # To use `AdvancedPS` we first need to define a model type that subtypes `AdvancedPS.AbstractStateSpaceModel`. -Parameters = @NamedTuple begin - a::Float64 - q::Float64 - r::Float64 +mutable struct Parameters{T<:Real} + a::T + q::T + r::T end -mutable struct LinearSSM <: SSMProblems.AbstractStateSpaceModel - X::Vector{Float64} - observations::Vector{Float64} - θ::Parameters - LinearSSM(θ::Parameters) = new(Vector{Float64}(), θ) - LinearSSM(y::Vector, θ::Parameters) = new(Vector{Float64}(), y, θ) +struct LinearGaussianDynamics{T<:Real} <: SSMProblems.LatentDynamics{T,T} + a::T + q::T end -# and the densities defined above. -f(θ::Parameters, state, t) = Normal(θ.a * state, θ.q) # Transition density -g(θ::Parameters, state, t) = Normal(state, θ.r) # Observation density -f₀(θ::Parameters) = Normal(0, θ.q^2 / (1 - θ.a^2)) # Initial state density -#md nothing #hide +function SSMProblems.distribution(dyn::LinearGaussianDynamics{T}; kwargs...) where {T<:Real} + return Normal(zero(T), sqrt(dyn.q^2 / (1 - dyn.a^2))) +end -# We also need to specify the dynamics of the system through the transition equations: -# - `AdvancedPS.initialization`: the initial state density -# - `AdvancedPS.transition`: the state transition density -# - `AdvancedPS.observation`: the observation score given the observed data -# - `AdvancedPS.isdone`: signals the end of the execution for the model -SSMProblems.transition!!(rng::AbstractRNG, model::LinearSSM) = rand(rng, f₀(model.θ)) -function SSMProblems.transition!!( - rng::AbstractRNG, model::LinearSSM, state::Float64, step::Int -) - return rand(rng, f(model.θ, state, step)) +function SSMProblems.distribution(dyn::LinearGaussianDynamics, step::Int, state; kwargs...) 
+ return Normal(dyn.a * state, dyn.q) end -function SSMProblems.emission_logdensity(modeL::LinearSSM, state::Float64, step::Int) - return logpdf(g(model.θ, state, step), model.observations[step]) +struct LinearGaussianObservation{T<:Real} <: SSMProblems.ObservationProcess{T,T} + r::T end -function SSMProblems.transition_logdensity( - model::LinearSSM, prev_state, current_state, step + +function SSMProblems.distribution( + obs::LinearGaussianObservation, step::Int, state; kwargs... ) - return logpdf(f(model.θ, prev_state, step), current_state) + return Normal(state, obs.r) end -# We need to think seriously about how the data is handled -AdvancedPS.isdone(::LinearSSM, step) = step > Tₘ +function LinearGaussianStateSpaceModel(θ::Parameters) + dyn = LinearGaussianDynamics(θ.a, θ.q) + obs = LinearGaussianObservation(θ.r) + return SSMProblems.StateSpaceModel(dyn, obs) +end # Everything is now ready to simulate some data. -a = 0.9 # Scale -q = 0.32 # State variance -r = 1 # Observation variance -Tₘ = 200 # Number of observation -Nₚ = 20 # Number of particles -Nₛ = 500 # Number of samples -seed = 1 # Reproduce everything - -θ₀ = Parameters((a, q, r)) -rng = Random.MersenneTwister(seed) - -x = zeros(Tₘ) -y = zeros(Tₘ) -x[1] = rand(rng, f₀(θ₀)) -for t in 1:Tₘ - if t < Tₘ - x[t + 1] = rand(rng, f(θ₀, x[t], t)) - end - y[t] = rand(rng, g(θ₀, x[t], t)) -end +rng = Random.MersenneTwister(1234) +θ = Parameters(0.9, 0.32, 1.0) +true_model = LinearGaussianStateSpaceModel(θ) +_, x, y = sample(rng, true_model, 200); # Here are the latent and obseravation timeseries plot(x; label="x", xlabel="t") @@ -100,9 +75,8 @@ plot!(y; seriestype=:scatter, label="y", xlabel="t", mc=:red, ms=2, ma=0.5) # `AdvancedPS` subscribes to the `AbstractMCMC` API. To sample we just need to define a Particle Gibbs kernel # and a model interface. -model = LinearSSM(y, θ₀) -pgas = AdvancedPS.PGAS(Nₚ) -chains = sample(rng, model, pgas, Nₛ; progress=false); +pgas = AdvancedPS.PGAS(20) +chains = sample(rng, true_model(y), pgas, 500; progress=false); #md nothing #hide # @@ -118,7 +92,7 @@ plot!(mean_trajectory; color=:dodgerblue, label="Mean trajectory", opacity=0.9) # We used a particle gibbs kernel with the ancestor updating step which should help with the particle # degeneracy problem and improve the mixing. # We can compute the update rate of $x_t$ vs $t$ defined as the proportion of times $t$ where $x_t$ gets updated: -update_rate = sum(abs.(diff(particles; dims=2)) .> 0; dims=2) / Nₛ +update_rate = sum(abs.(diff(particles; dims=2)) .> 0; dims=2) / length(chains) #md nothing #hide # and compare it to the theoretical value of $1 - 1/Nₚ$. 
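# For reference, a minimal sketch of that comparison (an illustrative addition, assuming
# `particles` is the T × Nₛ matrix assembled above and that the kernel was created with
# `AdvancedPS.PGAS(20)`, i.e. twenty particles):
n_particles = 20                    # particle count passed to AdvancedPS.PGAS above
n_samples = length(chains)          # number of Particle Gibbs iterations
empirical_rate = vec(sum(abs.(diff(particles; dims=2)) .> 0; dims=2) ./ n_samples)
theoretical_rate = 1 - 1 / n_particles
#md nothing #hide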
@@ -130,4 +104,4 @@ plot( xlabel="Iteration", ylabel="Update rate", ) -hline!([1 - 1 / Nₚ]; label="N: $(Nₚ)") +hline!([1 - 1 / length(chains)]; label="N: $(length(chains))") diff --git a/examples/levy-ssm/script.jl b/examples/levy-ssm/script.jl index 2b35858f..e42a2eec 100644 --- a/examples/levy-ssm/script.jl +++ b/examples/levy-ssm/script.jl @@ -1,6 +1,4 @@ # # Levy-SSM latent state inference -using AdvancedPS: SSMProblems -using AdvancedPS using Random using Plots using Distributions @@ -8,10 +6,11 @@ using AdvancedPS using LinearAlgebra using SSMProblems -struct GammaProcess - C::Float64 - β::Float64 - tol::Float64 +struct GammaProcess{T} + C::T + β::T + tol::T + GammaProcess(C::T, β::T; ϵ::T=1e-10) where {T<:Real} = new{T}(C, β, ϵ) end struct GammaPath{T} @@ -19,36 +18,18 @@ struct GammaPath{T} times::Vector{T} end -struct LangevinDynamics{T} - A::Matrix{T} - L::Vector{T} - θ::T - H::Vector{T} - σe::T -end - -struct NormalMeanVariance{T} - μ::T - σ::T -end - function simulate( - rng::AbstractRNG, - process::GammaProcess, - rate::Float64, - start::Float64, - finish::Float64, - t0::Float64=0.0, -) + rng::AbstractRNG, process::GammaProcess{T}, rate::T, start::T, finish::T, t0::T=zero(T) +) where {T<:Real} let β = process.β, C = process.C, tolerance = process.tol - jumps = Float64[] + jumps = T[] last_jump = Inf t = t0 truncated = last_jump < tolerance while !truncated - t += rand(rng, Exponential(1.0 / rate)) - xi = 1.0 / (β * (exp(t / C) - 1)) - prob = (1.0 + β * xi) * exp(-β * xi) + t += rand(rng, Exponential(one(T) / rate)) + xi = one(T) / (β * (exp(t / C) - one(T))) + prob = (one(T) + β * xi) * exp(-β * xi) if rand(rng) < prob push!(jumps, xi) last_jump = xi @@ -60,50 +41,30 @@ function simulate( end end -function integral(times::Array{Float64}, path::GammaPath) +function integral(times::Array{<:Real}, path::GammaPath) let jumps = path.jumps, jump_times = path.times return [sum(jumps[jump_times .<= t]) for t in times] end end -# Gamma Process -C = 1.0 -β = 1.0 -ϵ = 1e-10 -process = GammaProcess(C, β, ϵ) - -# Normal Mean-Variance representation -μw = 0.0 -σw = 1.0 -nvm = NormalMeanVariance(μw, σw) - -# Levy SSM with Langevin dynamics -# dx(t) = A x(t) dt + L dW(t) -# y(t) = H x(t) + ϵ(t) -θ = -0.5 -A = [ - 0.0 1.0 - 0.0 θ -] -L = [0.0; 1.0] -σe = 1.0 -H = [1.0, 0] -dyn = LangevinDynamics(A, L, θ, H, σe) +struct LangevinDynamics{T} + A::Matrix{T} + L::Vector{T} + θ::T + H::Vector{T} + σe::T +end -# Simulation parameters -start, finish = 0, 100 -N = 200 -ts = range(start, finish; length=N) -seed = 4 -rng = Random.MersenneTwister(seed) -Np = 50 -Ns = 100 +struct NormalMeanVariance{T} + μ::T + σ::T +end f(dt, θ) = exp(θ * dt) -function Base.exp(dyn::LangevinDynamics, dt::Real) +function Base.exp(dyn::LangevinDynamics{T}, dt::T) where {T<:Real} let θ = dyn.θ f_val = f(dt, θ) - return [1.0 (f_val - 1)/θ; 0 f_val] + return [one(T) (f_val - 1)/θ; zero(T) f_val] end end @@ -118,97 +79,78 @@ function meancov( μ += ft .* μw .* z Σ += ft * transpose(ft) .* σw^2 .* z end - return μ, Σ + + # Guarantees positive semi-definiteness + return μ, Σ + T(1e-6) * I end end -X = zeros(Float64, (N, 2)) -Y = zeros(Float64, N) -for (i, t) in enumerate(ts) - if i > 1 - s = ts[i - 1] - dt = t - s - path = simulate(rng, process, dt, s, t, ϵ) - μ, Σ = meancov(t, dyn, path, nvm) - X[i, :] .= rand(rng, MultivariateNormal(exp(dyn, dt) * X[i - 1, :] + μ, Σ)) - end +struct LevyLangevin{T} <: LatentDynamics{T,Vector{T}} + dt::T + dyn::LangevinDynamics{T} + process::GammaProcess{T} + nvm::NormalMeanVariance{T} +end - let H 
= dyn.H, σe = dyn.σe - Y[i] = transpose(H) * X[i, :] + rand(rng, Normal(0, σe)) - end +function SSMProblems.distribution(proc::LevyLangevin{T}) where {T<:Real} + return MultivariateNormal(zeros(T, 2), I) end -# AdvancedPS -Parameters = @NamedTuple begin - dyn::LangevinDynamics - process::GammaProcess - nvm::NormalMeanVariance - times::Vector{Float64} +function SSMProblems.distribution(proc::LevyLangevin{T}, step::Int, state) where {T<:Real} + dt = proc.dt + path = simulate(rng, proc.process, dt, (step - 1) * dt, step * dt) + μ, Σ = meancov(step * dt, proc.dyn, path, proc.nvm) + return MultivariateNormal(exp(proc.dyn, dt) * state + μ, Σ) end -struct MixedState{T} - x::Vector{T} - path::GammaPath{T} +struct LinearGaussianObservation{T<:Real} <: ObservationProcess{T,T} + H::Vector{T} + R::T end -mutable struct LevyLangevin <: SSMProblems.AbstractStateSpaceModel - X::Vector{MixedState{Float64}} - observations::Vector{Float64} - θ::Parameters - LevyLangevin(θ::Parameters) = new(Vector{MixedState{Float64}}(), θ) - function LevyLangevin(y::Vector{Float64}, θ::Parameters) - return new(Vector{MixedState{Float64}}(), y, θ) - end +function SSMProblems.distribution(proc::LinearGaussianObservation, step::Int, state) + return Normal(transpose(proc.H) * state, proc.R) end -function SSMProblems.transition!!(rng::AbstractRNG, model::LevyLangevin) - return MixedState( - rand(rng, MultivariateNormal([0, 0], I)), GammaPath(Float64[], Float64[]) +function LevyModel(dt, θ, σe, C, β, μw, σw; ϵ=1e-10) + A = [0.0 1.0; 0.0 θ] + L = [0.0; 1.0] + H = [1.0, 0] + + dyn = LevyLangevin( + dt, + LangevinDynamics(A, L, θ, H, σe), + GammaProcess(C, β; ϵ), + NormalMeanVariance(μw, σw), ) -end -function SSMProblems.transition!!( - rng::AbstractRNG, model::LevyLangevin, state::MixedState, step -) - times = model.θ.times - s = times[step - 1] - t = times[step] - dt = t - s - path = simulate(rng, model.θ.process, dt, s, t) - μ, Σ = meancov(t, model.θ.dyn, path, model.θ.nvm) - Σ += 1e-6 * I - return MixedState(rand(rng, MultivariateNormal(exp(dyn, dt) * state.x + μ, Σ)), path) + obs = LinearGaussianObservation(H, σe) + return StateSpaceModel(dyn, obs) end -function SSMProblems.transition_logdensity( - model::LevyLangevin, prev_state::MixedState, current_state::MixedState, step -) - times = model.θ.times - s = times[step - 1] - t = times[step] - dt = t - s - path = simulate(rng, model.θ.process, dt, s, t) - μ, Σ = meancov(t, model.θ.dyn, path, model.θ.nvm) - Σ += 1e-6 * I - return logpdf(MultivariateNormal(exp(dyn, dt) * prev_state.x + μ, Σ), current_state.x) -end +# Levy SSM with Langevin dynamics +# ```math +# dx_{t} = A x_{t} dt + L dW_{t} +# ``` +# ```math +# y_{t} = H x_{t} + ϵ{t} +# ``` -function SSMProblems.emission_logdensity(model::LevyLangevin, state::MixedState, step) - return logpdf(Normal(transpose(H) * state.x, σe), model.observations[step]) -end +# Simulation parameters +N = 200 +ts = range(0, 100; length=N) +levyssm = LevyModel(step(ts), θ, 1.0, 1.0, 1.0, 0.0, 1.0); -AdvancedPS.isdone(model::LevyLangevin, step) = step > length(model.θ.times) +# Simulate data +rng = Random.MersenneTwister(1234); +_, X, Y = sample(rng, levyssm, N); -θ₀ = Parameters((dyn, process, nvm, ts)) -model = LevyLangevin(Y, θ₀) -pg = AdvancedPS.PGAS(Np) -chains = sample(rng, model, pg, Ns; progress=false); +# Run sampler +pg = AdvancedPS.PGAS(50); +chains = sample(rng, levyssm(Y), pg, 100); # Concat all sampled states -particles = hcat([chain.trajectory.model.X for chain in chains]...) 
-marginal_states = map(s -> s.x, particles); -jump_times = map(s -> s.path.times, particles); -jump_intensities = map(s -> s.path.jumps, particles); +marginal_states = hcat([chain.trajectory.model.X for chain in chains]...) # Plot marginal state and jump intensities for one trajectory p1 = plot( @@ -224,12 +166,8 @@ plot!( label="Marginal State (x2)", ) -p2 = scatter( - vcat([t for t in jump_times[:, end]]...), - vcat([j for j in jump_intensities[:, end]]...); - color=:darkorange, - label="Jumps", -) +# TODO: collect jumps from the model +p2 = scatter([], []; color=:darkorange, label="Jumps") plot( p1, p2; plot_title="Marginal State and Jump Intensities", layout=(2, 1), size=(600, 600) @@ -250,7 +188,7 @@ for d in 1:2 fillalpha=0.2, title="Marginal State Trajectories (X$d)", ) - plot!(p, ts, X[:, d]; color=:dodgerblue, label="True Trajectory") + plot!(p, ts, getindex.(X, d); color=:dodgerblue, label="True Trajectory") push!(ps, p) end plot(ps...; layout=(2, 1), size=(600, 600)) diff --git a/examples/particle-gibbs/script.jl b/examples/particle-gibbs/script.jl index f7a9d2aa..1eb782dc 100644 --- a/examples/particle-gibbs/script.jl +++ b/examples/particle-gibbs/script.jl @@ -37,7 +37,7 @@ end # We consider the following stochastic volatility model: # # ```math -# x_{t+1} = a x_t + v_t \quad v_{t} \sim \mathcal{N}(0, r^2) +# x_{t+1} = a x_t + v_t \quad v_{t} \sim \mathcal{N}(0, q^2) # ``` # ```math # y_{t} = e_t \exp(\frac{1}{2}x_t) \quad e_t \sim \mathcal{N}(0, 1) @@ -45,89 +45,54 @@ end # # We can reformulate the above in terms of transition and observation densities: # ```math -# x_{t+1} \sim f_{\theta}(x_{t+1}|x_t) = \mathcal{N}(a x_t, r^2) +# x_{t+1} \sim f_{\theta}(x_{t+1}|x_t) = \mathcal{N}(a x_t, q^2) # ``` # ```math # y_t \sim g_{\theta}(y_t|x_t) = \mathcal{N}(0, \exp(\frac{1}{2}x_t)^2) # ``` # with the initial distribution $f_0(x) = \mathcal{N}(0, q^2)$. -# Here we assume the static parameters $\theta = (q^2, r^2)$ are known and we are only interested in sampling from the latent state $x_t$. -Parameters = @NamedTuple begin - a::Float64 - q::Float64 - T::Int +# Here we assume the static parameters $\theta = (a^2, q^2)$ are known and we are only interested in sampling from the latent state $x_t$. 
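# As a quick illustrative sketch (not part of the model definition that follows), the three
# densities above can be sampled directly with Distributions.jl; `a = 0.9` and `q = 0.5` are
# placeholder values, and `Random`/`Distributions` are assumed loaded as elsewhere in this script:
let a = 0.9, q = 0.5, rng_sketch = Random.MersenneTwister(1)
    x₁ = rand(rng_sketch, Normal(0, q))            # x₁ ~ f₀
    x₂ = rand(rng_sketch, Normal(a * x₁, q))       # x₂ ~ f_θ(x₂ | x₁)
    y₂ = rand(rng_sketch, Normal(0, exp(x₂ / 2)))  # y₂ ~ g_θ(y₂ | x₂)
    (x₁, x₂, y₂)
end
#md nothing #hide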
+struct LinearGaussianDynamics{T<:Real} <: SSMProblems.LatentDynamics{T,T} + a::T + q::T end -f(θ::Parameters, state, t) = Normal(θ.a * state, θ.q) -g(θ::Parameters, state, t) = Normal(0, exp(0.5 * state)) -f₀(θ::Parameters) = Normal(0, θ.q) -#md nothing #hide +function SSMProblems.distribution(dyn::LinearGaussianDynamics{T}) where {T<:Real} + return Normal(zero(T), dyn.q) +end -# Let's simulate some data -a = 0.9 # State Variance -q = 0.5 # Observation variance -Tₘ = 200 # Number of observation -Nₚ = 20 # Number of particles -Nₛ = 200 # Number of samples -seed = 1 # Reproduce everything - -θ₀ = Parameters((a, q, Tₘ)) -rng = Random.MersenneTwister(seed) - -x = zeros(Tₘ) -y = zeros(Tₘ) -x[1] = 0 -for t in 1:Tₘ - if t < Tₘ - x[t + 1] = rand(rng, f(θ₀, x[t], t)) - end - y[t] = rand(rng, g(θ₀, x[t], t)) +function SSMProblems.distribution(dyn::LinearGaussianDynamics, ::Int, state) + return Normal(dyn.a * state, dyn.q) end -# Here are the latent and observation series: -plot(x; label="x", xlabel="t") +struct StochasticVolatility{T<:Real} <: SSMProblems.ObservationProcess{T,T} end -# -plot(y; label="y", xlabel="t") - -# Each model takes an `AbstractRNG` as input and generates the logpdf of the current transition: -mutable struct NonLinearTimeSeries <: SSMProblems.AbstractStateSpaceModel - X::Vector{Float64} - observations::Vector{Float64} - θ::Parameters - NonLinearTimeSeries(θ::Parameters) = new(Float64[], θ) - NonLinearTimeSeries(y::Vector{Float64}, θ::Parameters) = new(Float64[], y, θ) +function SSMProblems.distribution(::StochasticVolatility{T}, ::Int, state) where {T<:Real} + return Normal(zero(T), exp((1 / 2) * state)) end -# The dynamics of the model is defined through the `AbstractStateSpaceModel` interface: -function SSMProblems.transition!!(rng::AbstractRNG, model::NonLinearTimeSeries) - return rand(rng, f₀(model.θ)) -end -function SSMProblems.transition!!( - rng::AbstractRNG, model::NonLinearTimeSeries, state::Float64, step::Int -) - return rand(rng, f(model.θ, state, step)) +function LinearGaussianStochasticVolatilityModel(a::T, q::T) where {T<:Real} + dyn = LinearGaussianDynamics(a, q) + obs = StochasticVolatility{T}() + return SSMProblems.StateSpaceModel(dyn, obs) end +#md nothing #hide -function SSMProblems.emission_logdensity( - modeL::NonLinearTimeSeries, state::Float64, step::Int -) - return logpdf(g(model.θ, state, step), model.observations[step]) -end -function SSMProblems.transition_logdensity( - model::NonLinearTimeSeries, prev_state, current_state, step -) - return logpdf(f(model.θ, prev_state, step), current_state) -end +# Let's simulate some data +rng = Random.MersenneTwister(1234) +true_model = LinearGaussianStochasticVolatilityModel(0.9, 0.5) +_, x, y = sample(rng, true_model, 200); -# We need to tell AdvancedPS when to stop the execution of the model -# TODO -AdvancedPS.isdone(::NonLinearTimeSeries, step) = step > Tₘ +# Here are the latent and observation series: +plot(x; label="x", xlabel="t") + +# +plot(y; label="y", xlabel="t") # Here we use the particle gibbs kernel without adaptive resampling. -model = NonLinearTimeSeries(y, θ₀) -pg = AdvancedPS.PG(Nₚ, 1.0) -chains = sample(rng, model, pg, Nₛ; progress=false); +model = true_model(y) +pg = AdvancedPS.PG(20, 1.0) +chains = sample(rng, model, pg, 200; progress=false); #md nothing #hide particles = hcat([chain.trajectory.model.X for chain in chains]...) 
# Concat all sampled states @@ -145,11 +110,11 @@ plot!(mean_trajectory; color=:dodgerblue, label="Mean trajectory", opacity=0.9) # We can also check the mixing as defined in the Gaussian State Space model example. As seen on the # scatter plot above, we are mostly left with a single trajectory before timestep 150. The orange # bar is the optimal mixing rate for the number of particles we use. -plot_update_rate(update_rate(particles, Nₛ)[:, 1], Nₚ) +plot_update_rate(update_rate(particles, 200)[:, 1], 20) # Let's see if ancestor sampling can help with the degeneracy problem. We use the same number of particles, but replace the sampler with PGAS. -pgas = AdvancedPS.PGAS(Nₚ) -chains = sample(rng, model, pgas, Nₛ; progress=false); +pgas = AdvancedPS.PGAS(20) +chains = sample(rng, model, pgas, 200; progress=false); particles = hcat([chain.trajectory.model.X for chain in chains]...); mean_trajectory = mean(particles; dims=2); @@ -161,4 +126,4 @@ plot!(x; color=:darkorange, label="Original Trajectory") plot!(mean_trajectory; color=:dodgerblue, label="Mean trajectory", opacity=0.9) # The update rate is now much higher throughout time. -plot_update_rate(update_rate(particles, Nₛ)[:, 1], Nₚ) +plot_update_rate(update_rate(particles, 200)[:, 1], 20) diff --git a/src/model.jl b/src/model.jl index b7b72db3..c836c74f 100644 --- a/src/model.jl +++ b/src/model.jl @@ -10,6 +10,26 @@ const Particle = Trace const SSMTrace{R} = Trace{<:SSMProblems.AbstractStateSpaceModel,R} const GenericTrace{R} = Trace{<:AbstractGenericModel,R} +mutable struct TracedSSM{SSM,XT,YT} <: SSMProblems.AbstractStateSpaceModel + model::SSM + X::Vector{XT} + Y::Vector{YT} + + function TracedSSM( + model::SSMProblems.StateSpaceModel{T,LD,OP}, Y::Vector{YT} + ) where {T,LD,OP,YT} + XT = eltype(LD) + @assert eltype(OP) == YT + return new{SSMProblems.StateSpaceModel{T,LD,OP},XT,YT}(model, Vector{XT}(), Y) + end +end + +(model::SSMProblems.StateSpaceModel)(Y::AbstractVector) = TracedSSM(model, Y) +dynamics(ssm::TracedSSM, step::Int) = ssm.model.dyn +observation(ssm::TracedSSM, step::Int) = ssm.model.obs + +isdone(ssm::TracedSSM, step::Int) = step > length(ssm.Y) + reset_logprob!(::AdvancedPS.Particle) = nothing reset_model(f) = deepcopy(f) delete_retained!(f) = nothing diff --git a/src/pgas.jl b/src/pgas.jl index 41bcfea7..c14bc736 100644 --- a/src/pgas.jl +++ b/src/pgas.jl @@ -23,12 +23,10 @@ current_step(trace::SSMTrace) = trace.rng.count Get the log weight of the transition from previous state of `model` to `x` """ -function transition_logweight(particle::SSMTrace, x) - score = SSMProblems.transition_logdensity( - particle.model, - particle.model.X[current_step(particle) - 2], - x, - current_step(particle) - 1, +function transition_logweight(particle::SSMTrace, x; kwargs...) + iter = current_step(particle) - 1 + score = SSMProblems.logdensity( + dynamics(particle.model, iter), iter, particle.model.X[iter - 1], x, kwargs... 
) return score end @@ -61,22 +59,25 @@ function advance!(particle::SSMTrace, isref::Bool=false) if !isref if running_step == 1 - new_state = SSMProblems.transition!!(particle.rng, model) + new_state = SSMProblems.simulate(particle.rng, dynamics(model, running_step)) else current_state = model.X[running_step - 1] - new_state = SSMProblems.transition!!( - particle.rng, model, current_state, running_step + new_state = SSMProblems.simulate( + particle.rng, dynamics(model, running_step), running_step, current_state ) end else - new_state = model.X[running_step] # We need the current state from the reference particle + # We need the current state from the reference particle + new_state = model.X[running_step] end - score = SSMProblems.emission_logdensity(model, new_state, running_step) + score = SSMProblems.logdensity( + observation(model, running_step), running_step, new_state, model.Y[running_step] + ) - # accept transition + # Accept transition and move the time index/rng counter !isref && push!(model.X, new_state) - inc_counter!(particle.rng) # Increase rng counter, we use it as the model `time` index instead of handling two distinct counters + inc_counter!(particle.rng) return score end diff --git a/test/Project.toml b/test/Project.toml index c5edae50..cac881c2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -21,6 +21,5 @@ HypothesisTests = "0.11" DynamicIterators = "0.4" Libtask = "0.8" Random123 = "1.3" -SSMProblems = "0.1" StableRNGs = "1" julia = "1.3" diff --git a/test/container.jl b/test/container.jl index ba266499..f5434d61 100644 --- a/test/container.jl +++ b/test/container.jl @@ -1,18 +1,20 @@ @testset "container.jl" begin # Since the extension would hide the low level function call API - mutable struct LogPModel{T} <: SSMProblems.AbstractStateSpaceModel + struct LogPDynamics{T} <: LatentDynamics{T,T} end + struct LogPObservation{T} <: ObservationProcess{T,T} logp::T - X::Array{T} end - SSMProblems.transition!!(rng::AbstractRNG, model::LogPModel) = rand(rng, Uniform()) - function SSMProblems.transition!!(rng::AbstractRNG, model::LogPModel, state, step) - return rand(rng, Uniform()) - end - SSMProblems.emission_logdensity(model::LogPModel, state, step) = model.logp + SSMProblems.logdensity(proc::LogPObservation, ::Int, state, observation) = proc.logp + SSMProblems.distribution(proc::LogPDynamics, ::Int, state) = Uniform() + SSMProblems.distribution(::LogPDynamics) = Uniform() - AdvancedPS.isdone(model::LogPModel, step) = false + function LogPModel(logp::T) where {T<:Real} + ssm = StateSpaceModel(LogPDynamics{T}(), LogPObservation(logp)) + # pick some arbitrarily large observables + return ssm(ones(T, 10)) + end @testset "copy particle container" begin pc = AdvancedPS.ParticleContainer(AdvancedPS.Trace[]) @@ -27,13 +29,13 @@ logps = [0.0, -1.0, -2.0] particles = map(logps) do logp trng = AdvancedPS.TracedRNG() - AdvancedPS.Trace(LogPModel(logp, []), trng) + AdvancedPS.Trace(LogPModel(logp), trng) end pc = AdvancedPS.ParticleContainer(particles) # Push! 
pc2 = deepcopy(pc) - particle = AdvancedPS.Trace(LogPModel(0, []), AdvancedPS.TracedRNG()) + particle = AdvancedPS.Trace(LogPModel(0.0), AdvancedPS.TracedRNG()) push!(pc2, particle) @test length(pc2.vals) == 4 @test pc2.logWs ≈ zeros(4) @@ -67,7 +69,7 @@ # Resample and propagate particles with reference particle particles_ref = map(logps) do logp trng = AdvancedPS.TracedRNG() - AdvancedPS.Trace(LogPModel(logp, []), trng) + AdvancedPS.Trace(LogPModel(logp), trng) end pc_ref = AdvancedPS.ParticleContainer(particles_ref) @@ -168,7 +170,7 @@ particles = map(1:n) do _ trng = AdvancedPS.TracedRNG() - AdvancedPS.Trace(LogPModel(1.0, []), trng) + AdvancedPS.Trace(LogPModel(1.0), trng) end pc = AdvancedPS.ParticleContainer(particles, AdvancedPS.TracedRNG()) @@ -193,7 +195,7 @@ @testset "seed history" begin # Test task copy version of trace trng = AdvancedPS.TracedRNG() - tr = AdvancedPS.Trace(LogPModel(1.0, []), trng) + tr = AdvancedPS.Trace(LogPModel(1.0), trng) AdvancedPS.advance!(tr) AdvancedPS.advance!(tr) diff --git a/test/linear-gaussian.jl b/test/linear-gaussian.jl index 23b0a802..6409ab63 100644 --- a/test/linear-gaussian.jl +++ b/test/linear-gaussian.jl @@ -29,18 +29,18 @@ end N_SAMPLES = 200 # Model dynamics - a = 0.5 - b = 0.2 - q = 0.1 - E = LinearEvolution(a, Gaussian(b, q)) + A = 0.5 + B = 0.2 + Q = 0.1 + E = LinearEvolution(A, Gaussian(B, Q)) H = 1.0 R = 0.1 Obs = LinearObservationModel(H, R) - x0 = 0.0 + X0 = 0.0 P0 = 1.0 - G0 = Gaussian(x0, P0) + G0 = Gaussian(X0, P0) M = LinearStateSpaceModel(E, Obs) O = LinearObservation(E, H, R) @@ -56,46 +56,41 @@ end Xf, ll = kalmanfilter(M, 1 => G0, y_pairs) # Define AdvancedPS model - mutable struct LinearGaussianParams - a::Float64 - b::Float64 - q::Float64 - h::Float64 - r::Float64 - x0::Float64 - p0::Float64 + struct LinearGaussianDynamics{T<:Real} <: LatentDynamics{T,T} + a::T + b::T + q::T end - mutable struct LinearGaussianModel <: SSMProblems.AbstractStateSpaceModel - X::Vector{Float64} - observations::Vector{Float64} - θ::LinearGaussianParams - function LinearGaussianModel(y::Vector{Float64}, θ::LinearGaussianParams) - return new(Vector{Float64}(), y, θ) - end + function SSMProblems.distribution(proc::LinearGaussianDynamics{T}; kwargs...) where {T} + return Normal(convert(T, X0), convert(T, P0)) end - function SSMProblems.transition!!(rng::AbstractRNG, model::LinearGaussianModel) - return rand(rng, Normal(model.θ.x0, model.θ.p0)) - end - function SSMProblems.transition!!( - rng::AbstractRNG, model::LinearGaussianModel, state, step + function SSMProblems.distribution( + proc::LinearGaussianDynamics, step::Int, state; kwargs... ) - return rand(rng, Normal(model.θ.a * state + model.θ.b, model.θ.q)) + return Normal(proc.a * state + proc.b, proc.q) end - function SSMProblems.transition_logdensity( - model::LinearGaussianModel, prev_state, current_state, step - ) - return logpdf(Normal(model.θ.a * prev_state + model.θ.b, model.θ.q), current_state) + + struct LinearGaussianObservation{T<:Real} <: ObservationProcess{T,T} + h::T + r::T end - function SSMProblems.emission_logdensity(model::LinearGaussianModel, state, step) - return logpdf(Normal(model.θ.h * state, model.θ.r), model.observations[step]) + + function SSMProblems.distribution( + proc::LinearGaussianObservation, step::Int, state; kwargs... 
+ ) + return Normal(proc.h * state, proc.r) end - AdvancedPS.isdone(::LinearGaussianModel, step) = step > T + function LinearGaussianStateSpaceModel(a, b, q, h, r) + dyn = LinearGaussianDynamics(a, b, q) + obs = LinearGaussianObservation(h, r) + return StateSpaceModel(dyn, obs) + end - params = LinearGaussianParams(a, b, q, H, R, x0, P0) - model = LinearGaussianModel(ys, params) + lgssm = LinearGaussianStateSpaceModel(A, B, Q, H, R) + model = lgssm(ys) @testset "PGAS" begin pgas = AdvancedPS.PGAS(N_PARTICLES) diff --git a/test/pgas.jl b/test/pgas.jl index 3a701db7..6a909ac1 100644 --- a/test/pgas.jl +++ b/test/pgas.jl @@ -1,35 +1,40 @@ @testset "pgas.jl" begin - mutable struct Params - a::Float64 - q::Float64 - r::Float64 + mutable struct Params{T<:Real} + a::T + q::T + r::T end - mutable struct BaseModel <: SSMProblems.AbstractStateSpaceModel - X::Vector{Float64} - θ::Params - BaseModel(params::Params) = new(Vector{Float64}(), params) + struct BaseModelDynamics{T<:Real} <: LatentDynamics{T,T} + a::T + q::T end - function SSMProblems.transition!!(rng::AbstractRNG, model::BaseModel) - return rand(rng, Normal(0, model.θ.q)) + function SSMProblems.distribution(dyn::BaseModelDynamics{T}) where {T<:Real} + return Normal(zero(T), dyn.q) end - function SSMProblems.transition!!(rng::AbstractRNG, model::BaseModel, state, step) - return rand(rng, Normal(model.θ.a * state, model.θ.q)) + + function SSMProblems.distribution(dyn::BaseModelDynamics, step::Int, state) + return Normal(dyn.a * state, dyn.q) end - function SSMProblems.emission_logdensity(model::BaseModel, state, step) - return logpdf(Distributions.Normal(state, model.θ.r), 0) + + struct BaseModelObservation{T<:Real} <: ObservationProcess{T,T} + r::T end - function SSMProblems.transition_logdensity( - model::BaseModel, prev_state, current_state, step - ) - return logpdf(Normal(model.θ.a * prev_state, model.θ.q), current_state) + + function SSMProblems.distribution(obs::BaseModelObservation, step::Int, state) + return Normal(state, obs.r) end - AdvancedPS.isdone(::BaseModel, step) = step > 3 + function BaseModel(θ::Params{T}) where {T<:Real} + dyn = BaseModelDynamics(θ.a, θ.q) + obs = BaseModelObservation(θ.r) + ssm = StateSpaceModel(dyn, obs) + return ssm(zeros(T, 3)) + end @testset "fork reference" begin - model = BaseModel(Params(0.9, 0.32, 1)) + model = BaseModel(Params(0.9, 0.32, 1.0)) part = AdvancedPS.Trace(model, AdvancedPS.TracedRNG()) AdvancedPS.advance!(part) @@ -50,7 +55,7 @@ @testset "update reference" begin base_rng = Random.MersenneTwister(31) particles = [ - AdvancedPS.Trace(BaseModel(Params(0.9, 0.31, 1)), AdvancedPS.TracedRNG()) for + AdvancedPS.Trace(BaseModel(Params(0.9, 0.31, 1.0)), AdvancedPS.TracedRNG()) for _ in 1:3 ] sampler = AdvancedPS.PGAS(3) @@ -86,7 +91,7 @@ end @testset "rng stability" begin - model = BaseModel(Params(0.9, 0.32, 1)) + model = BaseModel(Params(0.9, 0.32, 1.0)) seed = 10 rng = Random.MersenneTwister(seed) @@ -117,7 +122,7 @@ # Smoke test mostly @testset "smc sampler" begin - model = BaseModel(Params(0.9, 0.32, 1)) + model = BaseModel(Params(0.9, 0.32, 1.0)) npart = 10 sampler = AdvancedPS.SMC(npart)
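# For reference, a minimal end-to-end sketch of the interface exercised by these tests
# (an illustrative addition, assuming the BaseModelDynamics/BaseModelObservation
# definitions above and arbitrary parameter values):
dyn_sketch = BaseModelDynamics(0.9, 0.32)          # latent transition x_{t+1} ~ N(a x_t, q)
obs_sketch = BaseModelObservation(1.0)             # observation y_t ~ N(x_t, r)
ssm_sketch = StateSpaceModel(dyn_sketch, obs_sketch)
model_sketch = ssm_sketch(zeros(3))                # condition on three observations
chains_sketch = sample(Random.MersenneTwister(1), model_sketch, AdvancedPS.PGAS(10), 20)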