|
#! Eq (14) of Girolami & Calderhead (2011)
# Solve `G * x = r` for `x`, where `G` is the metric obtained by applying
# `h.metric.map` to `h.metric.G(θ)`.
# NOTE it's actually pretty weird that ∂H∂θ returns DualValue but ∂H∂r doesn't
function ∂H∂r(
    h::Hamiltonian{<:DenseRiemannianMetric,<:GaussianKinetic},
    θ::AbstractVecOrMat,
    r::AbstractVecOrMat,
)
    metric = h.metric
    mapped_metric = metric.map(metric.G(θ))
    return mapped_metric \ r
end
| 9 | + |
# Gradient information w.r.t. θ for a dense Riemannian metric used with `IdentityMap`.
#
# Evaluates `h.∂ℓπ∂θ(θ)` for the log-density value and its gradient, builds the metric
# `G = h.metric.map(h.metric.G(θ))`, and combines them with the slices
# `h.metric.∂G∂θ(θ)[:, :, i]` following Eq (15) of Girolami & Calderhead (2011).
#
# Returns `DualValue(ℓπ, g)` where `g` has one entry per element of the log-density
# gradient and is the negation of
#     ∂ℓπ∂θ[i] - 1/2 * tr(G⁻¹ ∂G∂θᵢ) + 1/2 * r' G⁻¹ ∂G∂θᵢ G⁻¹ r.
function ∂H∂θ(
    h::Hamiltonian{<:DenseRiemannianMetric{T,<:IdentityMap},<:GaussianKinetic},
    θ::AbstractVecOrMat{T},
    r::AbstractVecOrMat{T},
) where {T}
    ℓπ, ∂ℓπ∂θ = h.∂ℓπ∂θ(θ)
    G = h.metric.map(h.metric.G(θ))
    invG = inv(G)
    ∂G∂θ = h.metric.∂G∂θ(θ)
    d = length(∂ℓπ∂θ)

    # Both outer factors of the quadratic form are independent of i, so compute them once.
    # They are kept separate so that no symmetry of invG has to be assumed.
    rt_invG = r' * invG
    invG_r = invG * r

    #! Eq (15) of Girolami & Calderhead (2011)
    # `map` over the index range always yields a vector; reducing with `vcat` would
    # collapse to a bare scalar when there is exactly one parameter.
    g = map(1:d) do i
        ∂G∂θᵢ = ∂G∂θ[:, :, i]
        -(∂ℓπ∂θ[i] - 1 / 2 * tr(invG * ∂G∂θᵢ) + 1 / 2 * (rt_invG * ∂G∂θᵢ * invG_r))
    end
    return DualValue(ℓπ, g)
end
| 33 | + |
# Ref: https://www.wolframalpha.com/input?i=derivative+of+x+*+coth%28a+*+x%29
#! Based on the middle of the right column of Page 3 of Betancourt (2012): "Note that
#! when λi = λj, such as for the diagonal elements or degenerate eigenvalues, this
#! becomes the derivative"
#
# Derivative of `λ * coth(α * λ)` with respect to `λ`:
#     coth(α λ) - α λ csch(α λ)^2
function dsoftabsdλ(α, λ)
    x = float(α * λ)
    # At x == 0 the closed form is `Inf + 0 * -Inf`, i.e. NaN. The function
    # `λ * coth(α * λ)` is even in λ, so its derivative at the origin is exactly zero.
    iszero(x) && return zero(x)
    return coth(x) - x * csch(x)^2
end
| 37 | + |
#! J as defined in middle of the right column of Page 3 of Betancourt (2012)
# Build the square matrix J with one row/column per entry of `λ`:
#   * where λ[i] == λ[j] (always true on the diagonal) the entry is dsoftabsdλ(α, λ[i]);
#   * otherwise it is the divided difference of x -> x * coth(α * x) between
#     λ[i] and λ[j].
function make_J(λ::AbstractVector{T}, α::T) where {T<:AbstractFloat}
    n = length(λ)
    xcoth(x) = x * coth(α * x)
    return T[
        if λ[row] == λ[col]
            dsoftabsdλ(α, λ[row])
        else
            (xcoth(λ[row]) - xcoth(λ[col])) / (λ[row] - λ[col])
        end for row in 1:n, col in 1:n
    ]
end
| 51 | + |
# `∂H∂θ` for a dense Riemannian metric used with `SoftAbsMap`: forwards to
# `∂H∂θ_cache` with its default keywords, i.e. no cache is supplied and only the
# `DualValue` result is returned.
function ∂H∂θ(
    h::Hamiltonian{<:DenseRiemannianMetric{T,<:SoftAbsMap},<:GaussianKinetic},
    θ::AbstractVecOrMat{T},
    r::AbstractVecOrMat{T},
) where {T}
    dual = ∂H∂θ_cache(h, θ, r)
    return dual
end
# `∂H∂θ` for a SoftAbs-mapped dense Riemannian metric, with optional reuse of the
# quantities that depend only on θ.
#
# Keywords:
#   * `cache`: `nothing` (default) to compute everything from scratch, or the cache
#     returned by an earlier call; it is destructured positionally as
#     `(ℓπ, ∂ℓπ∂θ, ∂H∂θ, Q, softabsλ, J, term_1_cached)`.
#   * `return_cache`: when `true`, return `(dv, cache)` instead of just `dv`.
#
# Returns `DualValue(ℓπ, g)` (optionally paired with the cache), where `g` has one entry
# per element of the log-density gradient.
function ∂H∂θ_cache(
    h::Hamiltonian{<:DenseRiemannianMetric{T,<:SoftAbsMap},<:GaussianKinetic},
    θ::AbstractVecOrMat{T},
    r::AbstractVecOrMat{T};
    return_cache=false,
    cache=nothing,
) where {T}
    # Terms that depend only on θ can be cached across loops in which θ is unchanged
    if isnothing(cache)
        ℓπ, ∂ℓπ∂θ = h.∂ℓπ∂θ(θ)
        H = h.metric.G(θ)
        ∂H∂θ = h.metric.∂G∂θ(θ)

        # The first value returned by `softabs` is not needed here.
        _, Q, λ, softabsλ = softabs(H, h.metric.map.α)

        R = diagm(1 ./ softabsλ)

        J = make_J(λ, h.metric.map.α)

        #! Based on the two equations from the right column of Page 3 of Betancourt (2012)
        term_1_cached = Q * (R .* J) * Q'
    else
        ℓπ, ∂ℓπ∂θ, ∂H∂θ, Q, softabsλ, J, term_1_cached = cache
    end

    # The second term depends on r, so it is rebuilt on every call.
    d = length(∂ℓπ∂θ)
    D = diagm((Q' * r) ./ softabsλ)
    term_2_cached = Q * D * J * D * Q'

    # `map` over the index range always yields a vector; reducing with `vcat` would
    # collapse to a bare scalar when there is exactly one parameter.
    # NOTE Some further optimization can be done here: cache the 1st product all together
    g = map(1:d) do i
        ∂H∂θᵢ = ∂H∂θ[:, :, i]
        -(
            ∂ℓπ∂θ[i] - 1 / 2 * tr(term_1_cached * ∂H∂θᵢ) +
            1 / 2 * tr(term_2_cached * ∂H∂θᵢ)
        )
    end

    dv = DualValue(ℓπ, g)
    return return_cache ? (dv, (; ℓπ, ∂ℓπ∂θ, ∂H∂θ, Q, softabsλ, J, term_1_cached)) : dv
end
| 103 | + |
# QUES Do we want to change everything to position dependent by default?
# Add θ to ∂H∂r for DenseRiemannianMetric
#
# Assemble a `PhasePoint` from position `θ` and momentum `r` for a dense Riemannian
# metric. Unless supplied by the caller:
#   * `ℓπ` is `∂H∂θ(h, θ)`;
#   * `ℓκ` is a `DualValue` pairing `neg_energy(h, r, θ)` with `∂H∂r(h, θ, r)`, both of
#     which receive θ because the metric is position dependent.
function phasepoint(
    h::Hamiltonian{<:DenseRiemannianMetric},
    θ::T,
    r::T;
    ℓπ=∂H∂θ(h, θ),
    ℓκ=DualValue(neg_energy(h, r, θ), ∂H∂r(h, θ, r)),
) where {T<:AbstractVecOrMat}
    point = PhasePoint(θ, r, ℓπ, ℓκ)
    return point
end
| 115 | + |
#! Eq (13) of Girolami & Calderhead (2011)
# Returns `-logZ - r' * inv(G) * r / 2` with `G = h.metric.map(h.metric.G(θ))` and
# `logZ = 1/2 * (D * log(2π) + logdet(G))`, `D` being the first dimension of `G`.
# Side effect: `h.metric._temp` is overwritten with `inv(G) * r`.
function neg_energy(
    h::Hamiltonian{<:DenseRiemannianMetric,<:GaussianKinetic}, r::T, θ::T
) where {T<:AbstractVecOrMat}
    metric = h.metric
    G = metric.map(metric.G(θ))
    dim = size(G, 1)
    # The normalizing term has to be included because it is no longer the same for
    # different θs. It is the user's responsibility to make sure G is SPD so that
    # logdet(G) is defined.
    log_normalizer = 1 / 2 * (dim * log(2π) + logdet(G))
    mul!(metric._temp, inv(G), r)
    quadratic = dot(r, metric._temp)
    return -log_normalizer - quadratic / 2
end
0 commit comments