Merge branch 'main' into qqy/sghmc

ErikQQY · web-flow · commit 914f1a0c1085 · 2025-05-18T17:34:00.000+08:00
diff --git a/.github/workflows/Docs.yml b/.github/workflows/Docs.yml
@@ -15,7 +15,7 @@ concurrency:
 
 permissions:
   contents: write
-  pull-requests: read
+  pull-requests: write
 
 jobs:
   docs:
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,5 +1,9 @@
 # AdvancedHMC Changelog
 
+## 0.8.0
+
+  - To make an MCMC transition from phase point `z` using trajectory `τ` (or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)` (if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`).
+
 ## v0.7.1
 
   - README has been simplified, many docs transfered to docs: https://turinglang.org/AdvancedHMC.jl/dev/.
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "AdvancedHMC"
 uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
-version = "0.7.1"
+version = "0.8.0"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -4,6 +4,6 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 
 [compat]
-AdvancedHMC = "0.7"
+AdvancedHMC = "0.8"
 Documenter = "1"
 DocumenterCitations = "1"
diff --git a/research/src/riemannian_hmc_utility.jl b/research/src/riemannian_hmc_utility.jl
@@ -1,8 +1,8 @@
-using Random, LinearAlgebra, ReverseDiff, ForwardDiff, VecTargets
+using Random, LinearAlgebra, ReverseDiff, ForwardDiff, MCMCLogDensityProblems
 
 # Fisher information metric
 function gen_∂G∂θ_rev(Vfunc, x; f=identity)
-    _Hfunc = VecTargets.gen_hess(Vfunc, ReverseDiff.track.(x))
+    _Hfunc = MCMCLogDensityProblems.gen_hess(Vfunc, ReverseDiff.track.(x))
     Hfunc = x -> _Hfunc(x)[3]
     # QUES What's the best output format of this function?
     return x -> ReverseDiff.jacobian(x -> f(Hfunc(x)), x) # default output shape [∂H∂x₁; ∂H∂x₂; ...]
@@ -37,7 +37,7 @@ end
 
 function prepare_sample_target(hps, θ₀, ℓπ)
     Vfunc = x -> -ℓπ(x) # potential energy is the negative log-probability
-    _Hfunc = VecTargets.gen_hess(Vfunc, θ₀) # x -> (value, gradient, hessian)
+    _Hfunc = MCMCLogDensityProblems.gen_hess(Vfunc, θ₀) # x -> (value, gradient, hessian)
     Hfunc = x -> copy.(_Hfunc(x)) # _Hfunc do in-place computation, copy to avoid bug
 
     fstabilize = H -> H + hps.λ * I
@@ -70,8 +70,8 @@ function prepare_sample(hps; rng=MersenneTwister(1110))
 
     θ₀ = rand(rng, dim(target))
 
-    ℓπ = VecTargets.gen_logpdf(target)
-    ∂ℓπ∂θ = VecTargets.gen_logpdf_grad(target, θ₀)
+    ℓπ = MCMCLogDensityProblems.gen_logpdf(target)
+    ∂ℓπ∂θ = MCMCLogDensityProblems.gen_logpdf_grad(target, θ₀)
 
     _, _, Gfunc, ∂G∂θfunc = prepare_sample_target(hps, θ₀, ℓπ)
 
diff --git a/research/tests/runtests.jl b/research/tests/runtests.jl
@@ -1,15 +1,10 @@
 using Comonicon, ReTest
 
-using Pkg;
-Pkg.add(; url="https://github.com/xukai92/VecTargets.jl.git");
-
 # include the source code for experimental HMC
 include("../src/relativistic_hmc.jl")
-include("../src/riemannian_hmc.jl")
 
 # include the tests for experimental HMC
 include("relativistic_hmc.jl")
-include("riemannian_hmc.jl")
 
 Comonicon.@main function runtests(patterns...; dry::Bool=false)
     return retest(patterns...; dry=dry, verbose=Inf)
diff --git a/src/riemannian/hamiltonian.jl b/src/riemannian/hamiltonian.jl
diff --git a/src/sampler.jl b/src/sampler.jl
@@ -54,7 +54,7 @@ function transition(
     (; refreshment, τ) = κ
     @set! τ.integrator = jitter(rng, τ.integrator)
     z = refresh(rng, refreshment, h, z)
-    return transition(rng, τ, h, z)
+    return transition(rng, h, τ, z)
 end
 
 function Adaptation.adapt!(
diff --git a/src/trajectory.jl b/src/trajectory.jl
@@ -244,10 +244,10 @@ $(SIGNATURES)
 
 Make a MCMC transition from phase point `z` using the trajectory `τ` under Hamiltonian `h`.
 
-NOTE: This is a RNG-implicit fallback function for `transition(Random.default_rng(), τ, h, z)`
+NOTE: This is a RNG-implicit fallback function for `transition(Random.default_rng(), h, τ, z)`
 """
-function transition(τ::Trajectory, h::Hamiltonian, z::PhasePoint)
-    return transition(Random.default_rng(), τ, h, z)
+function transition(h::Hamiltonian, τ::Trajectory, z::PhasePoint)
+    return transition(Random.default_rng(), h, τ, z)
 end
 
 ###
@@ -256,8 +256,8 @@ end
 
 function transition(
     rng::Union{AbstractRNG,AbstractVector{<:AbstractRNG}},
-    τ::Trajectory{TS,I,TC},
     h::Hamiltonian,
+    τ::Trajectory{TS,I,TC},
     z::PhasePoint,
 ) where {TS<:AbstractTrajectorySampler,I,TC<:StaticTerminationCriterion}
     H0 = energy(z)
@@ -665,7 +665,7 @@ function build_tree(
 end
 
 function transition(
-    rng::AbstractRNG, τ::Trajectory{TS,I,TC}, h::Hamiltonian, z0::PhasePoint
+    rng::AbstractRNG, h::Hamiltonian, τ::Trajectory{TS,I,TC}, z0::PhasePoint
 ) where {
     TS<:AbstractTrajectorySampler,I<:AbstractIntegrator,TC<:DynamicTerminationCriterion
 }
@@ -746,12 +746,24 @@ function A(h, z, ϵ)
     return z′, H′
 end
 
-"Find a good initial leap-frog step-size via heuristic search."
+"""
+    find_good_stepsize(h::Hamiltonian, θ::AbstractVector; initial_step_size = 1//10, max_n_iters::Int=100)
+    find_good_stepsize(rng::AbstractRNG, h::Hamiltonian, θ::AbstractVector; initial_step_size = 1//10, max_n_iters::Int=100)
+
+Find a good initial leap-frog step-size via heuristic search.
+
+ - `initial_step_size`: initial step size used to start the search; defaults to `1//10`.
+ - `max_n_iters`: maximum number of iterations when searching for a good step size; defaults to `100`.
+"""
 function find_good_stepsize(
-    rng::AbstractRNG, h::Hamiltonian, θ::AbstractVector{T}; max_n_iters::Int=100
+    rng::AbstractRNG,
+    h::Hamiltonian,
+    θ::AbstractVector{T};
+    initial_step_size=1//10,
+    max_n_iters::Int=100,
 ) where {T<:Real}
     # Initialize searching parameters
-    ϵ′ = ϵ = T(1//10)
+    ϵ′ = ϵ = T(initial_step_size)
     # minimal, crossing, maximal log accept ratio
     log_a_min = 2 * T(loghalf)
     log_a_cross = T(loghalf)
@@ -815,9 +827,18 @@ function find_good_stepsize(
 end
 
 function find_good_stepsize(
-    h::Hamiltonian, θ::AbstractVector{<:AbstractFloat}; max_n_iters::Int=100
+    h::Hamiltonian,
+    θ::AbstractVector{<:AbstractFloat};
+    initial_step_size=1//10,
+    max_n_iters::Int=100,
 )
-    return find_good_stepsize(Random.default_rng(), h, θ; max_n_iters=max_n_iters)
+    return find_good_stepsize(
+        Random.default_rng(),
+        h,
+        θ;
+        initial_step_size=initial_step_size,
+        max_n_iters=max_n_iters,
+    )
 end
 
 "Perform MH acceptance based on energy, i.e. negative log probability."
diff --git a/test/CUDA/cuda.jl b/test/CUDA/cuda.jl
@@ -2,55 +2,59 @@ using Pkg
 Pkg.activate(@__DIR__)
 Pkg.develop(; path=joinpath(@__DIR__, "..", ".."))
 
-include(joinpath(@__DIR__, "..", "common.jl"))
-
 using Test
 using AdvancedHMC
 using AdvancedHMC: DualValue, PhasePoint
 using CUDA
+using LogDensityProblems
 
-@testset "AdvancedHMC GPU" begin
-    n_chains = 1000
-    n_samples = 1000
-    dim = 5
-
-    T = Float32
-    m, s, θ₀ = zeros(T, dim), ones(T, dim), rand(T, dim, n_chains)
-    m, s, θ₀ = CuArray(m), CuArray(s), CuArray(θ₀)
-
-    target = Gaussian(m, s)
-    metric = UnitEuclideanMetric(T, size(θ₀))
-    ℓπ, ∇ℓπ = get_ℓπ(target), get_∇ℓπ(target)
-    hamiltonian = Hamiltonian(metric, ℓπ, ∇ℓπ)
-    integrator = Leapfrog(one(T) / 5)
-    proposal = HMCKernel(Trajectory{EndPointTS}(integrator, FixedNSteps(5)))
+include(joinpath(@__DIR__, "..", "common.jl"))
 
-    samples, stats = sample(hamiltonian, proposal, θ₀, n_samples)
+@testset "AdvancedHMC GPU" begin
+    if CUDA.functional()
+        n_chains = 1000
+        n_samples = 1000
+        dim = 5
+        T = Float32
+        m, s, θ₀ = zeros(T, dim), ones(T, dim), rand(T, dim, n_chains)
+        m, s, θ₀ = CuArray(m), CuArray(s), CuArray(θ₀)
+        target = Gaussian(m, s)
+        metric = UnitEuclideanMetric(T, size(θ₀))
+        ℓπ, ∇ℓπ = get_ℓπ(target), get_∇ℓπ(target)
+        hamiltonian = Hamiltonian(metric, ℓπ, ∇ℓπ)
+        integrator = Leapfrog(one(T) / 5)
+        proposal = HMCKernel(Trajectory{EndPointTS}(integrator, FixedNSteps(5)))
+        samples, stats = sample(hamiltonian, proposal, θ₀, n_samples)
+    else
+        println("GPU tests are skipped because no CUDA devices are found.")
+    end
 end
 
 @testset "PhasePoint GPU" begin
-    for T in [Float32, Float64]
-        function init_z1()
-            return PhasePoint(
-                CuArray([T(NaN) T(NaN)]),
-                CuArray([T(NaN) T(NaN)]),
-                DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
-                DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
-            )
-        end
-        function init_z2()
-            return PhasePoint(
-                CuArray([T(Inf) T(Inf)]),
-                CuArray([T(Inf) T(Inf)]),
-                DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
-                DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
-            )
+    if CUDA.functional()
+        for T in [Float32, Float64]
+            function init_z1()
+                return PhasePoint(
+                    CuArray([T(NaN) T(NaN)]),
+                    CuArray([T(NaN) T(NaN)]),
+                    DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
+                    DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
+                )
+            end
+            function init_z2()
+                return PhasePoint(
+                    CuArray([T(Inf) T(Inf)]),
+                    CuArray([T(Inf) T(Inf)]),
+                    DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
+                    DualValue(CuArray(zeros(T, 2)), CuArray(zeros(T, 1, 2))),
+                )
+            end
+            z1 = init_z1()
+            z2 = init_z2()
+            @test z1.ℓπ.value == z2.ℓπ.value
+            @test z1.ℓκ.value == z2.ℓκ.value
         end
-
-        z1 = init_z1()
-        z2 = init_z2()
-
-        @test z1.ℓπ.value == z2.ℓπ.value
-        @test z1.ℓκ.value == z2.ℓκ.value
+    else
+        println("GPU tests are skipped because no CUDA devices are found.")
     end
 end
diff --git a/test/Project.toml b/test/Project.toml
@@ -20,6 +20,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
diff --git a/test/quality.jl b/test/quality.jl
@@ -1,13 +1,13 @@
 using AdvancedHMC
-using ReTest
+using Test: Test
 using Aqua: Aqua
 using JET
 using ForwardDiff
 
-@testset "Aqua" begin
+Test.@testset "Aqua" begin
     Aqua.test_all(AdvancedHMC)
 end
 
-@testset "JET" begin
+Test.@testset "JET" begin
     JET.test_package(AdvancedHMC; target_defined_modules=true)
 end
diff --git a/test/riemannian.jl b/test/riemannian.jl
@@ -19,8 +19,8 @@ using AdvancedHMC: neg_energy, energy
 
         θ₀ = rand(rng, dim(target))
 
-        ℓπ = VecTargets.gen_logpdf(target)
-        ∂ℓπ∂θ = VecTargets.gen_logpdf_grad(target, θ₀)
+        ℓπ = MCMCLogDensityProblems.gen_logpdf(target)
+        ∂ℓπ∂θ = MCMCLogDensityProblems.gen_logpdf_grad(target, θ₀)
 
         Vfunc, Hfunc, Gfunc, ∂G∂θfunc = prepare_sample_target(hps, θ₀, ℓπ)
 
diff --git a/test/trajectory.jl b/test/trajectory.jl
@@ -129,11 +129,11 @@ end
         for τ_test in [τ, τ_with_jittered_lf], seed in [1234, 5678, 90]
             rng = MersenneTwister(seed)
             z = AdvancedHMC.phasepoint(h, θ_init, r_init)
-            z1′ = AdvancedHMC.transition(rng, τ_test, h, z).z
+            z1′ = AdvancedHMC.transition(rng, h, τ_test, z).z
 
             rng = MersenneTwister(seed)
             z = AdvancedHMC.phasepoint(h, θ_init, r_init)
-            z2′ = AdvancedHMC.transition(rng, τ_test, h, z).z
+            z2′ = AdvancedHMC.transition(rng, h, τ_test, z).z
 
             @test z1′.θ == z2′.θ
             @test z1′.r == z2′.r