TuringLang · yebai · Mar 5, 2025 · Feb 16, 2025 · Feb 16, 2025 · Feb 16, 2025
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -19,7 +19,7 @@ jobs:
       matrix:
         version:
           - '1'
-          - '1.6'
+          - '1.10'
         os:
           - ubuntu-latest
         arch:

diff --git a/Project.toml b/Project.toml
@@ -1,11 +1,11 @@
 name = "NormalizingFlows"
 uuid = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
-version = "0.1.1"
+version = "0.1.2"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -16,35 +16,32 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [weakdeps]
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [extensions]
-NormalizingFlowsEnzymeExt = "Enzyme"
-NormalizingFlowsForwardDiffExt = "ForwardDiff"
-NormalizingFlowsReverseDiffExt = "ReverseDiff"
-NormalizingFlowsZygoteExt = "Zygote"
 
 [compat]
 ADTypes = "0.1, 0.2, 1"
-Bijectors = "0.12.6, 0.13, 0.14"
-DiffResults = "1"
+Bijectors = "0.12.6, 0.13, 0.14, 0.15"
+DifferentiationInterface = "0.6"
 Distributions = "0.25"
 DocStringExtensions = "0.9"
+Mooncake = "0.4.95"
 Enzyme = "0.11, 0.12, 0.13"
 ForwardDiff = "0.10.25"
-Optimisers = "0.2.16, 0.3"
+Optimisers = "0.2.16, 0.3, 0.4"
 ProgressMeter = "1.0.0"
 Requires = "1"
 ReverseDiff = "1.14"
 StatsBase = "0.33, 0.34"
-Zygote = "0.6"
-julia = "1.6"
+Zygote = "0.6, 0.7"
+julia = "1.10"
 
 [extras]
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -15,6 +15,7 @@ For example of Gaussian VI, we can construct the flow as follows:
 ```@julia
 using Distributions, Bijectors
 T= Float32
+using Functors; @leaf MvNormal # to prevent params in q₀ from being optimized
 q₀ = MvNormal(zeros(T, 2), ones(T, 2))
 flow = Bijectors.transformed(q₀, Bijectors.Shift(zeros(T,2)) ∘ Bijectors.Scale(ones(T, 2)))
 ```
@@ -23,7 +24,7 @@ To train the Gaussian VI targeting at distirbution $p$ via ELBO maiximization, w
 using NormalizingFlows
 
 sample_per_iter = 10
-flow_trained, stats, _ = train_flow(
+flow_trained, stats, _, _ = train_flow(
     elbo,
     flow,
     logp,
@@ -83,11 +84,3 @@ NormalizingFlows.loglikelihood
 ```@docs
 NormalizingFlows.optimize
 ```
-
-
-## Utility Functions for Taking Gradient
-```@docs
-NormalizingFlows.grad!
-NormalizingFlows.value_and_gradient!
-```
-
diff --git a/docs/src/example.md b/docs/src/example.md
@@ -36,6 +36,7 @@ Here we used the `PlanarLayer()` from `Bijectors.jl` to construct a
 
 ```julia
 using Bijectors, FunctionChains
+using Functors
 
 function create_planar_flow(n_layers::Int, q₀)
     d = length(q₀)
@@ -45,7 +46,9 @@ function create_planar_flow(n_layers::Int, q₀)
 end
 
 # create a 20-layer planar flow
-flow = create_planar_flow(20, MvNormal(zeros(Float32, 2), I))
+@leaf MvNormal # to prevent params in q₀ from being optimized
+q₀ = MvNormal(zeros(Float32, 2), I)
+flow = create_planar_flow(20, q₀)
 flow_untrained = deepcopy(flow) # keep a copy of the untrained flow for comparison
 ```
 *Notice that here the flow layers are chained together using `fchain` function from [`FunctionChains.jl`](https://github.com/oschulz/FunctionChains.jl). 
@@ -116,4 +119,4 @@ plot!(title = "Comparison of Trained and Untrained Flow", xlabel = "X", ylabel=
 
 ## Reference 
 
-- Rezende, D. and Mohamed, S., 2015. *Variational inference with normalizing flows*. International Conference on Machine Learning  
+- Rezende, D. and Mohamed, S., 2015. *Variational inference with normalizing flows*. International Conference on Machine Learning  
diff --git a/ext/NormalizingFlowsEnzymeExt.jl b/ext/NormalizingFlowsEnzymeExt.jl
diff --git a/ext/NormalizingFlowsForwardDiffExt.jl b/ext/NormalizingFlowsForwardDiffExt.jl
diff --git a/ext/NormalizingFlowsReverseDiffExt.jl b/ext/NormalizingFlowsReverseDiffExt.jl
diff --git a/ext/NormalizingFlowsZygoteExt.jl b/ext/NormalizingFlowsZygoteExt.jl
diff --git a/src/NormalizingFlows.jl b/src/NormalizingFlows.jl
@@ -4,17 +4,14 @@ using Bijectors
 using Optimisers
 using LinearAlgebra, Random, Distributions, StatsBase
 using ProgressMeter
-using ADTypes, DiffResults
+using ADTypes
+using DifferentiationInterface
 
 using DocStringExtensions
 
-export train_flow, elbo, loglikelihood, value_and_gradient!
-
-using ADTypes
-using DiffResults
+export train_flow, elbo, loglikelihood
 
-"""
-    train_flow([rng::AbstractRNG, ]vo, flow, args...; kwargs...)
+""" train_flow([rng::AbstractRNG, ]vo, flow, args...; kwargs...)
 
 Train the given normalizing flow `flow` by calling `optimize`.
 
@@ -56,47 +53,29 @@ function train_flow(
     # use FunctionChains instead of simple compositions to construct the flow when many flow layers are involved
     # otherwise the compilation time for destructure will be too long
     θ_flat, re = Optimisers.destructure(flow)
+
+    loss(θ, rng, args...) = -vo(rng, re(θ), args...)
 
     # Normalizing flow training loop 
-    θ_flat_trained, opt_stats, st = optimize(
-        rng,
+    θ_flat_trained, opt_stats, st, time_elapsed = optimize(
         ADbackend,
-        vo,
+        loss,
         θ_flat,
-        re,
-        args...;
+        re, 
+        (rng, args...)...;
         max_iters=max_iters,
         optimiser=optimiser,
         kwargs...,
     )
 
     flow_trained = re(θ_flat_trained)
-    return flow_trained, opt_stats, st
+    return flow_trained, opt_stats, st, time_elapsed
 end
 
-include("train.jl")
+
+
+include("optimize.jl")
 include("objectives.jl")
 
-# optional dependencies 
-if !isdefined(Base, :get_extension) # check whether :get_extension is defined in Base
-    using Requires
-end
 
-# Question: should Exts be loaded here or in train.jl? 
-function __init__()
-    @static if !isdefined(Base, :get_extension)
-        @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" include(
-            "../ext/NormalizingFlowsForwardDiffExt.jl"
-        )
-        @require ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" include(
-            "../ext/NormalizingFlowsReverseDiffExt.jl"
-        )
-        @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" include(
-            "../ext/NormalizingFlowsEnzymeExt.jl"
-        )
-        @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" include(
-            "../ext/NormalizingFlowsZygoteExt.jl"
-        )
-    end
-end
 end
diff --git a/src/objectives.jl b/src/objectives.jl
@@ -1,2 +1,2 @@
 include("objectives/elbo.jl")
-include("objectives/loglikelihood.jl")
+include("objectives/loglikelihood.jl") # not tested
diff --git a/src/objectives/elbo.jl b/src/objectives/elbo.jl
@@ -42,4 +42,4 @@ end
 
 function elbo(flow::Bijectors.TransformedDistribution, logp, n_samples)
     return elbo(Random.default_rng(), flow, logp, n_samples)
-end
+end
diff --git a/src/objectives/loglikelihood.jl b/src/objectives/loglikelihood.jl
@@ -2,29 +2,32 @@
 # training by minimizing forward KL (MLE)
 ####################################    
 """
-    loglikelihood(flow::Bijectors.TransformedDistribution, xs::AbstractVecOrMat)
+    loglikelihood(rng, flow::Bijectors.TransformedDistribution, xs::AbstractVecOrMat)
 
 Compute the log-likelihood for variational distribution flow at a batch of samples xs from 
 the target distribution p. 
 
 # Arguments
+- `rng`: random number generator (unused; accepted only so that the signature matches the other variational objectives)
 - `flow`: variational distribution to be trained. In particular 
   "flow = transformed(q₀, T::Bijectors.Bijector)", 
   q₀ is a reference distribution that one can easily sample and compute logpdf
 - `xs`: samples from the target distribution p.
 
 """
 function loglikelihood(
+    rng::AbstractRNG,                         # unused; kept for a uniform objective signature
     flow::Bijectors.UnivariateTransformed,    # variational distribution to be trained
     xs::AbstractVector,                       # sample batch from target dist p
 )
     return mean(Base.Fix1(logpdf, flow), xs)
 end
 
 function loglikelihood(
+    rng::AbstractRNG,                           # unused; kept for a uniform objective signature
     flow::Bijectors.MultivariateTransformed,    # variational distribution to be trained
     xs::AbstractMatrix,                         # sample batch from target dist p
 )
     llhs = map(x -> logpdf(flow, x), eachcol(xs))
     return mean(llhs)
-end
+end
-Original file line number
+Diff line change
@@ Expand Up / @@ -19,7 +19,7 @@ jobs: @@
           matrix:
             version:
               - '1'
-              - '1.6'
+              - '1.10'
             os:
               - ubuntu-latest
             arch:
@@ Expand Down @@