TuringLang
diff --git a/‎.github/workflows/Examples.yml‎
Lines changed: 42 additions & 0 deletions b/‎.github/workflows/Examples.yml‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Project.toml‎
Lines changed: 2 additions & 2 deletions b/‎Project.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/src/api.md‎
Lines changed: 5 additions & 0 deletions b/‎docs/src/api.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎docs/src/index.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/src/index.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎example/Project.toml‎
Lines changed: 9 additions & 4 deletions b/‎example/Project.toml‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎example/README.md‎
Lines changed: 3 additions & 3 deletions b/‎example/README.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎example/SyntheticTargets.jl‎
Lines changed: 19 additions & 0 deletions b/‎example/SyntheticTargets.jl‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎example/demo_RealNVP.jl‎
Lines changed: 180 additions & 0 deletions b/‎example/demo_RealNVP.jl‎
Lines changed: 180 additions & 0 deletions
@@ -0,0 +1,42 @@
+# CI workflow that runs the example scripts under `example/`.
+name: NF Examples
+
+# Triggers: pushes to `main`, pushes of any tag, and every pull request.
+on:
+  push:
+    branches:
+      - main
+    tags: ['*']
+  pull_request:
+
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  # Runs are grouped by workflow name + git ref.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
+jobs:
+  run-examples:
+    runs-on: ubuntu-latest
+    steps:
+      # Check out the repository and install Julia.
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          # '1' presumably resolves to the latest stable 1.x release (see setup-julia docs).
+          version: '1'
+          arch: x64
+      - uses: julia-actions/cache@v2
+      # Inside `example/`: add the checked-out package (parent directory) to the example
+      # project in development mode, instantiate the environment, then `include` each demo
+      # script in turn. All demos run in a single Julia process.
+      - name: Run NF examples
+        run: |
+          cd example
+          julia --project=. --color=yes -e '
+          using Pkg;
+          Pkg.develop(PackageSpec(path=joinpath(pwd(), "..")));
+          Pkg.instantiate();
+          @info "Running planar flow demo";
+          include("demo_planar_flow.jl");
+          @info "Running radial flow demo";
+          include("demo_radial_flow.jl");
+          @info "Running Real NVP demo";
+          include("demo_RealNVP.jl");
+          @info "Running neural spline flow demo";
+          include("demo_neural_spline_flow.jl");
+          @info "Running Hamiltonian flow demo";
+          include("demo_hamiltonian_flow.jl");'
@@ -3,6 +3,7 @@
 /docs/build/
 test/Manifest.toml
 example/Manifest.toml
+example/LocalPreferences.toml
 
 # Files generated by invoking Julia with --code-coverage
 *.jl.cov
 
@@ -1,6 +1,6 @@
 name = "NormalizingFlows"
 uuid = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
-version = "0.2.0"
+version = "0.2.1"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -24,7 +24,7 @@ NormalizingFlowsCUDAExt = "CUDA"
 ADTypes = "1"
 Bijectors = "0.12.6, 0.13, 0.14, 0.15"
 CUDA = "5"
-DifferentiationInterface = "0.6.42"
+DifferentiationInterface = "0.6, 0.7"
 Distributions = "0.25"
 DocStringExtensions = "0.9"
 Optimisers = "0.2.16, 0.3, 0.4"
 
@@ -61,6 +61,11 @@ and hope to generate approximate samples from it.
 ```@docs
 NormalizingFlows.elbo
 ```
+
+```@docs
+NormalizingFlows.elbo_batch
+```
+
 #### Log-likelihood
 
 Maximizing the log-likelihood is equivalent to minimizing the forward KL divergence between $q_\theta$ and $p$, i.e.,
 
@@ -21,7 +21,7 @@ See the [documentation](https://turinglang.org/NormalizingFlows.jl/dev/) for mor
 To install the package, run the following command in the Julia REPL:
 ```
 ]  # enter Pkg mode
-(@v1.9) pkg> add git@github.com:TuringLang/NormalizingFlows.jl.git
+(@v1.11) pkg> add git@github.com:TuringLang/NormalizingFlows.jl.git
 ```
 Then simply run the following command to use the package:
 ```julia
 
@@ -2,16 +2,21 @@
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-FunctionChains = "8e6b2b91-af83-483e-ba35-d00930e4cf9b"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 NormalizingFlows = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+
+[extras]
+CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
@@ -12,7 +12,7 @@ normalizing flow to approximate the target distribution using `NormalizingFlows.
 Currently, all examples share the same [Julia project](https://pkgdocs.julialang.org/v1/environments/#Using-someone-else's-project). To run the examples, first activate the project environment:
 
 ```julia
-# pwd() = "NormalizingFlows.jl/"
-using Pkg; Pkg.activate("example"); Pkg.instantiate()
+# pwd() = "NormalizingFlows.jl/example"
+using Pkg; Pkg.activate("."); Pkg.instantiate()
 ```
-This will install all needed packages, at the exact versions when the model was last updated. Then you can run the model code with include("<example-to-run>.jl"), or by running the example script line-by-line.
+This will install all needed packages, at the exact versions when the model was last updated. Then you can run the model code with `include("<example-to-run>.jl")`, or by running the example script line-by-line.
@@ -0,0 +1,19 @@
+using DocStringExtensions
+using Distributions, Random, LinearAlgebra
+using IrrationalConstants
+using Plots
+
+
+include("targets/banana.jl")
+include("targets/cross.jl")
+include("targets/neal_funnel.jl")
+include("targets/warped_gaussian.jl")
+
+"""
+    visualize(p::ContinuousMultivariateDistribution, samples=rand(p, 1000))
+
+Draw a contour plot of the density of `p` and overlay a scatter plot of `samples`.
+
+The plotting window is the range of the first two rows of `samples`, padded by one
+unit on each side; the density is evaluated on a 100 × 100 grid over that window.
+Only two-element points `[x, y]` are passed to `logpdf`, so `p` is expected to be
+two-dimensional.
+
+Returns the figure created by `contour`.
+"""
+function visualize(p::ContinuousMultivariateDistribution, samples=rand(p, 1000))
+    xs = samples[1, :]
+    ys = samples[2, :]
+    xgrid = range(minimum(xs) - 1, maximum(xs) + 1; length=100)
+    ygrid = range(minimum(ys) - 1, maximum(ys) + 1; length=100)
+    # density[i, j] is the pdf at (xgrid[i], ygrid[j]); `contour` receives its transpose
+    density = [exp(Distributions.logpdf(p, [gx, gy])) for gx in xgrid, gy in ygrid]
+    fig = contour(
+        xgrid, ygrid, density'; levels=15, color=:viridis, label="PDF", linewidth=2
+    )
+    scatter!(xs, ys; label="Samples", alpha=0.3, legend=:bottomright)
+    return fig
+end
@@ -0,0 +1,180 @@
+using Flux
+using Bijectors
+using Bijectors: partition, combine, PartitionMask
+
+using Random, Distributions, LinearAlgebra
+using Functors
+using Optimisers, ADTypes
+using Mooncake
+using NormalizingFlows
+
+include("SyntheticTargets.jl")
+include("utils.jl")
+
+##################################
+# define affine coupling layer using Bijectors.jl interface
+#################################
+"""
+    AffineCoupling <: Bijectors.Bijector
+
+Affine coupling layer: one part of the input (selected by `mask`) is scaled and
+shifted elementwise by the outputs of the networks `s` and `t`, which are evaluated
+on another part of the input.
+"""
+struct AffineCoupling <: Bijectors.Bijector
+    # dimension of the input
+    dim::Int
+    # partition of the input coordinates used to split and recombine vectors/matrices
+    mask::Bijectors.PartitionMask
+    # network producing the elementwise scale
+    s::Flux.Chain
+    # network producing the elementwise shift
+    t::Flux.Chain
+end
+
+# register only the fields `s` and `t` with Functors (presumably so that only the two
+# networks are seen as parameters; `dim` and `mask` are left out)
+@functor AffineCoupling (s, t)
+
+"""
+    AffineCoupling(dim::Int, hdims::Int, mask_idx::AbstractVector)
+
+Build an `AffineCoupling` acting on `dim`-dimensional inputs.
+
+The coordinates listed in `mask_idx` are the ones that get transformed; the scale
+network `s` and the shift network `t` are evaluated on the second partition returned
+by `PartitionMask(dim, mask_idx)` and must output one value per transformed
+coordinate. The networks are therefore built with input width
+`dim - length(mask_idx)` and output width `length(mask_idx)`, with `hdims` hidden
+units.
+"""
+function AffineCoupling(
+    dim::Int,  # dimension of input
+    hdims::Int, # dimension of hidden units for s and t
+    mask_idx::AbstractVector, # indices of the dimensions that one wants to apply transformations on
+)
+    cdims = length(mask_idx) # number of transformed coordinates (output width of s and t)
+    # Number of conditioning coordinates (input width of s and t).
+    # NOTE(review): assumes `PartitionMask(dim, mask_idx)` puts every index not in
+    # `mask_idx` into the second partition, and that `mlp3` (defined outside this file)
+    # takes `(input_dim, hidden_dim, output_dim)` — confirm against utils.jl.
+    idims = dim - cdims
+    s = mlp3(idims, hdims, cdims)
+    t = mlp3(idims, hdims, cdims)
+    mask = PartitionMask(dim, mask_idx)
+    return AffineCoupling(dim, mask, s, t)
+end
+
+"""
+    Bijectors.transform(af::AffineCoupling, x::AbstractVecOrMat)
+
+Apply the coupling layer to `x`: the first partition of `x` is multiplied elementwise
+by `af.s` and shifted by `af.t`, both evaluated on the second partition; the remaining
+partitions are passed through unchanged.
+"""
+function Bijectors.transform(af::AffineCoupling, x::AbstractVecOrMat)
+    # split `x` according to `af.mask::PartitionMask`
+    x_1, x_2, x_3 = partition(af.mask, x)
+    scale = af.s(x_2)
+    shift = af.t(x_2)
+    y_1 = x_1 .* scale .+ shift
+    return combine(af.mask, y_1, x_2, x_3)
+end
+
+# Calling the layer on an array is the same as `transform`-ing that array.
+(af::AffineCoupling)(x::AbstractArray) = transform(af, x)
+
+"""
+    Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractVector)
+
+Forward transformation of a single input vector.
+
+Returns `(y, logjac)`, where `y` is the transformed vector and `logjac` is the scalar
+`sum(log.(abs.(s)))` with `s` the output of the scale network.
+"""
+function Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractVector)
+    x_1, x_2, x_3 = Bijectors.partition(af.mask, x)
+    # evaluate the scale network once and reuse it for the output and the log-Jacobian
+    s = af.s(x_2)
+    y_1 = s .* x_1 .+ af.t(x_2)
+    logjac = sum(log ∘ abs, s) # this is a scalar
+    return combine(af.mask, y_1, x_2, x_3), logjac
+end
+
+"""
+    Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractMatrix)
+
+Forward transformation of a batch of inputs stored as the columns of `x`.
+
+Returns `(y, logjac)`, where `y` is the transformed matrix and `logjac` is a vector
+with one entry per column of `x`.
+"""
+function Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractMatrix)
+    x_1, x_2, x_3 = Bijectors.partition(af.mask, x)
+    # evaluate the scale network once and reuse it for the output and the log-Jacobian
+    s = af.s(x_2)
+    y_1 = s .* x_1 .+ af.t(x_2)
+    logjac = sum(log ∘ abs, s; dims = 1) # 1 × size(x, 2)
+    return combine(af.mask, y_1, x_2, x_3), vec(logjac)
+end
+
+
+"""
+    Bijectors.with_logabsdet_jacobian(iaf::Inverse{<:AffineCoupling}, y::AbstractVector)
+
+Inverse transformation of a single vector `y`.
+
+Returns `(x, logjac)`, where `x` is the recovered input and `logjac` is the scalar
+`-sum(log.(abs.(s)))` with `s` the output of the scale network.
+"""
+function Bijectors.with_logabsdet_jacobian(
+    iaf::Inverse{<:AffineCoupling}, y::AbstractVector
+)
+    af = iaf.orig
+    # partition vector using `af.mask::PartitionMask`
+    y_1, y_2, y_3 = partition(af.mask, y)
+    # evaluate the scale network once and reuse it for the inverse and the log-Jacobian
+    s = af.s(y_2)
+    # inverse transformation
+    x_1 = (y_1 .- af.t(y_2)) ./ s
+    logjac = -sum(log ∘ abs, s)
+    return combine(af.mask, x_1, y_2, y_3), logjac
+end
+
+"""
+    Bijectors.with_logabsdet_jacobian(iaf::Inverse{<:AffineCoupling}, y::AbstractMatrix)
+
+Inverse transformation of a batch stored as the columns of `y`.
+
+Returns `(x, logjac)`, where `x` is the matrix of recovered inputs and `logjac` is a
+vector with one entry per column of `y`.
+"""
+function Bijectors.with_logabsdet_jacobian(
+    iaf::Inverse{<:AffineCoupling}, y::AbstractMatrix
+)
+    af = iaf.orig
+    # partition matrix using `af.mask::PartitionMask`
+    y_1, y_2, y_3 = partition(af.mask, y)
+    # evaluate the scale network once and reuse it for the inverse and the log-Jacobian
+    s = af.s(y_2)
+    # inverse transformation
+    x_1 = (y_1 .- af.t(y_2)) ./ s
+    logjac = -sum(log ∘ abs, s; dims = 1) # 1 × size(y, 2)
+    return combine(af.mask, x_1, y_2, y_3), vec(logjac)
+end
+
+################### 
+# an equivalent definition of AffineCoupling using Bijectors.Coupling 
+# (see https://github.com/TuringLang/Bijectors.jl/blob/74d52d4eda72a6149b1a89b72524545525419b3f/src/bijectors/coupling.jl#L188C1-L188C1)
+###################
+
+# struct AffineCoupling <: Bijectors.Bijector
+#     dim::Int
+#     mask::Bijectors.PartitionMask
+#     s::Flux.Chain
+#     t::Flux.Chain
+# end
+
+# # let params track field s and t
+# @functor AffineCoupling (s, t)
+
+# function AffineCoupling(dim, mask, s, t)
+#     return Bijectors.Coupling(θ -> Bijectors.Shift(t(θ)) ∘ Bijectors.Scale(s(θ)), mask)
+# end
+
+# function AffineCoupling(
+#     dim::Int,  # dimension of input
+#     hdims::Int, # dimension of hidden units for s and t
+#     mask_idx::AbstractVector, # indices of the dimensions that one wants to apply transformations on
+# )
+#     cdims = length(mask_idx) # dimension of parts used to construct coupling law
+#     s = mlp3(cdims, hdims, cdims)
+#     t = mlp3(cdims, hdims, cdims)
+#     mask = PartitionMask(dim, mask_idx)
+#     return AffineCoupling(dim, mask, s, t)
+# end
+
+
+
+##################################
+# start demo
+#################################
+# fix the seed of the default RNG, and use single precision throughout the demo
+Random.seed!(123)
+rng = Random.default_rng()
+T = Float32
+
+######################################
+# a difficult banana target
+######################################
+# `Banana` is not defined in this file (presumably it comes from targets/banana.jl,
+# included via SyntheticTargets.jl)
+target = Banana(2, 1.0f0, 100.0f0)
+# log-density of the target as a one-argument function: logp(x) == logpdf(target, x)
+logp = Base.Fix1(logpdf, target)
+
+######################################
+# learn the target using Affine coupling flow
+######################################
+# mark `MvNormal` as a leaf for Functors (presumably so that the reference distribution
+# is not traversed for trainable parameters — confirm)
+@leaf MvNormal
+# reference distribution of the flow
+# NOTE(review): passing a vector of standard deviations to `MvNormal` looks deprecated in
+# Distributions 0.25 — confirm, and consider a covariance-based constructor if so
+q0 = MvNormal(zeros(T, 2), ones(T, 2))
+
+d = 2       # dimension of the target
+hdims = 32  # hidden width of the `s` and `t` networks
+
+# three blocks, each composing a coupling layer that transforms coordinate 1 with one
+# that transforms coordinate 2
+Ls = [AffineCoupling(d, hdims, [1]) ∘ AffineCoupling(d, hdims, [2]) for i in 1:3]
+
+# `create_flow` is not defined in this file (presumably in utils.jl)
+flow = create_flow(Ls, q0)
+# keep an untouched copy for the before/after comparison at the end of the script
+flow_untrained = deepcopy(flow)
+
+
+######################################
+# start training
+######################################
+sample_per_iter = 64
+
+# callback passed to `train_flow`; it returns a NamedTuple with the batch size and the
+# AD backend. `adtype` is a global assigned on the next line and is only looked up when
+# `cb` is called, so defining `cb` first is fine.
+cb(iter, opt_stats, re, θ) = (sample_per_iter=sample_per_iter,ad=adtype)
+adtype = ADTypes.AutoMooncake(; config = Mooncake.Config())
+# convergence check passed to `train_flow`: true once the gradient norm is below 1/1000
+checkconv(iter, stat, re, θ, st) = stat.gradient_norm < one(T)/1000
+flow_trained, stats, _ = train_flow(
+    rng, 
+    elbo_batch,        # using elbo_batch instead of elbo achieves 4-5 times speedup
+    flow,
+    logp,
+    sample_per_iter;
+    max_iters=100,   # change to larger number of iterations (e.g., 50_000) for better results
+    optimiser=Optimisers.Adam(5e-4),
+    ADbackend=adtype,
+    show_progress=true,
+    callback=cb,
+    hasconverged=checkconv,
+)
+# flatten the trained flow into a parameter vector `θ` and a reconstructor `re`
+θ, re = Optimisers.destructure(flow_trained)
+# loss recorded in each entry of `stats`
+losses = map(x -> x.loss, stats)
+
+######################################
+# evaluate trained flow
+######################################
+plot(losses; label="Loss", linewidth=2) # plot the loss
+# `compare_trained_and_untrained_flow` is not defined in this file (presumably in utils.jl)
+compare_trained_and_untrained_flow(flow_trained, flow_untrained, target, 1000)