Commit 4caae49

add nsf interface
1 parent 81000b9 commit 4caae49

5 files changed (+115, -39 lines)

example/Project.toml

Lines changed: 3 additions & 0 deletions
@@ -2,16 +2,19 @@
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
 DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
+InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
+MonotonicSplines = "568f7cb4-8305-41bc-b90d-d32b39cc99d1"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 NormalizingFlows = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"

example/demo_RealNVP.jl

Lines changed: 4 additions & 6 deletions
@@ -5,7 +5,7 @@ using Bijectors: partition, combine, PartitionMask
 using Random, Distributions, LinearAlgebra
 using Functors
 using Optimisers, ADTypes
-using Mooncake
+using Mooncake, Zygote
 using NormalizingFlows

 include("SyntheticTargets.jl")
@@ -47,18 +47,16 @@ sample_per_iter = 16

 # callback function to log training progress
 cb(iter, opt_stats, re, θ) = (sample_per_iter=sample_per_iter, ad=adtype)
-# TODO: now using AutoMooncake the example broke, but AutoZygote works, need to debug
-adtype = ADTypes.AutoMooncake(; config = nothing)
-# adtype = ADTypes.AutoZygote()
+adtype = ADTypes.AutoMooncake(; config = Mooncake.Config())

 checkconv(iter, stat, re, θ, st) = stat.gradient_norm < one(T)/1000
 flow_trained, stats, _ = train_flow(
     rng,
-    elbo_batch, # using elbo_batch instead of elbo achieves 4-5 times speedup
+    elbo, # elbo_batch achieves a 4-5x speedup over elbo
     flow,
     logp,
     sample_per_iter;
-    max_iters=100, # change to larger number of iterations (e.g., 50_000) for better results
+    max_iters=10, # increase (e.g., to 50_000) for better results
     optimiser=Optimisers.Adam(5e-4),
     ADbackend=adtype,
     show_progress=true,
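
Aside (not part of the commit): since this change resolves the AutoMooncake TODO, the backend can be sanity-checked in isolation by differentiating a toy objective through DifferentiationInterface, the same pattern as the commented scratch code at the end of the NSF demo below. A minimal sketch; `toy_loss` and `x0` are illustrative names, not part of the package.

    # Sketch: smoke-test the Mooncake AD backend before handing it to train_flow.
    using ADTypes, Mooncake
    import DifferentiationInterface as DI

    adtype = ADTypes.AutoMooncake(; config = Mooncake.Config())

    toy_loss(x) = sum(abs2, x .- 1)   # simple stand-in for the ELBO objective
    x0 = randn(2)
    val, g = DI.value_and_gradient(toy_loss, adtype, x0)
    @assert g ≈ 2 .* (x0 .- 1)        # analytic gradient of sum((x .- 1).^2)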

example/demo_neural_spline_flow.jl

Lines changed: 35 additions & 13 deletions
@@ -28,18 +28,9 @@ logp = Base.Fix1(logpdf, target)
 # learn the target using Affine coupling flow
 ######################################
 @leaf MvNormal
-q0 = MvNormal(zeros(T, 2), ones(T, 2))
-
-d = 2
-hdims = 64
-K = 10
-B = 30
-Ls = [
-    NeuralSplineLayer(d, hdims, K, B, [1]) ∘ NeuralSplineLayer(d, hdims, K, B, [2]) for
-    i in 1:3
-]
-
-flow = create_flow(Ls, q0)
+q0 = MvNormal(zeros(T, 2), I)
+
+flow = nsf(q0; paramtype=Float32)
 flow_untrained = deepcopy(flow)
@@ -50,7 +41,6 @@ sample_per_iter = 64

 # callback function to log training progress
 cb(iter, opt_stats, re, θ) = (sample_per_iter=sample_per_iter, ad=adtype)
-# TODO: now using AutoMooncake the example broke, but AutoZygote works, need to debug
 adtype = ADTypes.AutoMooncake(; config = Mooncake.Config())
 checkconv(iter, stat, re, θ, st) = stat.gradient_norm < one(T)/1000
 flow_trained, stats, _ = train_flow(
flow_trained, stats, _ = train_flow(
@@ -73,3 +63,35 @@ losses = map(x -> x.loss, stats)
 ######################################
 plot(losses; label="Loss", linewidth=2) # plot the loss
 compare_trained_and_untrained_flow(flow_trained, flow_untrained, target, 1000)
+
+
+# using MonotonicSplines, Plots, InverseFunctions, ChangesOfVariables
+
+# f = rand(RQSpline)
+# f.pX, f.pY, f.dYdX
+
+# plot(f, xlims = (-6, 6)); plot!(inverse(f), xlims = (-6, 6))
+
+# x = 1.2
+# y = f(x)
+# with_logabsdet_jacobian(f, x)
+# inverse(f)(y)
+# with_logabsdet_jacobian(inverse(f), y)
+
+# # test auto grad
+# function loss(x)
+#     y, laj = MonotonicSplines.rqs_forward(x, f.pX, f.pY, f.dYdX)
+#     return laj + 0.5 * sum((y .- 1).^2)
+# end
+
+# xx = rand()
+# val, g = DifferentiationInterface.value_and_gradient(loss, adtype, xx)
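
For reference, a minimal sketch (not part of the commit) of the full `nsf` signature introduced here, `nsf(q0, hdims, K, B, nlayers; paramtype)`. The hyperparameter values below are illustrative, and `B` must share the element type requested via `paramtype` (hence `30f0` with `Float32`):

    using Distributions, LinearAlgebra
    using NormalizingFlows

    q0 = MvNormal(zeros(Float32, 2), I)
    flow = nsf(q0, [64, 64], 10, 30f0, 5; paramtype=Float32)  # 5 NSF layers

    # the demo's one-liner uses the defaults: hdims=[32, 32], K=10, B=30, 10 layers
    flow_default = nsf(q0; paramtype=Float32)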

src/NormalizingFlows.jl

Lines changed: 2 additions & 2 deletions
@@ -132,8 +132,8 @@ include("flows/realnvp.jl")
 include("flows/neuralspline.jl")

 export create_flow
-export RealNVP_layer, realnvp, AffineCoupling
-export NeuralSplineLayer
+export AffineCoupling, RealNVP_layer, realnvp
+export NeuralSplineCoupling, NSF_layer, nsf

 end

src/flows/neuralspline.jl

Lines changed: 71 additions & 18 deletions
@@ -1,49 +1,48 @@
-##################################
-# define neural spline layer using Bijectors.jl interface
-#################################
 """
 Neural rational quadratic spline layer

 # References
 [1] Durkan, C., Bekasov, A., Murray, I., & Papamakarios, G., Neural Spline Flows, CoRR, arXiv:1906.04032 [stat.ML], (2019).
 """
-struct NeuralSplineLayer{T,A<:Flux.Chain} <: Bijectors.Bijector
+struct NeuralSplineCoupling{T,A<:Flux.Chain} <: Bijectors.Bijector
     dim::Int # dimension of input
     K::Int # number of knots
     n_dims_transferred::Int # number of dimensions that are transformed
-    nn::A # networks that parmaterize the knots and derivatives
     B::T # bound of the knots
+    nn::A # networks that parameterize the knots and derivatives
     mask::Bijectors.PartitionMask
 end

-function NeuralSplineLayer(
+function NeuralSplineCoupling(
     dim::T1, # dimension of input
-    hdims::T1, # dimension of hidden units for s and t
+    hdims::AbstractVector{T1}, # dimensions of the hidden units of the nn
     K::T1, # number of knots
     B::T2, # bound of the knots
     mask_idx::AbstractVector{<:Int}, # indices of the dimensions to apply the transformation to
-) where {T1<:Int,T2<:Real}
+    paramtype::Type{T3}, # type of the parameters, e.g., Float64 or Float32
+) where {T1<:Int,T2<:Real,T3<:AbstractFloat}
     num_of_transformed_dims = length(mask_idx)
     input_dims = dim - num_of_transformed_dims

     # output dim of the NN
     output_dims = (3K - 1)*num_of_transformed_dims
     # one big mlp that outputs all the knots and derivatives for all the transformed dimensions
-    nn = mlp3(input_dims, hdims, output_dims)
+    # todo: ensure type stability
+    nn = fnn(input_dims, hdims, output_dims; output_activation=nothing, paramtype=paramtype)

     mask = Bijectors.PartitionMask(dim, mask_idx)
-    return NeuralSplineLayer(dim, K, num_of_transformed_dims, nn, B, mask)
+    return NeuralSplineCoupling(dim, K, num_of_transformed_dims, B, nn, mask)
 end

-@functor NeuralSplineLayer (nn,)
+@functor NeuralSplineCoupling (nn,)

 """
 Build a rational quadratic spline (RQS) from the nn output.
 Bijectors.jl has implemented the inverse and logabsdetjac for the rational quadratic spline;
 we just need to map the nn output to the knots and derivatives of the RQS.
 """
-function instantiate_rqs(nsl::NeuralSplineLayer, x::AbstractVector)
+function instantiate_rqs(nsl::NeuralSplineCoupling, x::AbstractVector)
     K, B = nsl.K, nsl.B
     nnoutput = reshape(nsl.nn(x), nsl.n_dims_transferred, :)
     ws = @view nnoutput[:, 1:K]
@@ -52,46 +51,100 @@ function instantiate_rqs(nsl::NeuralSplineLayer, x::AbstractVector)
     return Bijectors.RationalQuadraticSpline(ws, hs, ds, B)
 end

-function Bijectors.transform(nsl::NeuralSplineLayer, x::AbstractVector)
+function Bijectors.transform(nsl::NeuralSplineCoupling, x::AbstractVector)
     x_1, x_2, x_3 = Bijectors.partition(nsl.mask, x)
     # instantiate rqs knots and derivatives
     rqs = instantiate_rqs(nsl, x_2)
     y_1 = Bijectors.transform(rqs, x_1)
     return Bijectors.combine(nsl.mask, y_1, x_2, x_3)
 end

-function Bijectors.transform(insl::Inverse{<:NeuralSplineLayer}, y::AbstractVector)
+function Bijectors.transform(insl::Inverse{<:NeuralSplineCoupling}, y::AbstractVector)
     nsl = insl.orig
     y1, y2, y3 = partition(nsl.mask, y)
     rqs = instantiate_rqs(nsl, y2)
     x1 = Bijectors.transform(Inverse(rqs), y1)
     return Bijectors.combine(nsl.mask, x1, y2, y3)
 end

-function (nsl::NeuralSplineLayer)(x::AbstractVector)
+function (nsl::NeuralSplineCoupling)(x::AbstractVector)
     return Bijectors.transform(nsl, x)
 end

 # define logabsdetjac
-function Bijectors.logabsdetjac(nsl::NeuralSplineLayer, x::AbstractVector)
+function Bijectors.logabsdetjac(nsl::NeuralSplineCoupling, x::AbstractVector)
     x_1, x_2, _ = Bijectors.partition(nsl.mask, x)
     rqs = instantiate_rqs(nsl, x_2)
     logjac = logabsdetjac(rqs, x_1)
     return logjac
 end

-function Bijectors.logabsdetjac(insl::Inverse{<:NeuralSplineLayer}, y::AbstractVector)
+function Bijectors.logabsdetjac(insl::Inverse{<:NeuralSplineCoupling}, y::AbstractVector)
     nsl = insl.orig
     y1, y2, _ = partition(nsl.mask, y)
     rqs = instantiate_rqs(nsl, y2)
     logjac = logabsdetjac(Inverse(rqs), y1)
     return logjac
 end

-function Bijectors.with_logabsdet_jacobian(nsl::NeuralSplineLayer, x::AbstractVector)
+function Bijectors.with_logabsdet_jacobian(nsl::NeuralSplineCoupling, x::AbstractVector)
     x_1, x_2, x_3 = Bijectors.partition(nsl.mask, x)
     rqs = instantiate_rqs(nsl, x_2)
     y_1, logjac = with_logabsdet_jacobian(rqs, x_1)
     return Bijectors.combine(nsl.mask, y_1, x_2, x_3), logjac
 end

+"""
+    NSF_layer(dims, hdims, K, B; paramtype = Float64)
+
+Default constructor for a single layer of Neural Spline Flow (NSF),
+which is a composition of 2 neural spline coupling transformations with
+complementary (odd-even) masks.
+
+# Arguments
+- `dims::Int`: dimension of the problem
+- `hdims::AbstractVector{Int}`: dimensions of the hidden units of the nn
+- `K::Int`: number of knots
+- `B::AbstractFloat`: bound of the knots
+
+# Keyword Arguments
+- `paramtype::Type{T} = Float64`: type of the parameters
+
+# Returns
+- A `Bijectors.Bijector` representing the NSF layer.
+"""
+function NSF_layer(
+    dims::T1, # dimension of problem
+    hdims::AbstractVector{T1}, # dimensions of the hidden units of the nn
+    K::T1, # number of knots
+    B::T2; # bound of the knots
+    paramtype::Type{T2} = Float64, # type of the parameters
+) where {T1<:Int,T2<:AbstractFloat}
+
+    # by default use the odd-even masking strategy
+    mask_idx1 = 1:2:dims
+    mask_idx2 = 2:2:dims
+
+    nsf1 = NeuralSplineCoupling(dims, hdims, K, B, mask_idx1, paramtype)
+    nsf2 = NeuralSplineCoupling(dims, hdims, K, B, mask_idx2, paramtype)
+    return reduce(∘, (nsf1, nsf2))
+end
+
+function nsf(
+    q0::Distribution{Multivariate,Continuous},
+    hdims::AbstractVector{Int}, # dimensions of the hidden units of the nn
+    K::Int, # number of knots
+    B::T, # bound of the knots
+    nlayers::Int; # number of NSF_layer
+    paramtype::Type{T} = Float64, # type of the parameters
+) where {T<:AbstractFloat}
+
+    dims = length(q0) # dimension of the reference distribution == dim of the problem
+    Ls = [NSF_layer(dims, hdims, K, B; paramtype=paramtype) for _ in 1:nlayers]
+    return create_flow(Ls, q0)
+end
+
+nsf(q0; paramtype::Type{T} = Float64) where {T<:AbstractFloat} = nsf(
+    q0, [32, 32], 10, 30*one(T), 10; paramtype=paramtype
+)
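
As a side note, here is a small sketch (not in the commit) of the `(3K - 1)` output layout that `instantiate_rqs` slices per transformed coordinate: `K` unnormalized knot widths, `K` heights, and `K - 1` interior derivatives, which `Bijectors.RationalQuadraticSpline` normalizes internally. The random `raw` vector stands in for one coordinate's slice of the nn output.

    using Bijectors

    K, B = 10, 30.0
    raw = randn(3K - 1)              # one coordinate's slice of the nn output
    ws = raw[1:K]                    # unnormalized widths
    hs = raw[(K + 1):(2K)]           # unnormalized heights
    ds = raw[(2K + 1):(3K - 1)]      # unnormalized interior derivatives
    rqs = Bijectors.RationalQuadraticSpline(ws, hs, ds, B)

    y, logjac = Bijectors.with_logabsdet_jacobian(rqs, 1.2)  # forward pass + log|det J|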
