incorporate comments from red-portal and sunxd3

zuhengxu · zuhengxu · commit 2431e57ebd1a · 2025-08-19T22:47:27.000-07:00
diff --git a/docs/src/example.md b/docs/src/example.md
diff --git a/src/flows/neuralspline.jl b/src/flows/neuralspline.jl
@@ -1,6 +1,6 @@
 """
     NeuralSplineCoupling(dim, hdims, K, B, mask_idx, paramtype)
-    NeuralSplineCoupling(dim, K, n_dims_transferred, B, nn, mask)
+    NeuralSplineCoupling(dim, K, n_dims_transformed, B, nn, mask)
 
 Neural Rational Quadratic Spline (RQS) coupling bijector [^DBMP2019].
 
@@ -19,7 +19,7 @@ Keyword Arguments
 - `paramtype::Type{<:AbstractFloat}`: parameter element type.
 
 Fields
-- `nn::Flux.Chain`: conditioner that outputs all spline params for all transformed dims.
+- `nn::Flux.Chain`: conditioner that outputs all spline params for all transformed dimensions.
 - `mask::Bijectors.PartitionMask`: partition specification.
 
 Notes
@@ -35,9 +35,9 @@ and log-determinant computations.
 struct NeuralSplineCoupling{T,A<:Flux.Chain} <: Bijectors.Bijector
     dim::Int                        # dimension of input
     K::Int                          # number of knots
-    n_dims_transferred::Int         # number of dimensions that are transformed
+    n_dims_transformed::Int         # number of dimensions that are transformed
     B::T                            # bound of the knots
-    nn::A                           # networks that parmaterize the knots and derivatives
+    nn::A                           # networks that parameterize the knots and derivatives
     mask::Bijectors.PartitionMask
 end
 
@@ -46,13 +46,12 @@ function NeuralSplineCoupling(
     hdims::AbstractVector{T1},       # dimension of hidden units for s and t
     K::T1,                           # number of knots
     B::T2,                           # bound of the knots
-    mask_idx::AbstractVector{T1}, # index of dimensione that one wants to apply transformations on
+    mask_idx::AbstractVector{T1},    # indices of the transformed dimensions
     paramtype::Type{T2},             # type of the parameters, e.g., Float64 or Float32
 ) where {T1<:Int,T2<:AbstractFloat}
     num_of_transformed_dims = length(mask_idx)
     input_dims = dim - num_of_transformed_dims
     
-    # output dim of the NN
     output_dims = (3K - 1)*num_of_transformed_dims
     # one big mlp that outputs all the knots and derivatives for all the transformed dimensions
     nn = fnn(input_dims, hdims, output_dims; output_activation=nothing, paramtype=paramtype)
@@ -66,7 +65,7 @@ end
 function get_nsc_params(nsc::NeuralSplineCoupling, x::AbstractVecOrMat)
     nnoutput = nsc.nn(x)
     px, py, dydx = MonotonicSplines.rqs_params_from_nn(
-        nnoutput, nsc.n_dims_transferred, nsc.B
+        nnoutput, nsc.n_dims_transformed, nsc.B
     )
     return px, py, dydx
 end
@@ -146,13 +145,13 @@ end
 
 
 """
-    NSF_layer(dims, hdims, K, B; paramtype = Float64)
+    NSF_layer(dim, hdims, K, B; paramtype = Float64)
 
 Build a single Neural Spline Flow (NSF) layer by composing two
 `NeuralSplineCoupling` bijectors with complementary odd–even masks.
 
 Arguments
-- `dims::Int`: dimensionality of the problem.
+- `dim::Int`: dimensionality of the problem.
 - `hdims::AbstractVector{Int}`: hidden sizes of the conditioner network.
 - `K::Int`: number of spline knots.
 - `B::AbstractFloat`: spline boundary.
@@ -168,19 +167,19 @@ Example
 - `y = layer(randn(4, 32))`
 """
 function NSF_layer(
-    dims::T1,                      # dimension of problem
+    dim::T1,                      # dimension of problem
     hdims::AbstractVector{T1},     # dimension of hidden units for nn 
     K::T1,                           # number of knots
     B::T2;                           # bound of the knots
     paramtype::Type{T2} = Float64,   # type of the parameters
 ) where {T1<:Int,T2<:AbstractFloat}
 
-    mask_idx1 = 1:2:dims
-    mask_idx2 = 2:2:dims
+    mask_idx1 = 1:2:dim
+    mask_idx2 = 2:2:dim
 
     # by default use the odd-even masking strategy
-    nsf1 = NeuralSplineCoupling(dims, hdims, K, B, mask_idx1, paramtype)
-    nsf2 = NeuralSplineCoupling(dims, hdims, K, B, mask_idx2, paramtype)
+    nsf1 = NeuralSplineCoupling(dim, hdims, K, B, mask_idx1, paramtype)
+    nsf2 = NeuralSplineCoupling(dim, hdims, K, B, mask_idx2, paramtype)
     return reduce(∘, (nsf1, nsf2))
 end
 
@@ -205,11 +204,11 @@ Keyword Arguments
 Returns
 - `Bijectors.TransformedDistribution` representing the NSF flow.
 
-Notes:
-- Under the hood, `nsf` relies on the rational quadratic spline function implememented in 
-`MonotonicSplines.jl` for performance reasons.  `MonotonicSplines.jl` uses 
-`KernelAbstractions.jl` to support batched operations. 
-Because of this, so far `nsf` only supports `Zygote` as the AD type.
+!!! note 
+    Under the hood, `nsf` relies on the rational quadratic spline function implemented in 
+    `MonotonicSplines.jl` for performance reasons.  `MonotonicSplines.jl` uses 
+    `KernelAbstractions.jl` to support batched operations. 
+    Because of this, so far `nsf` only supports `Zygote` as the AD type.
   
 
 Example
@@ -225,8 +224,8 @@ function nsf(
     paramtype::Type{T} = Float64,   # type of the parameters
 ) where {T<:AbstractFloat}
 
-    dims = length(q0)  # dimension of the reference distribution == dim of the problem
-    Ls = [NSF_layer(dims, hdims, K, B; paramtype=paramtype) for _ in 1:nlayers] 
+    dim = length(q0)  # dimension of the reference distribution == dim of the problem
+    Ls = [NSF_layer(dim, hdims, K, B; paramtype=paramtype) for _ in 1:nlayers] 
     create_flow(Ls, q0)         
 end
 
diff --git a/src/flows/realnvp.jl b/src/flows/realnvp.jl
@@ -37,13 +37,12 @@ struct AffineCoupling <: Bijectors.Bijector
     t::Flux.Chain
 end
 
-# let params track field s and t
 @functor AffineCoupling (s, t)
 
 function AffineCoupling(
     dim::Int,                       # dimension of the problem
     hdims::AbstractVector{Int},     # dimension of hidden units for s and t
-    mask_idx::AbstractVector{Int},       # index of dimensione that one wants to apply transformations on
+    mask_idx::AbstractVector{Int},       # indices of the transformed dimensions
     paramtype::Type{T}
 ) where {T<:AbstractFloat}
     cdims = length(mask_idx)  # dimension of parts used to construct coupling law
@@ -110,37 +109,6 @@ function Bijectors.with_logabsdet_jacobian(
     return combine(af.mask, x_1, y_2, y_3), vec(logjac)
 end
 
-################### 
-# an equivalent definition of AffineCoupling using Bijectors.Coupling 
-# (see https://github.com/TuringLang/Bijectors.jl/blob/74d52d4eda72a6149b1a89b72524545525419b3f/src/bijectors/coupling.jl#L188C1-L188C1)
-###################
-
-# struct AffineCoupling <: Bijectors.Bijector
-#     dim::Int
-#     mask::Bijectors.PartitionMask
-#     s::Flux.Chain
-#     t::Flux.Chain
-# end
-
-# # let params track field s and t
-# @functor AffineCoupling (s, t)
-
-# function AffineCoupling(dim, mask, s, t)
-#     return Bijectors.Coupling(θ -> Bijectors.Shift(t(θ)) ∘ Bijectors.Scale(s(θ)), mask)
-# end
-
-# function AffineCoupling(
-#     dim::Int,  # dimension of input
-#     hdims::Int, # dimension of hidden units for s and t
-#     mask_idx::AbstractVector, # index of dimensione that one wants to apply transformations on
-# )
-#     cdims = length(mask_idx) # dimension of parts used to construct coupling law
-#     s = mlp3(cdims, hdims, cdims)
-#     t = mlp3(cdims, hdims, cdims)
-#     mask = PartitionMask(dim, mask_idx)
-#     return AffineCoupling(dim, mask, s, t)
-# end
-
 """
     RealNVP_layer(dims, hdims; paramtype = Float64)
 
diff --git a/src/flows/utils.jl b/src/flows/utils.jl
@@ -8,14 +8,15 @@ Construct a normalizing flow by composing the provided bijector layers and
 attaching them to the base distribution `q0`.
 
 - `layers`: an iterable of `Bijectors.Bijector` objects that are composed in order
-  (left-to-right) via function composition.
+  (left-to-right) via function composition
+  (for instance, if `layers = [l1, l2, l3]`, the flow will be `l3∘l2∘l1(q0)`).
 - `q0`: the base distribution (e.g., `MvNormal(zeros(d), I)`).
 
 Returns a `Bijectors.TransformedDistribution` representing the resulting flow.
 
 Example
 
-    using Distributions
+    using Distributions, Bijectors, LinearAlgebra
     q0 = MvNormal(zeros(2), I)
     flow = create_flow((Bijectors.Scale([1.0, 2.0]), Bijectors.Shift([0.0, 1.0])), q0)
 """
@@ -77,7 +78,7 @@ function fnn(
 ) where {T<:AbstractFloat}
     # Create a chain of dense layers
     # First layer
-    layers = Any[Flux.Dense(input_dim, hidden_dims[1], inlayer_activation)]
+    layers = [Flux.Dense(input_dim, hidden_dims[1], inlayer_activation)]
 
     # Hidden layers
     for i in 1:(length(hidden_dims) - 1)
@@ -96,4 +97,4 @@ function fnn(
 
     m = Chain(layers...)
     return Flux._paramtype(paramtype, m)
-end
+end
diff --git a/test/flow.jl b/test/flow.jl
@@ -45,19 +45,19 @@
             target = MvNormal(μ, Σ)
             logp(z) = logpdf(target, z)
 
-            # Define a simple log-likelihood function
-            logp(z) = logpdf(q₀, z)
-
             # Compute ELBO
             batchsize = 64
             elbo_value = elbo(Random.default_rng(), flow, logp, batchsize)
             elbo_batch_value = elbo_batch(Random.default_rng(), flow, logp, batchsize)
 
+            # test when batchsize == 1
+            batchsize_single = 1
+            elbo_value_single = elbo(Random.default_rng(), flow, logp, batchsize_single)
+
             # test elbo_value is not NaN and not Inf
-            @test !isnan(elbo_value)
-            @test !isinf(elbo_value)
-            @test !isnan(elbo_batch_value)
-            @test !isinf(elbo_batch_value)
+            @test isfinite(elbo_value)
+            @test isfinite(elbo_batch_value)
+            @test isfinite(elbo_value_single)
         end
     end
 end
@@ -112,19 +112,19 @@ end
             target = MvNormal(μ, Σ)
             logp(z) = logpdf(target, z)
 
-            # Define a simple log-likelihood function
-            logp(z) = logpdf(q₀, z)
-
             # Compute ELBO
             batchsize = 64
             elbo_value = elbo(Random.default_rng(), flow, logp, batchsize)
             elbo_batch_value = elbo_batch(Random.default_rng(), flow, logp, batchsize)
 
+            # test when batchsize == 1
+            batchsize_single = 1
+            elbo_value_single = elbo(Random.default_rng(), flow, logp, batchsize_single)
+
             # test elbo_value is not NaN and not Inf
-            @test !isnan(elbo_value)
-            @test !isinf(elbo_value)
-            @test !isnan(elbo_batch_value)
-            @test !isinf(elbo_batch_value)
+            @test isfinite(elbo_value)
+            @test isfinite(elbo_batch_value)
+            @test isfinite(elbo_value_single)
         end
     end
 end