update realnvp default constructor

zuhengxu · zuhengxu · commit e03213ca7f84 · 2025-07-13T16:58:51.000-07:00
diff --git a/src/NormalizingFlows.jl b/src/NormalizingFlows.jl
@@ -1,13 +1,14 @@
 module NormalizingFlows
 
 using ADTypes
-using Bijectors
 using Distributions
 using LinearAlgebra
 using Optimisers
 using ProgressMeter
 using Random
 using StatsBase
+using Bijectors
+using Bijectors: PartitionMask, Inverse, combine, partition
 using Functors
 import DifferentiationInterface as DI
 
diff --git a/src/flows/neuralspline.jl b/src/flows/neuralspline.jl
@@ -39,7 +39,7 @@ end
 
 # define forward and inverse transformation
 """
-Build a rational quadratic spline from the nn output
+Build a rational quadratic spline (RQS) from the nn output
 Bijectors.jl has implemented the inverse and logabsdetjac for rational quadratic spline
 
 we just need to map the nn output to the knots and derivatives of the RQS
diff --git a/src/flows/realnvp.jl b/src/flows/realnvp.jl
@@ -1,6 +1,10 @@
-##################################
-# define affine coupling layer using Bijectors.jl interface
-#################################
+"""
+Default constructor of Affine Coupling flow layer
+
+following the general architecture as Eq(3) in [^AD2025]
+
+[^AD2025]: Agrawal, J., & Domke, J. (2025). Disentangling impact of capacity, objective, batchsize, estimators, and step-size on flow VI. In *AISTATS*.
+"""
 struct AffineCoupling <: Bijectors.Bijector
     dim::Int
     mask::Bijectors.PartitionMask
@@ -12,21 +16,25 @@ end
 @functor AffineCoupling (s, t)
 
 function AffineCoupling(
-    dim::Int,  # dimension of input
-    hdims::Int, # dimension of hidden units for s and t
-    mask_idx::AbstractVector, # index of dimensione that one wants to apply transformations on
-)
-    cdims = length(mask_idx) # dimension of parts used to construct coupling law
-    s = mlp3(cdims, hdims, cdims)
-    t = mlp3(cdims, hdims, cdims)
+    dim::Int,                       # dimension of the problem
+    hdims::AbstractVector{Int},     # widths of the hidden layers of the networks s and t
+    mask_idx::AbstractVector{Int},       # indices of the dimensions that the transformation is applied to
+    paramtype::Type{T}              # floating-point type requested for the parameters of s and t
+) where {T<:AbstractFloat}
+    cdims = length(mask_idx)  # number of transformed dimensions; s and t map dim-cdims inputs to cdims outputs
+    # scaling network s: its output is exponentiated in the transform, so tanh bounds it for training stability
+    s = fnn(dim-cdims, hdims, cdims; output_activation=Flux.tanh, paramtype=paramtype)  
+    # no transformation for the output of the translation network t
+    t = fnn(dim-cdims, hdims, cdims; output_activation=nothing, paramtype=paramtype)
     mask = PartitionMask(dim, mask_idx)
     return AffineCoupling(dim, mask, s, t)
 end
 
 function Bijectors.transform(af::AffineCoupling, x::AbstractVecOrMat)
     # partition vector using 'af.mask::PartitionMask`
     x₁, x₂, x₃ = partition(af.mask, x)
-    y₁ = x₁ .* af.s(x₂) .+ af.t(x₂)
+    s_x₂ = af.s(x₂)  # log-scale: exponentiated below, so the multiplicative factor is always positive
+    y₁ = x₁ .* exp.(s_x₂) .+ af.t(x₂)  # only x₁ is transformed; x₂ and x₃ are recombined unchanged
     return combine(af.mask, y₁, x₂, x₃)
 end
 
@@ -36,15 +44,17 @@ end
 
 function Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractVector)
     x_1, x_2, x_3 = Bijectors.partition(af.mask, x)
-    y_1 = af.s(x_2) .* x_1 .+ af.t(x_2)
-    logjac = sum(log ∘ abs, af.s(x_2)) # this is a scalar
+    s_x2 = af.s(x_2)  # log-scale; evaluated once and reused for both y_1 and logjac
+    y_1 = exp.(s_x2) .* x_1 .+ af.t(x_2)
+    logjac = sum(s_x2) # scalar: the scale factors are exp.(s_x2), so the sum of their logs is sum(s_x2)
     return combine(af.mask, y_1, x_2, x_3), logjac
 end
 
 function Bijectors.with_logabsdet_jacobian(af::AffineCoupling, x::AbstractMatrix)
     x_1, x_2, x_3 = Bijectors.partition(af.mask, x)
-    y_1 = af.s(x_2) .* x_1 .+ af.t(x_2)
-    logjac = sum(log ∘ abs, af.s(x_2); dims = 1) # 1 × size(x, 2)
+    s_x2 = af.s(x_2)  # log-scale; evaluated once and reused for both y_1 and logjac
+    y_1 = exp.(s_x2) .* x_1 .+ af.t(x_2)
+    logjac = sum(s_x2; dims=1) # 1 × size(x, 2): one log-Jacobian per column of x, flattened by vec below
     return combine(af.mask, y_1, x_2, x_3), vec(logjac)
 end
 
@@ -56,8 +66,9 @@ function Bijectors.with_logabsdet_jacobian(
     # partition vector using `af.mask::PartitionMask`
     y_1, y_2, y_3 = partition(af.mask, y)
     # inverse transformation
-    x_1 = (y_1 .- af.t(y_2)) ./ af.s(y_2)
-    logjac = -sum(log ∘ abs, af.s(y_2))
+    s_y2 = af.s(y_2)
+    x_1 = (y_1 .- af.t(y_2)) .* exp.(-s_y2)
+    logjac = -sum(s_y2)
     return combine(af.mask, x_1, y_2, y_3), logjac
 end
 
@@ -68,8 +79,9 @@ function Bijectors.with_logabsdet_jacobian(
     # partition vector using `af.mask::PartitionMask`
     y_1, y_2, y_3 = partition(af.mask, y)
     # inverse transformation
-    x_1 = (y_1 .- af.t(y_2)) ./ af.s(y_2)
-    logjac = -sum(log ∘ abs, af.s(y_2); dims = 1)
+    s_y2 = af.s(y_2)
+    x_1 = (y_1 .- af.t(y_2)) .* exp.(-s_y2)
+    logjac = -sum(s_y2; dims=1)
     return combine(af.mask, x_1, y_2, y_3), vec(logjac)
 end
 
@@ -104,3 +116,43 @@ end
 #     return AffineCoupling(dim, mask, s, t)
 # end
 
+"""
+Default constructor of RealNVP flow layer
+
+single layer of realnvp flow, which is a composition of 2 affine coupling transformations
+with complementary masks
+"""
+function RealNVP_layer(
+    dims::Int,                      # dimension of problem
+    hdims::AbstractVector{Int};     # dimension of hidden units for s and t
+    paramtype::Type{T} = Float64,   # type of the parameters
+) where {T<:AbstractFloat}
+    # odd-even masking strategy: the first coupling is masked on the odd
+    # indices, the second on the even indices, so the masks are complementary
+    odd_idx = 1:2:dims
+    even_idx = 2:2:dims
+
+    odd_coupling = AffineCoupling(dims, hdims, odd_idx, paramtype)
+    even_coupling = AffineCoupling(dims, hdims, even_idx, paramtype)
+
+    return odd_coupling ∘ even_coupling
+end
+
+
+function RealNVP(
+    dims::Int,                      # dimension of problem
+    hdims::AbstractVector{Int},     # dimension of hidden units for s and t
+    nlayers::Int;                   # number of RealNVP_layer
+    paramtype::Type{T} = Float64,   # type of the parameters
+) where {T<:AbstractFloat}
+    # std Gaussian reference; zeros(T, dims) keeps q0 in the requested paramtype, not always Float64
+    q0 = MvNormal(zeros(T, dims), I)
+    # one separately constructed RealNVP_layer per entry, all sharing dims, hdims and paramtype
+    Ls = [RealNVP_layer(dims, hdims; paramtype=paramtype) for _ in 1:nlayers]
+    return create_flow(Ls, q0)
+end
+
+function RealNVP(dims::Int; paramtype::Type{T} = Float64) where {T<:AbstractFloat}
+    # default RealNVP with 10 layers, each coupling function has 2 hidden layers with 32 units
+    return RealNVP(dims, [32, 32], 10; paramtype=paramtype)
+end
diff --git a/src/flows/utils.jl b/src/flows/utils.jl
@@ -6,21 +6,29 @@ using Flux
 
 A simple wrapper for a 3 layer dense MLP
 """
-function mlp3(input_dim::Int, hidden_dims::Int, output_dim::Int; activation=Flux.leakyrelu)
-    return Chain(
+function mlp3(
+    input_dim::Int,  # input size of the first Dense layer
+    hidden_dims::Int,  # width shared by the two hidden Dense layers
+    output_dim::Int;  # output size of the last Dense layer (built without an activation argument)
+    activation=Flux.leakyrelu,  # activation passed to the first two Dense layers
+    paramtype::Type{T} = Float64  # floating-point type requested for the parameters
+) where {T<:AbstractFloat}
+    m = Chain(
         Flux.Dense(input_dim, hidden_dims, activation),
         Flux.Dense(hidden_dims, hidden_dims, activation),
         Flux.Dense(hidden_dims, output_dim),
     )
+    return Flux._paramtype(paramtype, m)  # NOTE(review): underscore-prefixed Flux internal; presumably casts params to paramtype — confirm
 end
 
 """
     fnn(
         input_dim::Int,
-        hidden_dims::AbstractVector{<:Int},
+        hidden_dims::AbstractVector{Int},
         output_dim::Int;
         inlayer_activation=Flux.leakyrelu,
-        output_activation=Flux.tanh,
+        output_activation=nothing,
+        paramtype::Type{T} = Float64,
     )
 
 Create a fully connected neural network (FNN).
@@ -31,17 +39,19 @@ Create a fully connected neural network (FNN).
 - `output_dim::Int`: The dimension of the output layer.
 - `inlayer_activation`: The activation function for the hidden layers. Defaults to `Flux.leakyrelu`.
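-- `output_activation`: The activation function for the output layer. Defaults to `Flux.tanh`.
+- `output_activation`: The activation function for the output layer. Defaults to `nothing`, in which case the output layer is a `Dense` layer with no activation argument.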
+- `paramtype::Type{T} = Float64`: The type of the parameters in the network, defaults to `Float64`.
 
 # Returns
 - A `Flux.Chain` representing the FNN.
 """
 function fnn(
     input_dim::Int,
-    hidden_dims::AbstractVector{<:Int},
+    hidden_dims::AbstractVector{Int},
     output_dim::Int;
     inlayer_activation=Flux.leakyrelu,
-    output_activation=Flux.tanh,
-)
+    output_activation=nothing,
+    paramtype::Type{T} = Float64,
+) where {T<:AbstractFloat}
     # Create a chain of dense layers
     # First layer
     layers = Any[Flux.Dense(input_dim, hidden_dims[1], inlayer_activation)]
@@ -55,8 +65,14 @@ function fnn(
     end
 
     # Output layer
-    push!(layers, Flux.Dense(hidden_dims[end], output_dim, output_activation))
-    return Chain(layers...)
+    if output_activation === nothing
+        push!(layers, Flux.Dense(hidden_dims[end], output_dim))
+    else
+        push!(layers, Flux.Dense(hidden_dims[end], output_dim, output_activation))
+    end
+
+    m = Chain(layers...)
+    return Flux._paramtype(paramtype, m)
 end
 
 function create_flow(Ls, q₀)