-using Reactant, Lux, Random, Statistics
-using Enzyme
-using Test
-
-# Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
-noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
-truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
-
-# Define our model, a multi-layer perceptron with one hidden layer of size 3:
-model = Lux.Chain(
-    Lux.Dense(2 => 3, tanh), # activation function inside layer
-    Lux.BatchNorm(3, gelu),
-    Lux.Dense(3 => 2),
-    softmax,
-)
-ps, st = Lux.setup(Xoshiro(123), model)
-
-using BenchmarkTools
-
-origout, _ = model(noisy, ps, st)
-@btime model($noisy, $ps, $st) # 68.444 μs (46 allocations: 45.88 KiB)
-
-cmodel = Reactant.to_rarray(model)
-cps = Reactant.to_rarray(ps)
-cst = Reactant.to_rarray(st)
-cnoisy = Reactant.ConcreteRArray(noisy)
-
-f = Reactant.compile((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))
-
-# # using InteractiveUtils
-# # @show @code_typed f(cmodel,cnoisy)
-# # @show @code_llvm f(cmodel,cnoisy)
-comp = f(cmodel, cnoisy, cps, cst)
-# @btime f($cmodel, $cnoisy, $cps, $cst) # 21.790 μs (6 allocations: 224 bytes)
-
-@test comp ≈ origout atol = 1e-5 rtol = 1e-2
-
-# To train the model, we use batches of 64 samples, and one-hot encoding:
-
-using MLUtils, OneHotArrays, Optimisers
-
-target = onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
-ctarget = Reactant.ConcreteRArray(Array{Float32}(target))
-loader = DataLoader((noisy, target); batchsize=64, shuffle=true);
-# # 16-element DataLoader with first element: (2×64 Matrix{Float32}, 2×64 OneHotMatrix)
-
-opt = Optimisers.Adam(0.01f0)
-losses = []
-
-# Lux.Exprimental.TrainState is very specialized for Lux models, so we write out the
-# training loop manually:
-function crossentropy(ŷ, y)
-    logŷ = log.(ŷ)
-    result = y .* logŷ
-    # result = ifelse.(y .== 0.0f0, zero.(result), result)
-    return -sum(result)
-end
-
-function loss_function(model, x, y, ps, st)
-    y_hat, _ = model(x, ps, st)
-    return crossentropy(y_hat, y)
-end
-
-function gradient_loss_function(model, x, y, ps, st)
-    dps = Enzyme.make_zero(ps)
-    _, res = Enzyme.autodiff(
-        ReverseWithPrimal,
-        loss_function,
-        Active,
-        Const(model),
-        Const(x),
-        Const(y),
-        Duplicated(ps, dps),
-        Const(st),
+using Reactant, Lux, Random, Statistics, Enzyme, Functors, OneHotArrays
+
+@testset "Lux.jl Integration" begin
+    # Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
+    noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
+    truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
+
+    # Define our model, a multi-layer perceptron with one hidden layer of size 3:
+    model = Lux.Chain(
+        Lux.Dense(2 => 3, tanh), # activation function inside layer
+        Lux.BatchNorm(3, sigmoid),
+        Lux.Dense(3 => 2),
+        softmax,
+    )
+    ps, st = Lux.setup(Xoshiro(123), model)
+
+    origout, _ = model(noisy, ps, Lux.testmode(st))
+
+    cmodel = Reactant.to_rarray(model)
+    cps = Reactant.to_rarray(ps)
+    # cst carries the test-mode state for the inference check; cst2 keeps the
+    # training-mode state for the gradient test further down.
+    cst = Reactant.to_rarray(Lux.testmode(st))
+    cst2 = Reactant.to_rarray(st)
+    cnoisy = Reactant.ConcreteRArray(noisy)
+
+    f = Reactant.compile((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))
+
+    comp = f(cmodel, cnoisy, cps, cst)
+
+    @test comp ≈ origout atol = 1e-5 rtol = 1e-2
+
+    target = onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
+
+    ctarget = Reactant.ConcreteRArray(Array{Float32}(target))
+    # ctarget = Reactant.to_rarray(target)
+
+    # Lux.Experimental.TrainState is very specialized for Lux models, so we write out the
+    # training loop manually:
+    function crossentropy(ŷ, y)
+        logŷ = log.(ŷ)
+        result = y .* logŷ
+        # result = ifelse.(y .== 0.0f0, zero.(result), result)
+        return -sum(result)
+    end
+
+    function loss_function(model, x, y, ps, st)
+        y_hat, _ = model(x, ps, st)
+        # return CrossEntropyLoss()(y_hat, y)
+        return crossentropy(y_hat, y)
+    end
+
+    function gradient_loss_function(model, x, y, ps, st)
+        dps = Enzyme.make_zero(ps)
+        _, res = Enzyme.autodiff(
+            ReverseWithPrimal,
+            loss_function,
+            Active,
+            Const(model),
+            Const(x),
+            Const(y),
+            Duplicated(ps, dps),
+            Const(st),
+        )
+        return res, dps
+    end
+
+    res, dps = gradient_loss_function(model, noisy, target, ps, st)
+
+    compiled_gradient = Reactant.compile(
+        gradient_loss_function, (cmodel, cnoisy, ctarget, cps, cst2)
     )
-    return res, dps
-end
-
-gradient_loss_function(model, noisy, target, ps, st)
-
-compiled_gradient = @compile gradient_loss_function(cmodel, cnoisy, ctarget, cps, cst)
-
-@test length(compiled_gradient(cmodel, cnoisy, ctarget, cps, cst)) == 2
-
-# # Training loop, using the whole data set 1000 times:
-# losses = []
-# for epoch in 1:1_000
-#     for (x, y) in loader
-#         loss, grads = Flux.withgradient(model) do m
-#             # Evaluate model and loss inside gradient context:
-#             y_hat = m(x)
-#             return Flux.crossentropy(y_hat, y)
-#         end
-#         Flux.update!(optim, model, grads[1])
-#         push!(losses, loss) # logging, outside gradient context
-#     end
-# end
 
-# optim # parameters, momenta and output have all changed
-# out2 = model(noisy) # first row is prob. of true, second row p(false)
+    res_reactant, dps_reactant = compiled_gradient(cmodel, cnoisy, ctarget, cps, cst2)
 
-# mean((out2[1, :] .> 0.5) .== truth) # accuracy 94% so far!
+    @test res ≈ res_reactant
+    for (dps1, dps2) in zip(fleaves(dps), fleaves(dps_reactant))
+        @test dps1 ≈ dps2
+    end
+end
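
The previous version of this file ended with a commented-out Flux-style training loop, which this revision drops in favour of the gradient check above. For reference, a minimal sketch of how `gradient_loss_function` could drive a training loop with Optimisers.jl; this assumes plain (non-Reactant) arrays, and the Adam step size and epoch count are illustrative, not part of the test:

    using Optimisers

    # Illustrative helper: reuses `gradient_loss_function` from the testset above;
    # everything else is passed in explicitly. Returns the updated parameters and
    # the per-epoch losses.
    function train(model, ps, st, x, y; nepochs=100)
        opt_state = Optimisers.setup(Optimisers.Adam(0.01f0), ps)
        losses = Float32[]
        for _ in 1:nepochs
            loss, dps = gradient_loss_function(model, x, y, ps, st)
            push!(losses, loss)
            opt_state, ps = Optimisers.update!(opt_state, ps, dps)
        end
        return ps, losses
    end

    # e.g. ps_trained, losses = train(model, ps, st, noisy, target)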