Commit 13f740b

test: cleanup integration testing
1 parent: 28849c5

File tree: 3 files changed (+94 −161 lines)

test/Project.toml

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
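
Functors enters the test dependencies because the reworked test/nn/lux.jl below compares Enzyme and Reactant gradients leaf by leaf with fleaves. As a minimal sketch of what that call does, assuming fleaves flattens any Functors-traversable structure into a vector of its array leaves (the parameter names below are made up purely for illustration):

using Functors

# Hypothetical nested parameter tree, for illustration only:
ps = (dense = (weight = rand(Float32, 3, 2), bias = zeros(Float32, 3)),
      norm = (scale = ones(Float32, 3),))

leaves = fleaves(ps)   # Vector of the three leaf arrays, in traversal order
length(leaves) == 3    # true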

test/nn/flux.jl

Lines changed: 18 additions & 62 deletions
@@ -1,69 +1,25 @@
-# This will prompt if neccessary to install everything, including CUDA:
+using Reactant, Flux
 
-using Reactant
-using Flux
-using Test
-# Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
-noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
-truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
+@testset "Flux.jl Integration" begin
+    # Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
+    noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
+    truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
 
-# Define our model, a multi-layer perceptron with one hidden layer of size 3:
-model = Chain(
-    Dense(2 => 3, tanh), # activation function inside layer
-    BatchNorm(3),
-    Dense(3 => 2),
-    softmax,
-)
+    # Define our model, a multi-layer perceptron with one hidden layer of size 3:
+    model = Chain(
+        Dense(2 => 3, tanh), # activation function inside layer
+        BatchNorm(3),
+        Dense(3 => 2),
+        softmax,
+    )
 
-using BenchmarkTools
+    origout = model(noisy)
 
-origout = model(noisy)
+    cmodel = Reactant.to_rarray(model)
+    cnoisy = Reactant.ConcreteRArray(noisy)
 
-cmodel = Reactant.to_rarray(model)
-cnoisy = Reactant.ConcreteRArray(noisy)
+    f = Reactant.compile((a, b) -> a(b), (cmodel, cnoisy))
 
-# c_o = cmodel(noisy)
-# @show c_o[3]
-# @btime cmodel(noisy)
-#
-# o_c = model(cnoisy)
-# @show o_c[3]
-# @btime model(cnoisy)
-#
-# c_c = cmodel(cnoisy)
-# @show c_c[3]
-# @btime cmodel(cnoisy)
-f = @compile cmodel(cnoisy)
-
-# using InteractiveUtils
-# @show @code_typed f(cmodel,cnoisy)
-# @show @code_llvm f(cmodel,cnoisy)
-comp = f(cnoisy)
-# @btime f(cmodel, cnoisy)
-@test origout ≈ comp
-
-# To train the model, we use batches of 64 samples, and one-hot encoding:
-target = Flux.onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
-loader = Flux.DataLoader((noisy, target); batchsize=64, shuffle=true);
-# 16-element DataLoader with first element: (2×64 Matrix{Float32}, 2×64 OneHotMatrix)
-
-optim = Flux.setup(Flux.Adam(0.01), model) # will store optimiser momentum, etc.
-
-# Training loop, using the whole data set 1000 times:
-losses = []
-for epoch in 1:1_000
-    for (x, y) in loader
-        loss, grads = Flux.withgradient(model) do m
-            # Evaluate model and loss inside gradient context:
-            y_hat = m(x)
-            return Flux.crossentropy(y_hat, y)
-        end
-        Flux.update!(optim, model, grads[1])
-        push!(losses, loss) # logging, outside gradient context
-    end
+    comp = f(cmodel, cnoisy)
+    @test origout ≈ comp
 end
-
-optim # parameters, momenta and output have all changed
-out2 = model(noisy) # first row is prob. of true, second row p(false)
-
-mean((out2[1, :] .> 0.5) .== truth) # accuracy 94% so far!
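
The slimmed-down test keeps only the compile-and-compare path: trace the Flux model with Reactant, run the compiled function, and check it against eager execution. A minimal standalone sketch of that pattern, using only calls that appear in the diff above (the toy model and batch size here are illustrative, not from the test suite):

using Reactant, Flux, Test

model = Chain(Dense(2 => 3, tanh), Dense(3 => 2), softmax)
x = rand(Float32, 2, 16)
expected = model(x)                                   # eager Flux forward pass

cmodel = Reactant.to_rarray(model)                    # convert parameters to Reactant arrays
cx = Reactant.ConcreteRArray(x)                       # convert the input batch
f = Reactant.compile((m, a) -> m(a), (cmodel, cx))    # trace and compile the forward pass

@test expected ≈ f(cmodel, cx)                        # compiled result matches eager result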

test/nn/lux.jl

Lines changed: 75 additions & 99 deletions
@@ -1,102 +1,78 @@
-using Reactant, Lux, Random, Statistics
-using Enzyme
-using Test
-
-# Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
-noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
-truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
-
-# Define our model, a multi-layer perceptron with one hidden layer of size 3:
-model = Lux.Chain(
-    Lux.Dense(2 => 3, tanh), # activation function inside layer
-    Lux.BatchNorm(3, gelu),
-    Lux.Dense(3 => 2),
-    softmax,
-)
-ps, st = Lux.setup(Xoshiro(123), model)
-
-using BenchmarkTools
-
-origout, _ = model(noisy, ps, st)
-@btime model($noisy, $ps, $st) # 68.444 μs (46 allocations: 45.88 KiB)
-
-cmodel = Reactant.to_rarray(model)
-cps = Reactant.to_rarray(ps)
-cst = Reactant.to_rarray(st)
-cnoisy = Reactant.ConcreteRArray(noisy)
-
-f = Reactant.compile((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))
-
-# # using InteractiveUtils
-# # @show @code_typed f(cmodel,cnoisy)
-# # @show @code_llvm f(cmodel,cnoisy)
-comp = f(cmodel, cnoisy, cps, cst)
-# @btime f($cmodel, $cnoisy, $cps, $cst) # 21.790 μs (6 allocations: 224 bytes)
-
-@test comp ≈ origout atol = 1e-5 rtol = 1e-2
-
-# To train the model, we use batches of 64 samples, and one-hot encoding:
-
-using MLUtils, OneHotArrays, Optimisers
-
-target = onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
-ctarget = Reactant.ConcreteRArray(Array{Float32}(target))
-loader = DataLoader((noisy, target); batchsize=64, shuffle=true);
-# # 16-element DataLoader with first element: (2×64 Matrix{Float32}, 2×64 OneHotMatrix)
-
-opt = Optimisers.Adam(0.01f0)
-losses = []
-
-# Lux.Exprimental.TrainState is very specialized for Lux models, so we write out the
-# training loop manually:
-function crossentropy(ŷ, y)
-    logŷ = log.(ŷ)
-    result = y .* logŷ
-    # result = ifelse.(y .== 0.0f0, zero.(result), result)
-    return -sum(result)
-end
-
-function loss_function(model, x, y, ps, st)
-    y_hat, _ = model(x, ps, st)
-    return crossentropy(y_hat, y)
-end
-
-function gradient_loss_function(model, x, y, ps, st)
-    dps = Enzyme.make_zero(ps)
-    _, res = Enzyme.autodiff(
-        ReverseWithPrimal,
-        loss_function,
-        Active,
-        Const(model),
-        Const(x),
-        Const(y),
-        Duplicated(ps, dps),
-        Const(st),
+using Reactant, Lux, Random, Statistics, Enzyme, Functors, OneHotArrays
+
+@testset "Lux.jl Integration" begin
+    # Generate some data for the XOR problem: vectors of length 2, as columns of a matrix:
+    noisy = rand(Float32, 2, 1000) # 2×1000 Matrix{Float32}
+    truth = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(noisy)] # 1000-element Vector{Bool}
+
+    # Define our model, a multi-layer perceptron with one hidden layer of size 3:
+    model = Lux.Chain(
+        Lux.Dense(2 => 3, tanh), # activation function inside layer
+        Lux.BatchNorm(3, sigmoid),
+        Lux.Dense(3 => 2),
+        softmax,
+    )
+    ps, st = Lux.setup(Xoshiro(123), model)
+
+    origout, _ = model(noisy, ps, Lux.testmode(st))
+
+    cmodel = Reactant.to_rarray(model)
+    cps = Reactant.to_rarray(ps)
+    cst = Reactant.to_rarray(Lux.testmode(st))
+    cst2 = Reactant.to_rarray(st)
+    cnoisy = Reactant.ConcreteRArray(noisy)
+
+    f = Reactant.compile((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))
+
+    comp = f(cmodel, cnoisy, cps, cst)
+
+    @test comp ≈ origout atol = 1e-5 rtol = 1e-2
+
+    target = onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
+
+    ctarget = Reactant.ConcreteRArray(Array{Float32}(target))
+    # ctarget = Reactant.to_rarray(target)
+
+    # Lux.Exprimental.TrainState is very specialized for Lux models, so we write out the
+    # training loop manually:
+    function crossentropy(ŷ, y)
+        logŷ = log.(ŷ)
+        result = y .* logŷ
+        # result = ifelse.(y .== 0.0f0, zero.(result), result)
+        return -sum(result)
+    end
+
+    function loss_function(model, x, y, ps, st)
+        y_hat, _ = model(x, ps, st)
+        # return CrossEntropyLoss()(y_hat, y)
+        return crossentropy(y_hat, y)
+    end
+
+    function gradient_loss_function(model, x, y, ps, st)
+        dps = Enzyme.make_zero(ps)
+        _, res = Enzyme.autodiff(
+            ReverseWithPrimal,
+            loss_function,
+            Active,
+            Const(model),
+            Const(x),
+            Const(y),
+            Duplicated(ps, dps),
+            Const(st),
+        )
+        return res, dps
+    end
+
+    res, dps = gradient_loss_function(model, noisy, target, ps, st)
+
+    compiled_gradient = Reactant.compile(
+        gradient_loss_function, (cmodel, cnoisy, ctarget, cps, cst2)
     )
-    return res, dps
-end
-
-gradient_loss_function(model, noisy, target, ps, st)
-
-compiled_gradient = @compile gradient_loss_function(cmodel, cnoisy, ctarget, cps, cst)
-
-@test length(compiled_gradient(cmodel, cnoisy, ctarget, cps, cst)) == 2
-
-# # Training loop, using the whole data set 1000 times:
-# losses = []
-# for epoch in 1:1_000
-#     for (x, y) in loader
-#         loss, grads = Flux.withgradient(model) do m
-#             # Evaluate model and loss inside gradient context:
-#             y_hat = m(x)
-#             return Flux.crossentropy(y_hat, y)
-#         end
-#         Flux.update!(optim, model, grads[1])
-#         push!(losses, loss) # logging, outside gradient context
-#     end
-# end
 
-# optim # parameters, momenta and output have all changed
-# out2 = model(noisy) # first row is prob. of true, second row p(false)
+    res_reactant, dps_reactant = compiled_gradient(cmodel, cnoisy, ctarget, cps, cst2)
 
-# mean((out2[1, :] .> 0.5) .== truth) # accuracy 94% so far!
+    @test res ≈ res_reactant
+    for (dps1, dps2) in zip(fleaves(dps), fleaves(dps_reactant))
+        @test dps1 ≈ dps2
+    end
+end
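
gradient_loss_function in the new test drives Enzyme's ReverseWithPrimal mode: the loss is returned as an Active scalar, the parameters go in as Duplicated so the gradient accumulates into a zeroed shadow, and everything else is Const. A minimal standalone sketch of that calling pattern on a stand-in scalar loss (the function and variable names here are illustrative only, not from the test suite):

using Enzyme

loss(w, x) = sum(abs2, w .* x)   # stand-in scalar loss

w = rand(Float32, 3)
x = rand(Float32, 3)
dw = Enzyme.make_zero(w)         # zeroed shadow that will receive dloss/dw

# ReverseWithPrimal returns (derivatives, primal); gradients for Duplicated
# arguments land in their shadows rather than in the returned tuple.
_, primal = Enzyme.autodiff(ReverseWithPrimal, loss, Active, Duplicated(w, dw), Const(x))

primal ≈ loss(w, x)              # true
dw ≈ 2 .* w .* x .^ 2            # true: gradient of sum((w .* x).^2) with respect to w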
