diff --git a/.github/workflows/test_GraphNeuralNetworks.yml b/.github/workflows/test_GraphNeuralNetworks.yml index c92899661..065866e33 100644 --- a/.github/workflows/test_GraphNeuralNetworks.yml +++ b/.github/workflows/test_GraphNeuralNetworks.yml @@ -15,7 +15,7 @@ jobs: matrix: version: - '1.10' # Replace this with the minimum Julia version that your package supports. - # - '1' # '1' will automatically expand to the latest stable 1.x release of Julia. + - '1' # '1' will automatically expand to the latest stable 1.x release of Julia. # - 'pre' os: - ubuntu-latest diff --git a/GNNGraphs/test/test_utils.jl b/GNNGraphs/test/test_utils.jl index fe8f9a997..56e298311 100644 --- a/GNNGraphs/test/test_utils.jl +++ b/GNNGraphs/test/test_utils.jl @@ -5,224 +5,3 @@ function ngradient(f, x...) fdm = central_fdm(5, 1) return FiniteDifferences.grad(fdm, f, x...) end - -const rule_config = Zygote.ZygoteRuleConfig() - -# Using this until https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188 is fixed -function FiniteDifferences.to_vec(x::Integer) - Integer_from_vec(v) = x - return Int[x], Integer_from_vec -end - -# Test that forward pass on cpu and gpu are the same. -# Tests also gradient on cpu and gpu comparing with -# finite difference methods. -# Test gradients with respects to layer weights and to input. -# If `g` has edge features, it is assumed that the layer can -# use them in the forward pass as `l(g, x, e)`. -# Test also gradient with respect to `e`. 
-function test_layer(l, g::GNNGraph; atol = 1e-5, rtol = 1e-5, - exclude_grad_fields = [], - verbose = false, - test_gpu = TEST_GPU, - outsize = nothing, - outtype = :node) - - # TODO these give errors, probably some bugs in ChainRulesTestUtils - # test_rrule(rule_config, x -> l(g, x), x; rrule_f=rrule_via_ad, check_inferred=false) - # test_rrule(rule_config, l -> l(g, x), l; rrule_f=rrule_via_ad, check_inferred=false) - - isnothing(node_features(g)) && error("Plese add node data to the input graph") - fdm = central_fdm(5, 1) - - x = node_features(g) - e = edge_features(g) - use_edge_feat = !isnothing(e) - - x64, e64, l64, g64 = to64.([x, e, l, g]) # needed for accurate FiniteDifferences' grad - xgpu, egpu, lgpu, ggpu = gpu.([x, e, l, g]) - - f(l, g::GNNGraph) = l(g) - f(l, g::GNNGraph, x, e) = use_edge_feat ? l(g, x, e) : l(g, x) - - loss(l, g::GNNGraph) = - if outtype == :node - sum(node_features(f(l, g))) - elseif outtype == :edge - sum(edge_features(f(l, g))) - elseif outtype == :graph - sum(graph_features(f(l, g))) - elseif outtype == :node_edge - gnew = f(l, g) - sum(node_features(gnew)) + sum(edge_features(gnew)) - end - - function loss(l, g::GNNGraph, x, e) - y = f(l, g, x, e) - if outtype == :node_edge - return sum(y[1]) + sum(y[2]) - else - return sum(y) - end - end - - # TEST OUTPUT - y = f(l, g, x, e) - if outtype == :node_edge - @assert y isa Tuple - @test eltype(y[1]) == eltype(x) - @test eltype(y[2]) == eltype(e) - @test all(isfinite, y[1]) - @test all(isfinite, y[2]) - if !isnothing(outsize) - @test size(y[1]) == outsize[1] - @test size(y[2]) == outsize[2] - end - else - @test eltype(y) == eltype(x) - @test all(isfinite, y) - if !isnothing(outsize) - @test size(y) == outsize - end - end - - # test same output on different graph formats - gcoo = GNNGraph(g, graph_type = :coo) - ycoo = f(l, gcoo, x, e) - if outtype == :node_edge - @test ycoo[1] ≈ y[1] - @test ycoo[2] ≈ y[2] - else - @test ycoo ≈ y - end - - g′ = f(l, g) - if outtype == :node - @test 
g′.ndata.x ≈ y - elseif outtype == :edge - @test g′.edata.e ≈ y - elseif outtype == :graph - @test g′.gdata.u ≈ y - elseif outtype == :node_edge - @test g′.ndata.x ≈ y[1] - @test g′.edata.e ≈ y[2] - else - @error "wrong outtype $outtype" - end - if test_gpu - ygpu = f(lgpu, ggpu, xgpu, egpu) - if outtype == :node_edge - @test ygpu[1] isa CuArray - @test eltype(ygpu[1]) == eltype(xgpu) - @test Array(ygpu[1]) ≈ y[1] - @test ygpu[2] isa CuArray - @test eltype(ygpu[2]) == eltype(xgpu) - @test Array(ygpu[2]) ≈ y[2] - else - @test ygpu isa CuArray - @test eltype(ygpu) == eltype(xgpu) - @test Array(ygpu) ≈ y - end - end - - # TEST x INPUT GRADIENT - x̄ = gradient(x -> loss(l, g, x, e), x)[1] - x̄_fd = FiniteDifferences.grad(fdm, x64 -> loss(l64, g64, x64, e64), x64)[1] - @test eltype(x̄) == eltype(x) - @test x̄≈x̄_fd atol=atol rtol=rtol - - if test_gpu - x̄gpu = gradient(xgpu -> loss(lgpu, ggpu, xgpu, egpu), xgpu)[1] - @test x̄gpu isa CuArray - @test eltype(x̄gpu) == eltype(x) - @test Array(x̄gpu)≈x̄ atol=atol rtol=rtol - end - - # TEST e INPUT GRADIENT - if e !== nothing - verbose && println("Test e gradient cpu") - ē = gradient(e -> loss(l, g, x, e), e)[1] - ē_fd = FiniteDifferences.grad(fdm, e64 -> loss(l64, g64, x64, e64), e64)[1] - @test eltype(ē) == eltype(e) - @test ē≈ē_fd atol=atol rtol=rtol - - if test_gpu - verbose && println("Test e gradient gpu") - ēgpu = gradient(egpu -> loss(lgpu, ggpu, xgpu, egpu), egpu)[1] - @test ēgpu isa CuArray - @test eltype(ēgpu) == eltype(ē) - @test Array(ēgpu)≈ē atol=atol rtol=rtol - end - end - - # TEST LAYER GRADIENT - l(g, x, e) - l̄ = gradient(l -> loss(l, g, x, e), l)[1] - l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64, x64, e64), l64)[1] - test_approx_structs(l, l̄, l̄_fd; atol, rtol, exclude_grad_fields, verbose) - - if test_gpu - l̄gpu = gradient(lgpu -> loss(lgpu, ggpu, xgpu, egpu), lgpu)[1] - test_approx_structs(lgpu, l̄gpu, l̄; atol, rtol, exclude_grad_fields, verbose) - end - - # TEST LAYER GRADIENT - l(g) - l̄ 
= gradient(l -> loss(l, g), l)[1] - test_approx_structs(l, l̄, l̄_fd; atol, rtol, exclude_grad_fields, verbose) - - return true -end - -function test_approx_structs(l, l̄, l̄fd; atol = 1e-5, rtol = 1e-5, - exclude_grad_fields = [], - verbose = false) - l̄ = l̄ isa Base.RefValue ? l̄[] : l̄ # Zygote wraps gradient of mutables in RefValue - l̄fd = l̄fd isa Base.RefValue ? l̄fd[] : l̄fd # Zygote wraps gradient of mutables in RefValue - - for f in fieldnames(typeof(l)) - f ∈ exclude_grad_fields && continue - verbose && println("Test gradient of field $f...") - x, g, gfd = getfield(l, f), getfield(l̄, f), getfield(l̄fd, f) - test_approx_structs(x, g, gfd; atol, rtol, exclude_grad_fields, verbose) - verbose && println("... field $f done!") - end - return true -end - -function test_approx_structs(x, g::Nothing, gfd; atol, rtol, kws...) - # finite diff gradients has to be zero if present - @test !(gfd isa AbstractArray) || isapprox(gfd, fill!(similar(gfd), 0); atol, rtol) -end - -function test_approx_structs(x::Union{AbstractArray, Number}, - g::Union{AbstractArray, Number}, gfd; atol, rtol, kws...) - @test eltype(g) == eltype(x) - if x isa CuArray - @test g isa CuArray - g = Array(g) - end - @test g≈gfd atol=atol rtol=rtol -end - -""" - to32(m) - -Convert the `eltype` of model's float parameters to `Float32`. -Preserves integer arrays. -""" -to32(m) = _paramtype(Float32, m) - -""" - to64(m) - -Convert the `eltype` of model's float parameters to `Float64`. -Preserves integer arrays. 
-""" -to64(m) = _paramtype(Float64, m) - -struct GNNEltypeAdaptor{T} end - -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:AbstractFloat}) where T = convert(AbstractArray{T}, x) -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:Integer}) where T = x -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:Number}) where T = convert(AbstractArray{T}, x) - -_paramtype(::Type{T}, m) where T = fmap(adapt(GNNEltypeAdaptor{T}()), m) diff --git a/GNNlib/src/layers/conv.jl b/GNNlib/src/layers/conv.jl index e310fa81c..8b378cbdc 100644 --- a/GNNlib/src/layers/conv.jl +++ b/GNNlib/src/layers/conv.jl @@ -253,7 +253,6 @@ function gin_conv(l, g::AbstractGNNGraph, x) xj, xi = expand_srcdst(g, x) m = propagate(copy_xj, g, l.aggr, xj = xj) - return l.nn((1 .+ ofeltype(xi, l.ϵ)) .* xi .+ m) end diff --git a/GraphNeuralNetworks/Project.toml b/GraphNeuralNetworks/Project.toml index 29aaf3acd..dabc74ac5 100644 --- a/GraphNeuralNetworks/Project.toml +++ b/GraphNeuralNetworks/Project.toml @@ -33,28 +33,26 @@ LinearAlgebra = "1" MLUtils = "0.4" MacroTools = "0.5" NNlib = "0.9" +Pkg = "1" Random = "1" Reexport = "1" Statistics = "1" TestItemRunner = "1.0.5" -cuDNN = "1" julia = "1.10" [extras] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" -cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [targets] -test = ["Test", "TestItemRunner", "MLDatasets", 
"Adapt", "DataFrames", "InlineStrings", - "SparseArrays", "Graphs", "Zygote", "FiniteDifferences", "ChainRulesTestUtils", "CUDA", "cuDNN"] +test = ["Test", "TestItemRunner", "Pkg", "MLDatasets", "Adapt", "DataFrames", "InlineStrings", "SparseArrays", "Graphs", "Zygote", "FiniteDifferences", "ChainRulesTestUtils"] diff --git a/GraphNeuralNetworks/test/examples/node_classification_cora.jl b/GraphNeuralNetworks/test/examples/node_classification_cora.jl index cfcdc2d1c..9a424a26d 100644 --- a/GraphNeuralNetworks/test/examples/node_classification_cora.jl +++ b/GraphNeuralNetworks/test/examples/node_classification_cora.jl @@ -1,14 +1,13 @@ -@testitem "Training Example" setup=[TestModule] begin - using .TestModule +@testmodule TrainingExampleModule begin using Flux using Flux: onecold, onehotbatch using Flux.Losses: logitcrossentropy using GraphNeuralNetworks using MLDatasets: Cora using Statistics, Random - using CUDA - CUDA.allowscalar(false) - + using Test + ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" + function eval_loss_accuracy(X, y, ids, model, g) ŷ = model(g, X) l = logitcrossentropy(ŷ[:, ids], y[:, ids]) @@ -21,7 +20,7 @@ η = 5.0f-3 # learning rate epochs = 10 # number of epochs seed = 17 # set seed > 0 for reproducibility - usecuda = false # if true use cuda (if available) + use_gpu = false # if true use gpu (if available) nhidden = 64 # dimension of hidden features end @@ -29,11 +28,11 @@ args = Args(; kws...) 
args.seed > 0 && Random.seed!(args.seed) - if args.usecuda && CUDA.functional() - device = Flux.gpu - args.seed > 0 && CUDA.seed!(args.seed) + if args.use_gpu + device = gpu_device(force=true) + Random.seed!(default_device_rng(device)) else - device = Flux.cpu + device = cpu_device() end # LOAD DATA @@ -41,7 +40,7 @@ classes = dataset.metadata["classes"] g = mldataset2gnngraph(dataset) |> device X = g.ndata.features - y = onehotbatch(g.ndata.targets |> cpu, classes) |> device # remove when https://github.com/FluxML/Flux.jl/pull/1959 tagged + y = onehotbatch(g.ndata.targets, classes) train_mask = g.ndata.train_mask test_mask = g.ndata.test_mask ytrain = y[:, train_mask] @@ -78,7 +77,7 @@ return train_res, test_res end - function train_many(; usecuda = false) + function train_many(; use_gpu = false) for (layer, Layer) in [ ("GCNConv", (nin, nout) -> GCNConv(nin => nout, relu)), ("ResGatedGraphConv", (nin, nout) -> ResGatedGraphConv(nin => nout, relu)), @@ -96,16 +95,21 @@ ## ("EdgeConv",(nin, nout) -> EdgeConv(Dense(2nin, nout, relu))), # Fits the training set but does not generalize well ] @show layer - @time train_res, test_res = train(Layer; usecuda, verbose = false) + @time train_res, test_res = train(Layer; use_gpu, verbose = false) # @show train_res, test_res @test train_res.acc > 94 @test test_res.acc > 69 end end +end # module + +@testitem "training example" setup=[TrainingExampleModule] begin + using .TrainingExampleModule + TrainingExampleModule.train_many() +end - train_many(usecuda = false) - # #TODO - # if TEST_GPU - # train_many(usecuda = true) - # end +@testitem "training example GPU" setup=[TrainingExampleModule] tags=[:gpu] begin + using .TrainingExampleModule + TrainingExampleModule.train_many(use_gpu = true) end + diff --git a/GraphNeuralNetworks/test/layers/basic.jl b/GraphNeuralNetworks/test/layers/basic.jl index caad9458a..bce86b75d 100644 --- a/GraphNeuralNetworks/test/layers/basic.jl +++ b/GraphNeuralNetworks/test/layers/basic.jl @@ -18,7 
+18,7 @@ Flux.testmode!(gnn) - test_layer(gnn, g, rtol = 1e-5, exclude_grad_fields = [:μ, :σ²]) + test_gradients(gnn, g, x, rtol = 1e-5) @testset "constructor with names" begin m = GNNChain(GCNConv(din => d), @@ -53,7 +53,7 @@ Flux.trainmode!(gnn) - test_layer(gnn, g, rtol = 1e-4, atol=1e-4, exclude_grad_fields = [:μ, :σ²]) + test_gradients(gnn, g, x, rtol = 1e-4, atol=1e-4) end end diff --git a/GraphNeuralNetworks/test/layers/conv.jl b/GraphNeuralNetworks/test/layers/conv.jl index 238315a4f..88c6282fb 100644 --- a/GraphNeuralNetworks/test/layers/conv.jl +++ b/GraphNeuralNetworks/test/layers/conv.jl @@ -6,18 +6,26 @@ end @testitem "GCNConv" setup=[TolSnippet, TestModule] begin using .TestModule - l = GCNConv(D_IN => D_OUT) - for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) - end + @testset "basic" begin + l = GCNConv(D_IN => D_OUT) + for g in TEST_GRAPHS + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) + end - l = GCNConv(D_IN => D_OUT, tanh, bias = false) - for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) - end + l = GCNConv(D_IN => D_OUT, tanh, bias = false) + for g in TEST_GRAPHS + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) + end - l = GCNConv(D_IN => D_OUT, add_self_loops = false) - test_layer(l, TEST_GRAPHS[1], rtol = RTOL_HIGH, outsize = (D_OUT, TEST_GRAPHS[1].num_nodes)) + l = GCNConv(D_IN => D_OUT, add_self_loops = false) + for g in TEST_GRAPHS + has_isolated_nodes(g) && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) + end + end @testset "edge weights & custom normalization $GRAPH_T" for GRAPH_T in GRAPH_TYPES s = [2, 3, 1, 3, 1, 2] @@ -40,20 +48,34 @@ end g = GNNGraph((s, t, w), ndata = x, graph_type = GRAPH_T, edata = w) l = GCNConv(1 => 1, add_self_loops = false, use_edge_weight = true) @test gradient(w -> sum(l(g, x, w)), 
w)[1] isa AbstractVector{Float32} # redundant test but more explicit - test_layer(l, g, rtol = RTOL_HIGH, outsize = (1, g.num_nodes)) + @test size(l(g, x, w)) == (1, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end @testset "conv_weight" begin l = GraphNeuralNetworks.GCNConv(D_IN => D_OUT) w = zeros(Float32, D_OUT, D_IN) - g1 = GNNGraph(TEST_GRAPHS[1], ndata = ones(Float32, D_IN, 4)) - @test l(g1, g1.ndata.x, conv_weight = w) == zeros(Float32, D_OUT, 4) - a = rand(Float32, D_IN, 4) - g2 = GNNGraph(TEST_GRAPHS[1], ndata = a) - @test l(g2, g2.ndata.x, conv_weight = w) == w * a + + for g in TEST_GRAPHS + x = ones(Float32, D_IN, g.num_nodes) + @test l(g, x, conv_weight = w) == zeros(Float32, D_OUT, g.num_nodes) + x = rand(Float32, D_IN, g.num_nodes) + @test l(g, x, conv_weight = w) == w * x + end end end + +@testitem "GCNConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = GCNConv(D_IN => D_OUT) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "ChebConv" setup=[TolSnippet, TestModule] begin using .TestModule k = 2 @@ -63,7 +85,8 @@ end @test l.k == k for g in TEST_GRAPHS g = add_self_loops(g) - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW) end @testset "bias=false" begin @@ -72,16 +95,30 @@ end end end + +@testitem "ChebConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + k = 2 + l = ChebConv(D_IN => D_OUT, k) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "GraphConv" setup=[TolSnippet, TestModule] begin using .TestModule 
l = GraphConv(D_IN => D_OUT) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end l = GraphConv(D_IN => D_OUT, tanh, bias = false, aggr = mean) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end @testset "bias=false" begin @@ -90,15 +127,25 @@ end end end + +@testitem "GraphConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = GraphConv(D_IN => D_OUT) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + + @testitem "GATConv" setup=[TolSnippet, TestModule] begin using .TestModule for heads in (1, 2), concat in (true, false) l = GATConv(D_IN => D_OUT; heads, concat, dropout=0) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_LOW, - exclude_grad_fields = [:negative_slope, :dropout], - outsize = (concat ? heads * D_OUT : D_OUT, - g.num_nodes)) + @test size(l(g, g.x)) == (concat ? 
heads * D_OUT : D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW) end end @@ -106,9 +153,8 @@ end ein = 3 l = GATConv((D_IN, ein) => D_OUT, add_self_loops = false, dropout=0) g = GNNGraph(TEST_GRAPHS[1], edata = rand(Float32, ein, TEST_GRAPHS[1].num_edges)) - test_layer(l, g, rtol = RTOL_LOW, - exclude_grad_fields = [:negative_slope, :dropout], - outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW) end @testset "num params" begin @@ -121,15 +167,25 @@ end end end +@testitem "GATConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + for heads in (1, 2), concat in (true, false) + l = GATConv(D_IN => D_OUT; heads, concat, dropout=0) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (concat ? heads * D_OUT : D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW, test_gpu = true, compare_finite_diff = false) + end + end +end + @testitem "GATv2Conv" setup=[TolSnippet, TestModule] begin using .TestModule for heads in (1, 2), concat in (true, false) l = GATv2Conv(D_IN => D_OUT, tanh; heads, concat, dropout=0) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW, - exclude_grad_fields = [:negative_slope, :dropout], - outsize = (concat ? heads * D_OUT : D_OUT, - g.num_nodes)) + @test size(l(g, g.x)) == (concat ? 
heads * D_OUT : D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW, atol=ATOL_LOW) end end @@ -137,9 +193,8 @@ end ein = 3 l = GATv2Conv((D_IN, ein) => D_OUT, add_self_loops = false, dropout=0) g = GNNGraph(TEST_GRAPHS[1], edata = rand(Float32, ein, TEST_GRAPHS[1].num_edges)) - test_layer(l, g, rtol = RTOL_LOW, atol=ATOL_LOW, - exclude_grad_fields = [:negative_slope, :dropout], - outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW, atol=ATOL_LOW) end @testset "num params" begin @@ -152,6 +207,18 @@ end end end +@testitem "GATv2Conv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + for heads in (1, 2), concat in (true, false) + l = GATv2Conv(D_IN => D_OUT, tanh; heads, concat, dropout=0) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (concat ? heads * D_OUT : D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW, atol=ATOL_LOW, test_gpu = true, compare_finite_diff = false) + end + end +end + @testitem "GatedGraphConv" setup=[TolSnippet, TestModule] begin using .TestModule num_layers = 3 @@ -159,30 +226,66 @@ end @test size(l.weight) == (D_OUT, D_OUT, num_layers) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end + +@testitem "GatedGraphConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + num_layers = 3 + l = GatedGraphConv(D_OUT, num_layers) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "EdgeConv" setup=[TolSnippet, TestModule] begin using .TestModule l = EdgeConv(Dense(2 * D_IN, D_OUT), aggr = +) for g in TEST_GRAPHS - test_layer(l, g, 
rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end +@testitem "EdgeConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = EdgeConv(Dense(2 * D_IN, D_OUT), aggr = +) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "GINConv" setup=[TolSnippet, TestModule] begin using .TestModule nn = Dense(D_IN, D_OUT) - l = GINConv(nn, 0.01f0, aggr = mean) + l = GINConv(nn, 0.01, aggr = mean) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end @test !in(:eps, Flux.trainable(l)) end +@testitem "GINConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + nn = Dense(D_IN, D_OUT) + l = GINConv(nn, 0.01, aggr = mean) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "NNConv" setup=[TolSnippet, TestModule] begin using .TestModule edim = 10 @@ -191,10 +294,24 @@ end l = NNConv(D_IN => D_OUT, nn, tanh, bias = true, aggr = +) for g in TEST_GRAPHS g = GNNGraph(g, edata = rand(Float32, edim, g.num_edges)) - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH) end end +@testitem "NNConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + edim = 10 + nn = Dense(edim, D_OUT * D_IN) + l = NNConv(D_IN => D_OUT, nn, tanh, bias = true, aggr = +) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue 
+ g = GNNGraph(g, edata = rand(Float32, edim, g.num_edges)) + @test size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "SAGEConv" setup=[TolSnippet, TestModule] begin using .TestModule l = SAGEConv(D_IN => D_OUT) @@ -202,18 +319,40 @@ end l = SAGEConv(D_IN => D_OUT, tanh, bias = false, aggr = +) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end +@testitem "SAGEConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = SAGEConv(D_IN => D_OUT) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "ResGatedGraphConv" setup=[TolSnippet, TestModule] begin using .TestModule l = ResGatedGraphConv(D_IN => D_OUT, tanh, bias = true) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end +@testitem "ResGatedGraphConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = ResGatedGraphConv(D_IN => D_OUT, tanh, bias = true) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "CGConv" setup=[TolSnippet, TestModule] begin using .TestModule @@ -221,7 +360,8 @@ end l = CGConv((D_IN, edim) => D_OUT, tanh, residual = false, bias = true) for g in TEST_GRAPHS g = GNNGraph(g, edata = rand(Float32, edim, g.num_edges)) - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test 
size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH) end # no edge features @@ -231,6 +371,17 @@ end @test l1(g1, g1.ndata.x, nothing) == l1(g1).ndata.x end +@testitem "CGConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + edim = 10 + l = CGConv((D_IN, edim) => D_OUT, tanh, residual = false, bias = true) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x, g.e)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "AGNNConv" setup=[TolSnippet, TestModule] begin using .TestModule l = AGNNConv(trainable=false, add_self_loops=false) @@ -245,21 +396,53 @@ end @test l.trainable == true Flux.trainable(l) == (; β = [1f0]) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_IN, g.num_nodes)) + @test size(l(g, g.x)) == (D_IN, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end +@testitem "AGNNConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = AGNNConv(trainable=false, add_self_loops=false) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_IN, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "MEGNetConv" setup=[TolSnippet, TestModule] begin using .TestModule l = MEGNetConv(D_IN => D_OUT, aggr = +) for g in TEST_GRAPHS g = GNNGraph(g, edata = rand(Float32, D_IN, g.num_edges)) - test_layer(l, g, rtol = RTOL_LOW, - outtype = :node_edge, - outsize = ((D_OUT, g.num_nodes), (D_OUT, g.num_edges))) + y = l(g, g.x, g.e) + @test size(y[1]) == (D_OUT, g.num_nodes) + @test size(y[2]) == (D_OUT, g.num_edges) + function loss(l, g, x, e) + y = l(g, x, e) + return mean(y[1]) + sum(y[2]) + end + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW; loss) end end +@testitem "MEGNetConv GPU" 
setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = MEGNetConv(D_IN => D_OUT, aggr = +) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + y = l(g, g.x, g.e) + @test size(y[1]) == (D_OUT, g.num_nodes) + @test size(y[2]) == (D_OUT, g.num_edges) + function loss(l, g, x, e) + y = l(g, x, e) + return mean(y[1]) + sum(y[2]) + end + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW; loss, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "GMMConv" setup=[TolSnippet, TestModule] begin using .TestModule ein_channel = 10 @@ -267,45 +450,85 @@ end l = GMMConv((D_IN, ein_channel) => D_OUT, K = K) for g in TEST_GRAPHS g = GNNGraph(g, edata = rand(Float32, ein_channel, g.num_edges)) - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + y = l(g, g.x, g.e) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH) end end +@testitem "GMMConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + ein_channel = 10 + K = 5 + l = GMMConv((D_IN, ein_channel) => D_OUT, K = K) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + y = l(g, g.x, g.e) + test_gradients(l, g, g.x, g.e, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "SGConv" setup=[TolSnippet, TestModule] begin using .TestModule K = [1, 2, 3] # for different number of hops for k in K l = SGConv(D_IN => D_OUT, k, add_self_loops = true) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end l = SGConv(D_IN => D_OUT, k, add_self_loops = true) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end end +@testitem "SGConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + k = 2 + l = SGConv(D_IN 
=> D_OUT, k, add_self_loops = true) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "TAGConv" setup=[TolSnippet, TestModule] begin using .TestModule K = [1, 2, 3] for k in K l = TAGConv(D_IN => D_OUT, k, add_self_loops = true) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end l = TAGConv(D_IN => D_OUT, k, add_self_loops = true) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end end +@testitem "TAGConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + k = 2 + l = TAGConv(D_IN => D_OUT, k, add_self_loops = true) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end + @testitem "EGNNConv" setup=[TolSnippet, TestModule] begin using .TestModule #TODO test gradient + #TODO test gpu @testset "EGNNConv $GRAPH_T" for GRAPH_T in GRAPH_TYPES hin = 5 hout = 5 @@ -331,37 +554,61 @@ end # batch_norm=false here for tests to pass; true in paper for g in TEST_GRAPHS g = GNNGraph(g, ndata = rand(Float32, D_IN * heads, g.num_nodes)) - test_layer(l, g, rtol = RTOL_LOW, - exclude_grad_fields = [:negative_slope], - outsize = (D_IN * heads, g.num_nodes)) + @test size(l(g, g.x)) == (D_IN * heads, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW) end # used like in Shi et al., 2021 l = TransformerConv((D_IN, ein) => D_IN; heads, gating = true, bias_qkv = true) for g in TEST_GRAPHS g = GNNGraph(g, edata = rand(Float32, ein, g.num_edges)) 
- test_layer(l, g, rtol = RTOL_LOW, - exclude_grad_fields = [:negative_slope], - outsize = (D_IN * heads, g.num_nodes)) + @test size(l(g, g.x, g.e)) == (D_IN * heads, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW) end # test averaging heads l = TransformerConv(D_IN => D_IN; heads, concat = false, bias_root = false, root_weight = false) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_LOW, - exclude_grad_fields = [:negative_slope], - outsize = (D_IN, g.num_nodes)) + @test size(l(g, g.x)) == (D_IN, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_LOW) end end +@testitem "TransformerConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + ein = 2 + heads = 3 + + # used like in Shi et al., 2021 + l = TransformerConv((D_IN, ein) => D_IN; heads, gating = true, + bias_qkv = true) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x, g.e)) == (D_IN * heads, g.num_nodes) + test_gradients(l, g, g.x, g.e, rtol = RTOL_LOW, test_gpu = true, compare_finite_diff = false) + end +end + + @testitem "DConv" setup=[TolSnippet, TestModule] begin using .TestModule K = [1, 2, 3] # for different number of hops for k in K l = DConv(D_IN => D_OUT, k) for g in TEST_GRAPHS - test_layer(l, g, rtol = RTOL_HIGH, outsize = (D_OUT, g.num_nodes)) + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH) end end -end \ No newline at end of file +end + +@testitem "DConv GPU" setup=[TolSnippet, TestModule] tags=[:gpu] begin + using .TestModule + l = DConv(D_IN => D_OUT, 2) + for g in TEST_GRAPHS + g.graph isa AbstractSparseMatrix && continue + @test size(l(g, g.x)) == (D_OUT, g.num_nodes) + test_gradients(l, g, g.x, rtol = RTOL_HIGH, test_gpu = true, compare_finite_diff = false) + end +end diff --git a/GraphNeuralNetworks/test/layers/heteroconv.jl b/GraphNeuralNetworks/test/layers/heteroconv.jl index 7584378cf..4a6d40ca3 100644 --- a/GraphNeuralNetworks/test/layers/heteroconv.jl 
+++ b/GraphNeuralNetworks/test/layers/heteroconv.jl @@ -17,17 +17,17 @@ y = model(g, x) grad, dx = gradient((model, x) -> sum(model(g, x)[1]) + sum(model(g, x)[2].^2), model, x) - ngrad, ndx = ngradient((model, x) -> sum(model(g, x)[1]) + sum(model(g, x)[2].^2), model, x) + yhat, (ngrad, ndx) = finitediff_withgradient((model, x) -> sum(model(g, x)[1]) + sum(model(g, x)[2].^2), model, x) - @test grad.layers[1].weight1 ≈ ngrad.layers[1].weight1 rtol=1e-4 - @test grad.layers[1].weight2 ≈ ngrad.layers[1].weight2 rtol=1e-4 - @test grad.layers[1].bias ≈ ngrad.layers[1].bias rtol=1e-4 - @test grad.layers[2].weight1 ≈ ngrad.layers[2].weight1 rtol=1e-4 - @test grad.layers[2].weight2 ≈ ngrad.layers[2].weight2 rtol=1e-4 - @test grad.layers[2].bias ≈ ngrad.layers[2].bias rtol=1e-4 + @test grad.layers[1].weight1 ≈ ngrad.layers[1].weight1 rtol=1e-3 + @test grad.layers[1].weight2 ≈ ngrad.layers[1].weight2 rtol=1e-3 + @test grad.layers[1].bias ≈ ngrad.layers[1].bias rtol=1e-3 + @test grad.layers[2].weight1 ≈ ngrad.layers[2].weight1 rtol=1e-3 + @test grad.layers[2].weight2 ≈ ngrad.layers[2].weight2 rtol=1e-3 + @test grad.layers[2].bias ≈ ngrad.layers[2].bias rtol=1e-3 - @test dx[:A] ≈ ndx[:A] rtol=1e-4 - @test dx[:B] ≈ ndx[:B] rtol=1e-4 + @test dx[:A] ≈ ndx[:A] rtol=1e-3 + @test dx[:B] ≈ ndx[:B] rtol=1e-3 end @testset "Constructor from pairs" begin diff --git a/GraphNeuralNetworks/test/layers/pool.jl b/GraphNeuralNetworks/test/layers/pool.jl index 60789f2e5..382a728ea 100644 --- a/GraphNeuralNetworks/test/layers/pool.jl +++ b/GraphNeuralNetworks/test/layers/pool.jl @@ -19,7 +19,7 @@ @test u[:, [1]] ≈ sum(g.ndata.x[:, 1:n], dims = 2) @test p(g).gdata.u == u - test_layer(p, g, rtol = 1e-5, exclude_grad_fields = [:aggr], outtype = :graph) + test_gradients(p, g, g.x, rtol = 1e-5) end end @@ -41,7 +41,8 @@ end graph_type = GRAPH_T) for i in 1:ng]) - test_layer(p, g, rtol = 1e-5, outtype = :graph, outsize = (chout, ng)) + @test size(p(g, g.x)) == (chout, ng) + test_gradients(p, g, g.x, 
rtol = 1e-5) end end @@ -83,7 +84,6 @@ end @test size(y) == (2 * n_in, g.num_graphs) ## TODO the numerical gradient seems to be 3 times smaller than zygote one - # test_layer(l, g, rtol = 1e-4, atol=1e-4, outtype = :graph, outsize = (2 * n_in, g.num_graphs), - # verbose=true, exclude_grad_fields = [:state0, :state]) + # test_gradients(l, g, g.x, rtol = 1e-4, atol=1e-4) end end diff --git a/GraphNeuralNetworks/test/runtests.jl b/GraphNeuralNetworks/test/runtests.jl index b9e874db1..e3ca04f88 100644 --- a/GraphNeuralNetworks/test/runtests.jl +++ b/GraphNeuralNetworks/test/runtests.jl @@ -1,3 +1,25 @@ using TestItemRunner -@run_package_tests +## See https://www.julia-vscode.org/docs/stable/userguide/testitems/ +## for how to run the tests within VS Code. +## See test_module.jl for the test infrastructure. + +## Uncomment below to change the default test settings +# ENV["GNN_TEST_CPU"] = "false" +# ENV["GNN_TEST_CUDA"] = "true" +# ENV["GNN_TEST_AMDGPU"] = "true" +# ENV["GNN_TEST_Metal"] = "true" + +if get(ENV, "GNN_TEST_CPU", "true") == "true" + @run_package_tests filter = ti -> :gpu ∉ ti.tags +end +if get(ENV, "GNN_TEST_CUDA", "false") == "true" + @run_package_tests filter = ti -> :gpu ∈ ti.tags +end +if get(ENV, "GNN_TEST_AMDGPU", "false") == "true" + @run_package_tests filter = ti -> :gpu ∈ ti.tags +end +if get(ENV, "GNN_TEST_Metal", "false") == "true" + @run_package_tests filter = ti -> :gpu ∈ ti.tags +end + diff --git a/GraphNeuralNetworks/test/test_module.jl b/GraphNeuralNetworks/test/test_module.jl index 7c5b9a77a..abd54b84c 100644 --- a/GraphNeuralNetworks/test/test_module.jl +++ b/GraphNeuralNetworks/test/test_module.jl @@ -5,244 +5,144 @@ using Test using Statistics, Random using Flux, Functors using Graphs -using ChainRulesTestUtils, FiniteDifferences, Zygote, Adapt, CUDA -CUDA.allowscalar(false) +using ChainRulesTestUtils, FiniteDifferences, Zygote, Adapt +using SparseArrays +using Pkg + +## Uncomment below to change the default test settings +# 
ENV["GNN_TEST_CPU"] = "false" +# ENV["GNN_TEST_CUDA"] = "true" +# ENV["GNN_TEST_AMDGPU"] = "true" +# ENV["GNN_TEST_Metal"] = "true" + +if get(ENV, "GNN_TEST_CUDA", "false") == "true" + Pkg.add(["CUDA", "cuDNN"]) + using CUDA + CUDA.allowscalar(false) +end +if get(ENV, "GNN_TEST_AMDGPU", "false") == "true" + Pkg.add("AMDGPU") + using AMDGPU + AMDGPU.allowscalar(false) +end +if get(ENV, "GNN_TEST_Metal", "false") == "true" + Pkg.add("Metal") + using Metal + Metal.allowscalar(false) +end + +# from Base +export mean, randn, SparseArrays, AbstractSparseMatrix # from other packages export Flux, gradient, Dense, Chain, relu, random_regular_graph, erdos_renyi, BatchNorm, LayerNorm, Dropout, Parallel -export mean, randn + # from this module -export D_IN, D_OUT, test_layer, ngradient, GRAPH_TYPES, TEST_GRAPHS +export D_IN, D_OUT, GRAPH_TYPES, TEST_GRAPHS, + test_gradients, finitediff_withgradient, + check_equal_leaves + const D_IN = 3 const D_OUT = 5 -function ngradient(f, x...) - fdm = central_fdm(5, 1) - return FiniteDifferences.grad(fdm, f, x...) +function finitediff_withgradient(f, x...) + y = f(x...) + # We set a range to avoid domain errors + fdm = FiniteDifferences.central_fdm(5, 1, max_range=1e-2) + return y, FiniteDifferences.grad(fdm, f, x...) end -const rule_config = Zygote.ZygoteRuleConfig() - -# Using this until https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188 is fixed -function FiniteDifferences.to_vec(x::Integer) - Integer_from_vec(v) = x - return Int[x], Integer_from_vec -end - -# Test that forward pass on cpu and gpu are the same. -# Tests also gradient on cpu and gpu comparing with -# finite difference methods. -# Test gradients with respects to layer weights and to input. -# If `g` has edge features, it is assumed that the layer can -# use them in the forward pass as `l(g, x, e)`. -# Test also gradient with respect to `e`. 
-function test_layer(l, g::GNNGraph; atol = 1e-5, rtol = 1e-5, - exclude_grad_fields = [], - verbose = false, - test_gpu = false, - outsize = nothing, - outtype = :node) - - # TODO these give errors, probably some bugs in ChainRulesTestUtils - # test_rrule(rule_config, x -> l(g, x), x; rrule_f=rrule_via_ad, check_inferred=false) - # test_rrule(rule_config, l -> l(g, x), l; rrule_f=rrule_via_ad, check_inferred=false) - - isnothing(node_features(g)) && error("Plese add node data to the input graph") - fdm = central_fdm(5, 1) - - x = node_features(g) - e = edge_features(g) - use_edge_feat = !isnothing(e) - - x64, e64, l64, g64 = to64.([x, e, l, g]) # needed for accurate FiniteDifferences' grad - xgpu, egpu, lgpu, ggpu = gpu.([x, e, l, g]) - - f(l, g::GNNGraph) = l(g) - f(l, g::GNNGraph, x, e) = use_edge_feat ? l(g, x, e) : l(g, x) - - loss(l, g::GNNGraph) = - if outtype == :node - sum(node_features(f(l, g))) - elseif outtype == :edge - sum(edge_features(f(l, g))) - elseif outtype == :graph - sum(graph_features(f(l, g))) - elseif outtype == :node_edge - gnew = f(l, g) - sum(node_features(gnew)) + sum(edge_features(gnew)) - end - - function loss(l, g::GNNGraph, x, e) - y = f(l, g, x, e) - if outtype == :node_edge - return sum(y[1]) + sum(y[2]) - else - return sum(y) - end - end - - # TEST OUTPUT - y = f(l, g, x, e) - if outtype == :node_edge - @assert y isa Tuple - @test eltype(y[1]) == eltype(x) - @test eltype(y[2]) == eltype(e) - @test all(isfinite, y[1]) - @test all(isfinite, y[2]) - if !isnothing(outsize) - @test size(y[1]) == outsize[1] - @test size(y[2]) == outsize[2] - end - else - @test eltype(y) == eltype(x) - @test all(isfinite, y) - if !isnothing(outsize) - @test size(y) == outsize +function check_equal_leaves(a, b; rtol=1e-4, atol=1e-4) + fmapstructure_with_path(a, b) do kp, x, y + if x isa AbstractArray + # @show kp + @test x ≈ y rtol=rtol atol=atol + # elseif x isa Number + # @show kp + # @test x ≈ y rtol=rtol atol=atol end end +end - # test same output on 
different graph formats - gcoo = GNNGraph(g, graph_type = :coo) - ycoo = f(l, gcoo, x, e) - if outtype == :node_edge - @test ycoo[1] ≈ y[1] - @test ycoo[2] ≈ y[2] - else - @test ycoo ≈ y +function test_gradients( + f, + graph::GNNGraph, + xs...; + rtol=1e-5, atol=1e-5, + test_gpu = false, + test_grad_f = true, + test_grad_x = true, + compare_finite_diff = true, + loss = (f, g, xs...) -> mean(f(g, xs...)), + ) + + if !test_gpu && !compare_finite_diff + error("You should either compare finite diff vs CPU AD \ + or CPU AD vs GPU AD.") end - g′ = f(l, g) - if outtype == :node - @test g′.ndata.x ≈ y - elseif outtype == :edge - @test g′.edata.e ≈ y - elseif outtype == :graph - @test g′.gdata.u ≈ y - elseif outtype == :node_edge - @test g′.ndata.x ≈ y[1] - @test g′.edata.e ≈ y[2] - else - @error "wrong outtype $outtype" - end + ## Let's make sure first that the forward pass works. + l = loss(f, graph, xs...) + @test l isa Number if test_gpu - ygpu = f(lgpu, ggpu, xgpu, egpu) - if outtype == :node_edge - @test ygpu[1] isa CuArray - @test eltype(ygpu[1]) == eltype(xgpu) - @test Array(ygpu[1]) ≈ y[1] - @test ygpu[2] isa CuArray - @test eltype(ygpu[2]) == eltype(xgpu) - @test Array(ygpu[2]) ≈ y[2] - else - @test ygpu isa CuArray - @test eltype(ygpu) == eltype(xgpu) - @test Array(ygpu) ≈ y - end + gpu_dev = gpu_device(force=true) + cpu_dev = cpu_device() + graph_gpu = graph |> gpu_dev + xs_gpu = xs |> gpu_dev + f_gpu = f |> gpu_dev + l_gpu = loss(f_gpu, graph_gpu, xs_gpu...) 
+ @test l_gpu isa Number end - # TEST x INPUT GRADIENT - x̄ = gradient(x -> loss(l, g, x, e), x)[1] - x̄_fd = FiniteDifferences.grad(fdm, x64 -> loss(l64, g64, x64, e64), x64)[1] - @test eltype(x̄) == eltype(x) - @test x̄≈x̄_fd atol=atol rtol=rtol - - if test_gpu - x̄gpu = gradient(xgpu -> loss(lgpu, ggpu, xgpu, egpu), xgpu)[1] - @test x̄gpu isa CuArray - @test eltype(x̄gpu) == eltype(x) - @test Array(x̄gpu)≈x̄ atol=atol rtol=rtol - end - - # TEST e INPUT GRADIENT - if e !== nothing - verbose && println("Test e gradient cpu") - ē = gradient(e -> loss(l, g, x, e), e)[1] - ē_fd = FiniteDifferences.grad(fdm, e64 -> loss(l64, g64, x64, e64), e64)[1] - @test eltype(ē) == eltype(e) - @test ē≈ē_fd atol=atol rtol=rtol + if test_grad_x + # Zygote gradient with respect to input. + y, g = Zygote.withgradient((xs...) -> loss(f, graph, xs...), xs...) + + if compare_finite_diff + # Cast to Float64 to avoid precision issues. + f64 = f |> Flux.f64 + xs64 = xs .|> Flux.f64 + y_fd, g_fd = finitediff_withgradient((xs...) -> loss(f64, graph, xs...), xs64...) + @test y ≈ y_fd rtol=rtol atol=atol + check_equal_leaves(g, g_fd; rtol, atol) + end if test_gpu - verbose && println("Test e gradient gpu") - ēgpu = gradient(egpu -> loss(lgpu, ggpu, xgpu, egpu), egpu)[1] - @test ēgpu isa CuArray - @test eltype(ēgpu) == eltype(ē) - @test Array(ēgpu)≈ē atol=atol rtol=rtol + # Zygote gradient with respect to input on GPU. + y_gpu, g_gpu = Zygote.withgradient((xs...) -> loss(f_gpu, graph_gpu, xs...), xs_gpu...) 
+ @test get_device(g_gpu) == get_device(xs_gpu) + @test y_gpu ≈ y rtol=rtol atol=atol + check_equal_leaves(g_gpu |> cpu_dev, g; rtol, atol) end end - # TEST LAYER GRADIENT - l(g, x, e) - l̄ = gradient(l -> loss(l, g, x, e), l)[1] - l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64, x64, e64), l64)[1] - test_approx_structs(l, l̄, l̄_fd; atol, rtol, exclude_grad_fields, verbose) - - if test_gpu - l̄gpu = gradient(lgpu -> loss(lgpu, ggpu, xgpu, egpu), lgpu)[1] - test_approx_structs(lgpu, l̄gpu, l̄; atol, rtol, exclude_grad_fields, verbose) - end - - # TEST LAYER GRADIENT - l(g) - l̄ = gradient(l -> loss(l, g), l)[1] - test_approx_structs(l, l̄, l̄_fd; atol, rtol, exclude_grad_fields, verbose) - - return true -end - -function test_approx_structs(l, l̄, l̄fd; atol = 1e-5, rtol = 1e-5, - exclude_grad_fields = [], - verbose = false) - l̄ = l̄ isa Base.RefValue ? l̄[] : l̄ # Zygote wraps gradient of mutables in RefValue - l̄fd = l̄fd isa Base.RefValue ? l̄fd[] : l̄fd # Zygote wraps gradient of mutables in RefValue + if test_grad_f + # Zygote gradient with respect to f. + y, g = Zygote.withgradient(f -> loss(f, graph, xs...), f) + + if compare_finite_diff + # Cast to Float64 to avoid precision issues. + f64 = f |> Flux.f64 + ps, re = Flux.destructure(f64) + y_fd, g_fd = finitediff_withgradient(ps -> loss(re(ps),graph, xs...), ps) + g_fd = (re(g_fd[1]),) + @test y ≈ y_fd rtol=rtol atol=atol + check_equal_leaves(g, g_fd; rtol, atol) + end - for f in fieldnames(typeof(l)) - f ∈ exclude_grad_fields && continue - verbose && println("Test gradient of field $f...") - x, g, gfd = getfield(l, f), getfield(l̄, f), getfield(l̄fd, f) - test_approx_structs(x, g, gfd; atol, rtol, exclude_grad_fields, verbose) - verbose && println("... field $f done!") + if test_gpu + # Zygote gradient with respect to f on GPU. 
+ y_gpu, g_gpu = Zygote.withgradient(f -> loss(f,graph_gpu, xs_gpu...), f_gpu) + # @test get_device(g_gpu) == get_device(xs_gpu) + @test y_gpu ≈ y rtol=rtol atol=atol + check_equal_leaves(g_gpu |> cpu_dev, g; rtol, atol) + end end return true end -function test_approx_structs(x, g::Nothing, gfd; atol, rtol, kws...) - # finite diff gradients has to be zero if present - @test !(gfd isa AbstractArray) || isapprox(gfd, fill!(similar(gfd), 0); atol, rtol) -end - -function test_approx_structs(x::Union{AbstractArray, Number}, - g::Union{AbstractArray, Number}, gfd; atol, rtol, kws...) - @test eltype(g) == eltype(x) - if x isa CuArray - @test g isa CuArray - g = Array(g) - end - @test g≈gfd atol=atol rtol=rtol -end - -""" - to32(m) - -Convert the `eltype` of model's float parameters to `Float32`. -Preserves integer arrays. -""" -to32(m) = _paramtype(Float32, m) - -""" - to64(m) - -Convert the `eltype` of model's float parameters to `Float64`. -Preserves integer arrays. -""" -to64(m) = _paramtype(Float64, m) - -struct GNNEltypeAdaptor{T} end - -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:AbstractFloat}) where T = convert(AbstractArray{T}, x) -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:Integer}) where T = x -Adapt.adapt_storage(::GNNEltypeAdaptor{T}, x::AbstractArray{<:Number}) where T = convert(AbstractArray{T}, x) - -_paramtype(::Type{T}, m) where T = fmap(adapt(GNNEltypeAdaptor{T}()), m) function generate_test_graphs(graph_type) adj1 = [0 1 0 1