
Commit 99b25b0

improve cuda tests
1 parent 6f3d4c8 commit 99b25b0

6 files changed: 93 additions & 81 deletions

Project.toml

Lines changed: 2 additions & 1 deletion
@@ -4,6 +4,7 @@ authors = ["Carlo Lucibello and contributors"]
 version = "0.1.0"
 
 [deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
@@ -25,7 +26,7 @@ ChainRulesCore = "1"
 DataStructures = "0.18"
 Flux = "0.12"
 KrylovKit = "0.5"
-LearnBase = "0.5"
+LearnBase = "0.4, 0.5"
 LightGraphs = "1.3"
 MacroTools = "0.5"
 NNlib = "0.7"
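For context, each comma-separated compat entry is a caret specifier, so the widened LearnBase bound accepts both breaking release series, 0.4.x and 0.5.x. This can be verified with Pkg's own (internal) parser, used here purely for illustration:

    using Pkg

    spec = Pkg.Types.semver_spec("0.4, 0.5")   # union of [0.4.0, 0.5.0) and [0.5.0, 0.6.0)
    @assert v"0.4.2" in spec
    @assert v"0.5.9" in spec
    @assert !(v"0.6.0" in spec)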

src/layers/conv.jl

Lines changed: 5 additions & 5 deletions
@@ -32,7 +32,7 @@ function GCNConv(ch::Pair{Int,Int}, σ=identity;
                  init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W = init(out, in)
-    b = Flux.create_bias(W, bias, out)
+    b = bias ? Flux.create_bias(W, true, out) : false
     GCNConv(W, b, σ)
 end
 
@@ -105,7 +105,7 @@ function ChebConv(ch::Pair{Int,Int}, k::Int;
                   init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W = init(out, in, k)
-    b = Flux.create_bias(W, bias, out)
+    b = bias ? Flux.create_bias(W, true, out) : false
     ChebConv(W, b, k)
 end
 
@@ -172,7 +172,7 @@ function GraphConv(ch::Pair{Int,Int}, σ=identity, aggr=+;
     in, out = ch
     W1 = init(out, in)
     W2 = init(out, in)
-    b = Flux.create_bias(W1, bias, out)
+    b = bias ? Flux.create_bias(W1, true, out) : false
     GraphConv(W1, W2, b, σ, aggr)
 end
 
@@ -243,7 +243,7 @@ function GATConv(ch::Pair{Int,Int}, σ=identity;
                  init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W = init(out*heads, in)
-    b = Flux.create_bias(W, bias, out*heads)
+    b = bias ? Flux.create_bias(W, true, out*heads) : false
     a = init(2*out, heads)
     negative_slope = convert(eltype(W), negative_slope)
     GATConv(W, b, a, σ, negative_slope, ch, heads, concat)
@@ -479,7 +479,7 @@ end
 function NNConv(ch::Pair{Int,Int}, nn, σ=identity; aggr=+, bias=true, init=glorot_uniform)
     in, out = ch
     W = init(out, in)
-    b = Flux.create_bias(W, bias, out)
+    b = bias ? Flux.create_bias(W, true, out) : false
     return NNConv(W, b, nn, σ, aggr)
 end
 
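All five constructors above switch to the same pattern: Flux.create_bias is only called when a bias is actually requested, and the literal false, which broadcasts as an additive zero and carries no trainable parameters, is stored otherwise. A minimal sketch of the two branches, assuming the Flux 0.12 API pinned in Project.toml:

    using Flux

    W = Flux.glorot_uniform(4, 3)    # stand-in for a layer's weight matrix (Float32)

    bias = true
    b = bias ? Flux.create_bias(W, true, 4) : false
    # bias == true  -> a 4-element zero Vector{Float32}, matching W's eltype
    # bias == false -> the Bool false, so W * x .+ b is simply W * x

Storing false keeps the no-bias case out of Flux.params and out of GPU transfers, which is presumably why the commit decides with an explicit ternary instead of passing the flag through create_bias.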

test/cuda/layers/conv.jl

Lines changed: 0 additions & 49 deletions
This file was deleted.

test/layers/conv.jl

Lines changed: 13 additions & 5 deletions
@@ -30,10 +30,10 @@
         gradtest(l, g, rtol=1e-5)
     end
 
-    # l = GCNConv(in_channel => out_channel, relu, bias=false)
-    # for g in test_graphs
-    #     gradtest(l, g)
-    # end
+    l = GCNConv(in_channel => out_channel, tanh, bias=false)
+    for g in test_graphs
+        gradtest(l, g)
+    end
 end
 
 
@@ -44,7 +44,8 @@
     @test size(l.bias) == (out_channel,)
     @test l.k == k
     for g in test_graphs
-        gradtest(l, g, rtol=1e-5, broken_grad_fields=[:weight])
+        gradtest(l, g, rtol=1e-5, broken_grad_fields=[:weight], test_gpu=false)
+        @test_broken gradtest(l, g, rtol=1e-5, broken_grad_fields=[:weight], test_gpu=true)
     end
 
     @testset "bias=false" begin
@@ -116,10 +117,17 @@
 @testset "NNConv" begin
     edim = 10
     nn = Dense(edim, out_channel * in_channel)
+
     l = NNConv(in_channel => out_channel, nn)
     for g in test_graphs
         g = GNNGraph(g, edata=rand(T, edim, g.num_edges))
         gradtest(l, g, rtol=1e-5)
     end
+
+    l = NNConv(in_channel => out_channel, nn, tanh, bias=false, aggr=mean)
+    for g in test_graphs
+        g = GNNGraph(g, edata=rand(T, edim, g.num_edges))
+        gradtest(l, g, rtol=1e-5)
+    end
 end
 end
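The ChebConv change splits the CPU and GPU checks: the GPU run is wrapped in @test_broken, which only works because gradtest now ends with return true (see test/test_utils.jl below). Test's broken markers expect an expression that currently fails or throws and will return true once fixed. A minimal illustration of the mechanism, with a hypothetical stand-in for the failing GPU path:

    using Test

    # Hypothetical stand-in for a gradtest call whose GPU kernels still error.
    broken_gpu_check() = error("scalar indexing on a CuArray")

    # Returning a Bool is what makes a check usable under @test / @test_broken.
    passing_check() = (@test 1 + 1 == 2; return true)

    @test passing_check()            # passes
    @test_broken broken_gpu_check()  # recorded as Broken while it throws;
                                     # reports an unexpected pass once it returns true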

test/runtests.jl

Lines changed: 5 additions & 3 deletions
@@ -11,7 +11,7 @@ using Test
 CUDA.allowscalar(false)
 
 include("test_utils.jl")
-include("cuda/test_utils.jl")
+# include("cuda/test_utils.jl")
 
 tests = [
     "gnngraph",
@@ -24,13 +24,15 @@ tests = [
 !CUDA.functional() && @warn("CUDA unavailable, not testing GPU support")
 
 # Testing all graph types. :sparse is a bit broken at the moment
-@testset "GraphNeuralNetworks: graph format $graph_type" for graph_type in (:coo, :sparse, :dense)
+@testset "GraphNeuralNetworks: graph format $graph_type" for graph_type in (:coo,)
 
     global GRAPH_T = graph_type
+    global TEST_GPU = CUDA.functional() && GRAPH_T != :sparse
+
     for t in tests
         include("$t.jl")
 
-        if CUDA.functional() && GRAPH_T != :sparse && isfile("cuda/$t.jl")
+        if TEST_GPU && isfile("cuda/$t.jl")
             include("cuda/$t.jl")
         end
     end
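The TEST_GPU global set here is read at call time as the default of gradtest's new test_gpu keyword (test/test_utils.jl below), so one switch in the runner gates every GPU assertion while individual tests can still override it, as the ChebConv testset does. A condensed sketch of the pattern, with hypothetical names:

    using CUDA

    HAS_GPU = CUDA.functional()    # decided once per test run

    # Keyword defaults are evaluated at call time, so this reads the global.
    check(x; on_gpu = HAS_GPU) = on_gpu ? Array(cu(x)) == x : true

    check([1, 2, 3])                  # follows the global switch
    check([1, 2, 3]; on_gpu = false)  # per-call opt-out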

test/test_utils.jl

Lines changed: 68 additions & 18 deletions
@@ -1,9 +1,12 @@
-using ChainRulesTestUtils, FiniteDifferences, Zygote, Adapt
+using ChainRulesTestUtils, FiniteDifferences, Zygote, Adapt, CUDA
+CUDA.allowscalar(false)
+
+# global GRAPH_T = :coo
+# global TEST_GPU = true
 
 const rule_config = Zygote.ZygoteRuleConfig()
 
-# Using this until https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188
-# is fixed
+# Using this until https://github.com/JuliaDiff/FiniteDifferences.jl/issues/188 is fixed
 function FiniteDifferences.to_vec(x::Integer)
     Integer_from_vec(v) = x
     return Int[x], Integer_from_vec
@@ -12,8 +15,10 @@ end
 function gradtest(l, g::GNNGraph; atol=1e-7, rtol=1e-5,
                   exclude_grad_fields=[],
                   broken_grad_fields=[],
-                  verbose = false
+                  verbose = false,
+                  test_gpu = TEST_GPU,
                   )
+
     # TODO these give errors, probably some bugs in ChainRulesTestUtils
     # test_rrule(rule_config, x -> l(g, x), x; rrule_f=rrule_via_ad, check_inferred=false)
     # test_rrule(rule_config, l -> l(g, x), l; rrule_f=rrule_via_ad, check_inferred=false)
@@ -24,75 +29,120 @@ function gradtest(l, g::GNNGraph; atol=1e-7, rtol=1e-5,
     x = node_features(g)
     e = edge_features(g)
 
+    x64, e64, l64, g64 = to64.([x, e, l, g])
+    xgpu, egpu, lgpu, ggpu = gpu.([x, e, l, g])
+
     f(l, g) = l(g)
-    f(l, g, x) = isnothing(e) ? l(g, x) : l(g, x, e)
+    f(l, g, x::AbstractArray{Float32}) = isnothing(e) ? l(g, x) : l(g, x, e)
+    f(l, g, x::AbstractArray{Float64}) = isnothing(e64) ? l(g, x) : l(g, x, e64)
+    f(l, g, x::CuArray) = isnothing(e64) ? l(g, x) : l(g, x, egpu)
 
     loss(l, g) = sum(node_features(f(l, g)))
     loss(l, g, x) = sum(f(l, g, x))
     loss(l, g, x, e) = sum(l(g, x, e))
 
-    x64, e64, l64, g64 = to64.([x, e, l, g])
+
     # TEST OUTPUT
     y = f(l, g, x)
     @test eltype(y) == eltype(x)
 
     g′ = f(l, g)
     @test g′.ndata.x ≈ y
 
-    # TEST X INPUT GRADIENT
+    if test_gpu
+        ygpu = f(lgpu, ggpu, xgpu)
+        @test ygpu isa CuArray
+        @test eltype(ygpu) == eltype(xgpu)
+        @test Array(ygpu) ≈ y
+    end
+
+
+    # TEST x INPUT GRADIENT
     x̄ = gradient(x -> loss(l, g, x), x)[1]
     x̄_fd = FiniteDifferences.grad(fdm, x64 -> loss(l64, g64, x64), x64)[1]
+    @test eltype(x̄) == eltype(x)
     @test x̄ ≈ x̄_fd atol=atol rtol=rtol
 
+    if test_gpu
+        x̄gpu = gradient(xgpu -> loss(lgpu, ggpu, xgpu), xgpu)[1]
+        @test x̄gpu isa CuArray
+        @test eltype(x̄gpu) == eltype(x)
+        @test Array(x̄gpu) ≈ x̄ atol=atol rtol=rtol
+    end
+
+
+    # TEST e INPUT GRADIENT
     if e !== nothing
-        # TEST E INPUT GRADIENT
         ē = gradient(e -> loss(l, g, x, e), e)[1]
        ē_fd = FiniteDifferences.grad(fdm, e64 -> loss(l64, g64, x64, e64), e64)[1]
+        @test eltype(ē) == eltype(e)
        @test ē ≈ ē_fd atol=atol rtol=rtol
+
+        if test_gpu
+            ēgpu = gradient(egpu -> loss(lgpu, ggpu, xgpu, egpu), egpu)[1]
+            @test ēgpu isa CuArray
+            @test eltype(ēgpu) == eltype(ē)
+            @test Array(ēgpu) ≈ ē atol=atol rtol=rtol
+        end
     end
 
+
     # TEST LAYER GRADIENT - l(g, x)
     l̄ = gradient(l -> loss(l, g, x), l)[1]
     l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64, x64), l64)[1]
     test_approx_structs(l, l̄, l̄_fd; atol, rtol, broken_grad_fields, exclude_grad_fields, verbose)
+
+    if test_gpu
+        l̄gpu = gradient(lgpu -> loss(lgpu, ggpu, xgpu), lgpu)[1]
+        test_approx_structs(lgpu, l̄gpu, l̄; atol, rtol, broken_grad_fields, exclude_grad_fields, verbose)
+    end
+
     # TEST LAYER GRADIENT - l(g)
     l̄ = gradient(l -> loss(l, g), l)[1]
     l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64), l64)[1]
     test_approx_structs(l, l̄, l̄_fd; atol, rtol, broken_grad_fields, exclude_grad_fields, verbose)
+
+    return true
 end
 
-function test_approx_structs(l, l̄, l̄_fd; atol=1e-5, rtol=1e-5,
+function test_approx_structs(l, l̄, l̄2; atol=1e-5, rtol=1e-5,
                              broken_grad_fields=[],
                              exclude_grad_fields=[],
                              verbose=false)
 
     for f in fieldnames(typeof(l))
         f ∈ exclude_grad_fields && continue
-        f̄, f̄_fd = getfield(l̄, f), getfield(l̄_fd, f)
+        f̄, f̄2 = getfield(l̄, f), getfield(l̄2, f)
+        x = getfield(l, f)
         if verbose
-            println()
-            @show f getfield(l, f) f̄ f̄_fd
-        end
+            println()
+            @show f x f̄ f̄2
+        end
         if isnothing(f̄)
             verbose && println("A")
-            @test !(f̄_fd isa AbstractArray) || isapprox(f̄_fd, fill!(similar(f̄_fd), 0); atol=atol, rtol=rtol)
+            @test !(f̄2 isa AbstractArray) || isapprox(f̄2, fill!(similar(f̄2), 0); atol=atol, rtol=rtol)
         elseif f̄ isa Union{AbstractArray, Number}
             verbose && println("B")
-            @test eltype(f̄) == eltype(getfield(l, f))
+            @test eltype(f̄) == eltype(x)
+            if x isa CuArray
+                @test f̄ isa CuArray
+                f̄ = Array(f̄)
+            end
             if f ∈ broken_grad_fields
-                @test_broken f̄ ≈ f̄_fd atol=atol rtol=rtol
+                @test_broken f̄ ≈ f̄2 atol=atol rtol=rtol
             else
-                @test f̄ ≈ f̄_fd atol=atol rtol=rtol
+                @test f̄ ≈ f̄2 atol=atol rtol=rtol
             end
         else
             verbose && println("C")
-            test_approx_structs(getfield(l, f), f̄, f̄_fd; broken_grad_fields)
+            test_approx_structs(x, f̄, f̄2; broken_grad_fields)
         end
     end
     return true
 end
 
 
+
 """
     to32(m)
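gradtest depends on to64 and gpu helpers defined further down this file; the diff view is cut off at the to32 docstring. For orientation, a plausible minimal version of the idea (not the actual implementation), built on Flux's fmap traversal:

    using Flux, CUDA

    # Hypothetical sketches: widen Float32 leaves to Float64, and move array
    # leaves to the GPU. The real helpers live below the truncation point.
    to64(m)  = Flux.fmap(x -> x isa AbstractArray{Float32} ? Float64.(x) : x, m)
    togpu(m) = Flux.fmap(x -> x isa AbstractArray ? CUDA.cu(x) : x, m)

The Float64 copies matter because gradtest compares Zygote's Float32 gradients against FiniteDifferences references, which are only trustworthy at higher precision; the Adapt dependency added in Project.toml presumably supports moving GNNGraph internals between devices in the same spirit.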
