
Commit eed2197

Merge pull request #43 from CarloLucibello/cl/dev
add SAGEConv + fix bug in GATConv
2 parents f0d8926 + 56acbeb commit eed2197

4 files changed: +111 −31 lines changed


src/GraphNeuralNetworks.jl
Lines changed: 1 addition & 0 deletions

@@ -46,6 +46,7 @@ export
   GINConv,
   GraphConv,
   NNConv,
+  SAGEConv,
 
   # layers/pool
   GlobalPool,

src/layers/conv.jl
Lines changed: 73 additions & 13 deletions

@@ -142,7 +142,7 @@ end
 
 
 @doc raw"""
-    GraphConv(in => out, σ=identity, aggr=+; bias=true, init=glorot_uniform)
+    GraphConv(in => out, σ=identity; aggr=+, bias=true, init=glorot_uniform)
 
 Graph convolution layer from Reference: [Weisfeiler and Leman Go Neural: Higher-order Graph Neural Networks](https://arxiv.org/abs/1810.02244).
 
@@ -172,7 +172,7 @@ end
 
 @functor GraphConv
 
-function GraphConv(ch::Pair{Int,Int}, σ=identity, aggr=+;
+function GraphConv(ch::Pair{Int,Int}, σ=identity; aggr=+,
                    init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W1 = init(out, in)
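In practice the change just moves `aggr` from the third positional slot to a keyword. A minimal usage sketch of the new call form (the tiny graph, feature sizes, and the `relu`/`mean` choices are illustrative, not part of the commit):

```julia
using GraphNeuralNetworks, Flux
using Statistics: mean

# Tiny illustrative graph built from (source, target) vectors.
s, t = [1, 2, 3], [2, 3, 1]
g = GNNGraph(s, t)
x = rand(Float32, 3, g.num_nodes)       # 3 input features per node

# `aggr` is now a keyword (it used to be the third positional argument).
l = GraphConv(3 => 8, relu; aggr=mean)
y = l(g, x)                             # expected size: (8, g.num_nodes)
```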
@@ -214,9 +214,9 @@ Implements the operation
 ```math
 \mathbf{x}_i' = \sum_{j \in N(i)} \alpha_{ij} W \mathbf{x}_j
 ```
-where the attention coefficient ``\alpha_{ij}`` is given by
+where the attention coefficients ``\alpha_{ij}`` are given by
 ```math
-\alpha_{ij} = \frac{1}{z_i} \exp(LeakyReLU(\mathbf{a}^T [W \mathbf{x}_i || W \mathbf{x}_j]))
+\alpha_{ij} = \frac{1}{z_i} \exp(LeakyReLU(\mathbf{a}^T [W \mathbf{x}_i \,\|\, W \mathbf{x}_j]))
 ```
 with ``z_i`` a normalization factor.
@@ -225,9 +225,9 @@ with ``z_i`` a normalization factor.
 - `in`: The dimension of input features.
 - `out`: The dimension of output features.
 - `bias::Bool`: Keyword argument, whether to learn the additive bias.
-- `heads`: Number attention heads
+- `heads`: Number of attention heads.
 - `concat`: Concatenate layer output or not. If not, layer output is averaged over the heads.
-- `negative_slope::Real`: Keyword argument, the parameter of LeakyReLU.
+- `negative_slope`: The parameter of LeakyReLU.
 """
 struct GATConv{T, A<:AbstractMatrix, B} <: GNNLayer
     weight::A
@@ -248,14 +248,18 @@ function GATConv(ch::Pair{Int,Int}, σ=identity;
                  init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W = init(out*heads, in)
-    b = bias ? Flux.create_bias(W, true, out*heads) : false
+    if concat
+        b = bias ? Flux.create_bias(W, true, out*heads) : false
+    else
+        b = bias ? Flux.create_bias(W, true, out) : false
+    end
     a = init(2*out, heads)
     negative_slope = convert(eltype(W), negative_slope)
     GATConv(W, b, a, σ, negative_slope, ch, heads, concat)
 end
 
 function compute_message(l::GATConv, Wxi, Wxj)
-    aWW = sum(l.a .* cat(Wxi, Wxj, dims=1), dims=1)   # 1 × nheads × nedges
+    aWW = sum(l.a .* vcat(Wxi, Wxj), dims=1)          # 1 × nheads × nedges
     α = exp.(leakyrelu.(aWW, l.negative_slope))
     return (α = α, m = α .* Wxj)
 end
@@ -273,14 +277,13 @@ function (l::GATConv)(g::GNNGraph, x::AbstractMatrix)
 
     x, _ = propagate(l, g, +, Wx)    ## chout × nheads × nnodes
 
-    b = reshape(l.bias, chout, heads)
-    x = l.σ.(x .+ b)
     if !l.concat
-        x = sum(x, dims=2)
+        x = mean(x, dims=2)
     end
+    x = reshape(x, :, size(x, 3))  # return a matrix
+    x = l.σ.(x .+ l.bias)
 
-    # We finally return a matrix
-    return reshape(x, :, size(x, 3))
+    return x
 end
 
 
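The net effect of the GATConv fix: with `concat=false` the per-head outputs are now averaged rather than summed, and the bias is created with `out` rows so it matches the reduced output. A short sketch of the expected shapes (the tiny graph, feature sizes, and head count are illustrative, not from the commit):

```julia
using GraphNeuralNetworks, Flux

s, t = [1, 2, 3], [2, 3, 1]
g = GNNGraph(s, t)
x = rand(Float32, 4, g.num_nodes)

# concat=true: the heads are concatenated, so bias and output have out*heads rows.
l_cat = GATConv(4 => 8; heads=3, concat=true)
@assert size(l_cat(g, x)) == (8 * 3, g.num_nodes)

# concat=false: the heads are now averaged (mean instead of sum) and the bias
# has only `out` rows, matching the averaged output.
l_avg = GATConv(4 => 8; heads=3, concat=false)
@assert size(l_avg(g, x)) == (8, g.num_nodes)
```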
@@ -514,3 +517,60 @@ function Base.show(io::IO, l::NNConv)
     print(io, ", aggr=", l.aggr)
     print(io, ")")
 end
+
+
+@doc raw"""
+    SAGEConv(in => out, σ=identity; aggr=mean, bias=true, init=glorot_uniform)
+
+GraphSAGE convolution layer from paper [Inductive Representation Learning on Large Graphs](https://arxiv.org/pdf/1706.02216.pdf).
+
+Performs:
+```math
+\mathbf{x}_i' = W [\mathbf{x}_i \,\|\, \square_{j \in \mathcal{N}(i)} \mathbf{x}_j]
+```
+
+where the aggregation type is selected by `aggr`.
+
+# Arguments
+
+- `in`: The dimension of input features.
+- `out`: The dimension of output features.
+- `σ`: Activation function.
+- `aggr`: Aggregation operator for the incoming messages (e.g. `+`, `*`, `max`, `min`, and `mean`).
+- `bias`: Add learnable bias.
+- `init`: Weights' initializer.
+"""
+struct SAGEConv{A<:AbstractMatrix, B} <: GNNLayer
+    weight::A
+    bias::B
+    σ
+    aggr
+end
+
+@functor SAGEConv
+
+function SAGEConv(ch::Pair{Int,Int}, σ=identity; aggr=mean,
+                  init=glorot_uniform, bias::Bool=true)
+    in, out = ch
+    W = init(out, 2*in)
+    b = bias ? Flux.create_bias(W, true, out) : false
+    SAGEConv(W, b, σ, aggr)
+end
+
+compute_message(l::SAGEConv, x_i, x_j, e_ij) = x_j
+update_node(l::SAGEConv, m, x) = l.σ.(l.weight * vcat(x, m) .+ l.bias)
+
+function (l::SAGEConv)(g::GNNGraph, x::AbstractMatrix)
+    check_num_nodes(g, x)
+    x, _ = propagate(l, g, l.aggr, x)
+    x
+end
+
+function Base.show(io::IO, l::SAGEConv)
+    in_channel = size(l.weight, ndims(l.weight)) ÷ 2
+    out_channel = size(l.weight, ndims(l.weight)-1)
+    print(io, "SAGEConv(", in_channel, " => ", out_channel)
+    l.σ == identity || print(io, ", ", l.σ)
+    print(io, ", aggr=", l.aggr)
+    print(io, ")")
+end
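A minimal sketch of how the new layer is used, consistent with the docstring above and the tests added below (the tiny graph and feature sizes are made up for illustration):

```julia
using GraphNeuralNetworks, Flux

s, t = [1, 2, 3, 1], [2, 3, 1, 3]
g = GNNGraph(s, t)
x = rand(Float32, 5, g.num_nodes)

# Default neighbourhood aggregation is `mean`, as in the docstring.
l = SAGEConv(5 => 16)
y = l(g, x)                              # expected size: (16, g.num_nodes)

# Activation and aggregation can be changed, mirroring the new test below.
l2 = SAGEConv(5 => 16, tanh; aggr=+, bias=false)
y2 = l2(g, x)
```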

test/layers/conv.jl
Lines changed: 27 additions & 13 deletions

@@ -27,16 +27,16 @@
 @testset "GCNConv" begin
     l = GCNConv(in_channel => out_channel)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 
     l = GCNConv(in_channel => out_channel, tanh, bias=false)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 
     l = GCNConv(in_channel => out_channel, add_self_loops=false)
-    test_layer(l, g1, rtol=1e-5)
+    test_layer(l, g1, rtol=1e-5, outsize=(out_channel, g1.num_nodes))
 end
 
 @testset "ChebConv" begin
@@ -65,12 +65,12 @@
 @testset "GraphConv" begin
     l = GraphConv(in_channel => out_channel)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 
-    l = GraphConv(in_channel => out_channel, relu, bias=false)
+    l = GraphConv(in_channel => out_channel, relu, bias=false, aggr=mean)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 
     @testset "bias=false" begin
@@ -81,10 +81,11 @@
 
 @testset "GATConv" begin
 
-    for heads in (1, 2), concat in (true, false)
+    for heads in (1, 3), concat in (true, false)
         l = GATConv(in_channel => out_channel; heads, concat)
         for g in test_graphs
-            test_layer(l, g, rtol=1e-4)
+            test_layer(l, g, rtol=1e-4,
+                outsize=(concat ? heads*out_channel : out_channel, g.num_nodes))
         end
     end
 
@@ -100,14 +101,14 @@
     @test size(l.weight) == (out_channel, out_channel, num_layers)
 
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 end
 
 @testset "EdgeConv" begin
     l = EdgeConv(Dense(2*in_channel, out_channel), aggr=+)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 end
 
@@ -116,7 +117,7 @@
     eps = 0.001f0
     l = GINConv(nn, eps=eps)
     for g in test_graphs
-        test_layer(l, g, rtol=1e-5, exclude_grad_fields=[:eps])
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes), exclude_grad_fields=[:eps])
     end
 
     @test !in(:eps, Flux.trainable(l))
@@ -129,13 +130,26 @@
     l = NNConv(in_channel => out_channel, nn)
     for g in test_graphs
         g = GNNGraph(g, edata=rand(T, edim, g.num_edges))
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 
     l = NNConv(in_channel => out_channel, nn, tanh, bias=false, aggr=mean)
     for g in test_graphs
         g = GNNGraph(g, edata=rand(T, edim, g.num_edges))
-        test_layer(l, g, rtol=1e-5)
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
+    end
+end
+
+@testset "SAGEConv" begin
+    l = SAGEConv(in_channel => out_channel)
+    @test l.aggr == mean
+    for g in test_graphs
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
+    end
+
+    l = SAGEConv(in_channel => out_channel, tanh, bias=false, aggr=+)
+    for g in test_graphs
+        test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
     end
 end
 end

test/test_utils.jl
Lines changed: 10 additions & 5 deletions

@@ -12,11 +12,12 @@ function FiniteDifferences.to_vec(x::Integer)
     return Int[x], Integer_from_vec
 end
 
-function test_layer(l, g::GNNGraph; atol=1e-7, rtol=1e-5,
-                    exclude_grad_fields=[],
-                    broken_grad_fields=[],
+function test_layer(l, g::GNNGraph; atol = 1e-7, rtol = 1e-5,
+                    exclude_grad_fields = [],
+                    broken_grad_fields = [],
                     verbose = false,
                     test_gpu = TEST_GPU,
+                    outsize = nothing,
                     )
 
     # TODO these give errors, probably some bugs in ChainRulesTestUtils
@@ -29,7 +30,7 @@ function test_layer(l, g::GNNGraph; atol=1e-7, rtol=1e-5,
     x = node_features(g)
     e = edge_features(g)
 
-    x64, e64, l64, g64 = to64.([x, e, l, g])
+    x64, e64, l64, g64 = to64.([x, e, l, g]) # needed for accurate FiniteDifferences' grad
     xgpu, egpu, lgpu, ggpu = gpu.([x, e, l, g])
 
     f(l, g) = l(g)
@@ -45,7 +46,11 @@ function test_layer(l, g::GNNGraph; atol=1e-7, rtol=1e-5,
     # TEST OUTPUT
     y = f(l, g, x)
     @test eltype(y) == eltype(x)
-
+    @test all(isfinite, y)
+    if !isnothing(outsize)
+        @test size(y) == outsize
+    end
+
 
     g′ = f(l, g)
     @test g′.ndata.x ≈ y
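For context, a sketch of how the new `outsize` keyword is meant to be called from the test suites above; `in_channel`, `out_channel`, and `test_graphs` are fixtures defined elsewhere in the tests, and `outsize` defaults to `nothing` (no size check):

```julia
# test_layer now additionally checks that the layer output is finite and,
# when `outsize` is given, that it has the expected size.
l = SAGEConv(in_channel => out_channel)
for g in test_graphs
    test_layer(l, g, rtol=1e-5, outsize=(out_channel, g.num_nodes))
end
```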