
Commit 05cfb80

Merge pull request #26 from CarloLucibello/cl/prop
Generalize message passing to handle Tuples and NamedTuples
2 parents 535d866 + f43086e commit 05cfb80

File tree

6 files changed: +109 −46 lines changed


docs/src/messagepassing.md

Lines changed: 18 additions & 1 deletion
@@ -1,4 +1,21 @@
 # Message Passing
 
-TODO
+The message passing is initiated by [`propagate`](@ref)
+and can be customized for a specific layer by overloading the methods
+[`compute_message`](@ref), [`update_node`](@ref), and [`update_edge`](@ref).
 
+
+The message passing corresponds to the following operations
+
+```math
+\begin{aligned}
+\mathbf{m}_{j\to i} &= \phi(\mathbf{x}_i, \mathbf{x}_j, \mathbf{e}_{j\to i}) \\
+\mathbf{x}_{i}' &= \gamma_x(\mathbf{x}_{i}, \square_{j\in N(i)} \mathbf{m}_{j\to i})\\
+\mathbf{e}_{j\to i}^\prime &= \gamma_e(\mathbf{e}_{j \to i}, \mathbf{m}_{j \to i})
+\end{aligned}
+```
+where ``\phi`` is expressed by the [`compute_message`](@ref) function,
+``\gamma_x`` and ``\gamma_e`` by [`update_node`](@ref) and [`update_edge`](@ref),
+respectively.
+
+See the [`GraphConv`](@ref) and [`GATConv`](@ref) implementations as usage examples.
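A minimal sketch of a custom layer built on these three hooks, assuming the API introduced in this commit; the layer name `MyConv`, its weight initialization, and the feature sizes are illustrative only, not part of the commit:

```julia
using Flux, GraphNeuralNetworks
import GraphNeuralNetworks: compute_message, update_node, propagate

struct MyConv <: GNNLayer   # hypothetical layer used only for illustration
    weight
end
MyConv(in::Int, out::Int) = MyConv(Flux.glorot_uniform(out, in))
Flux.@functor MyConv

# ϕ: message sent from node j to node i (edge features unused here)
compute_message(l::MyConv, x_i, x_j) = l.weight * x_j

# γ_x: combine the aggregated messages with the old node features
update_node(l::MyConv, m̄, x) = m̄ .+ l.weight * x

# forward pass: `+` plays the role of the aggregation operator □
(l::MyConv)(g::GNNGraph, x) = propagate(l, g, +, x)[1]
```

Calling `MyConv(3, 4)(g, x)` on a graph `g` with a `3 × num_nodes` feature matrix `x` then runs exactly the ϕ, □, γ_x steps of the equations above.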

src/gnngraph.jl

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ from the LightGraphs' graph library can be used on it.
 
 # Usage.
 
-```
+```julia
 using Flux, GraphNeuralNetworks
 
 # Construct from adjacency list representation
src/layers/conv.jl

Lines changed: 24 additions & 24 deletions
@@ -196,7 +196,7 @@ end
 
 
 @doc raw"""
-    GATConv(in => out;
+    GATConv(in => out, σ=identity;
             heads=1,
             concat=true,
             init=glorot_uniform
@@ -228,6 +228,7 @@ struct GATConv{T, A<:AbstractMatrix{T}, B} <: GNNLayer
     weight::A
     bias::B
     a::A
+    σ
     negative_slope::T
     channel::Pair{Int, Int}
     heads::Int
@@ -237,44 +238,43 @@ end
 @functor GATConv
 Flux.trainable(l::GATConv) = (l.weight, l.bias, l.a)
 
-function GATConv(ch::Pair{Int,Int};
+function GATConv(ch::Pair{Int,Int}, σ=identity;
                  heads::Int=1, concat::Bool=true, negative_slope=0.2f0,
                  init=glorot_uniform, bias::Bool=true)
     in, out = ch
     W = init(out*heads, in)
     b = Flux.create_bias(W, bias, out*heads)
     a = init(2*out, heads)
-    GATConv(W, b, a, negative_slope, ch, heads, concat)
+    GATConv(W, b, a, σ, negative_slope, ch, heads, concat)
 end
 
-function (gat::GATConv)(g::GNNGraph, X::AbstractMatrix)
-    check_num_nodes(g, X)
+function compute_message(l::GATConv, Wxi, Wxj)
+    aWW = sum(l.a .* cat(Wxi, Wxj, dims=1), dims=1)   # 1 × nheads × nedges
+    α = exp.(leakyrelu.(aWW, l.negative_slope))
+    return (α = α, m = α .* Wxj)
+end
+
+update_node(l::GATConv, d̄, x) = d̄.m ./ d̄.α
+
+function (l::GATConv)(g::GNNGraph, x::AbstractMatrix)
+    check_num_nodes(g, x)
     g = add_self_loops(g)
-    chin, chout = gat.channel
-    heads = gat.heads
+    chin, chout = l.channel
+    heads = l.heads
 
-    source, target = edge_index(g)
-    Wx = gat.weight*X
+    Wx = l.weight * x
     Wx = reshape(Wx, chout, heads, :)                 # chout × nheads × nnodes
-    Wxi = NNlib.gather(Wx, target)                    # chout × nheads × nedges
-    Wxj = NNlib.gather(Wx, source)
-
-    # Edge Message
-    # Computing softmax. TODO make it numerically stable
-    aWW = sum(gat.a .* cat(Wxi, Wxj, dims=1), dims=1) # 1 × nheads × nedges
-    α = exp.(leakyrelu.(aWW, gat.negative_slope))
-    m̄ = NNlib.scatter(+, α .* Wxj, target)            # chout × nheads × nnodes
-    ᾱ = NNlib.scatter(+, α, target)                   # 1 × nheads × nnodes
 
-    # Node update
-    b = reshape(gat.bias, chout, heads)
-    X = m̄ ./ ᾱ .+ b                                   # chout × nheads × nnodes
-    if !gat.concat
-        X = sum(X, dims=2)
+    x, _ = propagate(l, g, +, Wx)                     ## chout × nheads × nnodes
+
+    b = reshape(l.bias, chout, heads)
+    x = l.σ.(x .+ b)
+    if !l.concat
+        x = sum(x, dims=2)
     end
 
     # We finally return a matrix
-    return reshape(X, :, size(X, 3))
+    return reshape(x, :, size(x, 3))
 end
 
 
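A usage sketch of the refactored `GATConv` under the new constructor signature; the toy graph and feature sizes are invented for illustration:

```julia
using Flux, GraphNeuralNetworks

g = GNNGraph([[2, 3], [1, 4], [1, 4], [2, 3]])   # toy 4-node graph
x = rand(Float32, 3, 4)                          # 3 features per node

# the activation σ is now a positional argument; attention coefficients are
# produced by compute_message and normalized in update_node via propagate
l = GATConv(3 => 8, relu; heads = 2, concat = true)

y = l(g, x)
size(y)   # (16, 4): out * heads features per node when concat = true
```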

src/msgpass.jl

Lines changed: 24 additions & 12 deletions
@@ -13,7 +13,7 @@ The computational steps are the following:
 
 ```julia
 m = compute_batch_message(l, g, x, e)    # calls `compute_message`
-m̄ = aggregate_neighbors(l, aggr, g, m)
+m̄ = aggregate_neighbors(g, aggr, m)
 x′ = update_node(l, m̄, x)
 e′ = update_edge(l, m, e)
 ```
@@ -24,7 +24,7 @@ this method in the forward pass:
 
 # Usage example
 
-```
+```julia
 using GraphNeuralNetworks, Flux
 
 struct GNNConv <: GNNLayer
@@ -63,7 +63,7 @@ end
 
 function propagate(l, g::GNNGraph, aggr, x, e=nothing)
     m = compute_batch_message(l, g, x, e)
-    m̄ = aggregate_neighbors(l, g, aggr, m)
+    m̄ = aggregate_neighbors(g, aggr, m)
     x′ = update_node(l, m̄, x)
     e′ = update_edge(l, m, e)
     return x′, e′
@@ -74,15 +74,17 @@ end
 """
     compute_message(l, x_i, x_j, [e_ij])
 
-Message function for the message-passing scheme,
-returning the message from node `j` to node `i` .
+Message function for the message-passing scheme
+started by [`propagate`](@ref).
+Returns the message from node `j` to node `i`.
 In the message-passing scheme, the incoming messages
 from the neighborhood of `i` will later be aggregated
 in order to update (see [`update_node`](@ref)) the features of node `i`.
 
 The function operates on batches of edges, therefore
 `x_i`, `x_j`, and `e_ij` are tensors whose last dimension
-is the batch size.
+is the batch size, or can be tuples/named tuples of
+such tensors, according to the input to `propagate`.
 
 By default, the function returns `x_j`.
 Custom layers should specialize this method with the desired behavior.
@@ -101,10 +103,12 @@ function compute_message end
 @inline compute_message(l, x_i, x_j, e_ij) = compute_message(l, x_i, x_j)
 @inline compute_message(l, x_i, x_j) = x_j
 
-_gather(x, i) = NNlib.gather(x, i)
+_gather(x::NamedTuple, i) = map(x -> _gather(x, i), x)
+_gather(x::Tuple, i) = map(x -> _gather(x, i), x)
+_gather(x::AbstractArray, i) = NNlib.gather(x, i)
 _gather(x::Nothing, i) = nothing
 
-function compute_batch_message(l, g, x, e)
+function compute_batch_message(l, g::GNNGraph, x, e)
     s, t = edge_index(g)
     xi = _gather(x, t)
     xj = _gather(x, s)
@@ -114,12 +118,17 @@
 
 ## Step 2
 
-function aggregate_neighbors(l, g, aggr, e)
+_scatter(aggr, e::NamedTuple, t) = map(e -> _scatter(aggr, e, t), e)
+_scatter(aggr, e::Tuple, t) = map(e -> _scatter(aggr, e, t), e)
+_scatter(aggr, e::AbstractArray, t) = NNlib.scatter(aggr, e, t)
+_scatter(aggr, e::Nothing, t) = nothing
+
+function aggregate_neighbors(g::GNNGraph, aggr, e)
     s, t = edge_index(g)
-    NNlib.scatter(aggr, e, t)
+    _scatter(aggr, e, t)
 end
 
-aggregate_neighbors(l, g, aggr::Nothing, e) = nothing
+aggregate_neighbors(g::GNNGraph, aggr::Nothing, e) = nothing
 
 ## Step 3
 
@@ -130,6 +139,9 @@ Node update function for the GNN layer `l`,
 returning a new set of node features `x′` based on old
 features `x` and the aggregated message `m̄` from the neighborhood.
 
+The input `m̄` is an array, a tuple, or a named tuple,
+reflecting the output of [`compute_message`](@ref).
+
 By default, the function returns `m̄`.
 Custom layers should specialize this method with the desired behavior.
 
@@ -148,7 +160,7 @@ function update_node end
 Edge update function for the GNN layer `l`,
 returning a new set of edge features `e′` based on old
 features `e` and the newly computed messages `m`
-from the [`message`](@ref) function.
+from the [`compute_message`](@ref) function.
 
 By default, the function returns `e`.
 Custom layers should specialize this method with the desired behavior.
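The Tuple/NamedTuple support rests on recursing `_gather`/`_scatter` over the container fields with `map`. A standalone sketch of that pattern, where `gather_any` is a hypothetical stand-in for the internal `_gather` and only `NNlib.gather` is a real library call:

```julia
using NNlib

# same recursion as `_gather` above: arrays are gathered directly,
# named tuples are gathered field by field with `map`
gather_any(x::AbstractArray, i) = NNlib.gather(x, i)
gather_any(x::NamedTuple, i)    = map(v -> gather_any(v, i), x)

x = (h = rand(Float32, 4, 3), deg = rand(Float32, 1, 3))   # features of 3 nodes
s = [1, 2, 2, 3]                                           # source node of each edge

xj = gather_any(x, s)   # (h = 4×4 matrix, deg = 1×4 matrix), one column per edge
size(xj.h)              # (4, 4)
```

`_scatter` applies the same trick on the aggregation side, which is what lets `propagate` carry named tuples such as the `(α = ..., m = ...)` message used by the refactored `GATConv`.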

test/cuda/msgpass.jl

Lines changed: 7 additions & 7 deletions
@@ -10,19 +10,19 @@
                0 1 0 1 0 1
                0 1 1 0 1 0]
 
-    struct NewCudaLayer
+    struct NewCudaLayer{G} <: GNNLayer
         weight
     end
-    NewCudaLayer(m, n) = NewCudaLayer(randn(T, m, n))
-    @functor NewCudaLayer
+    NewCudaLayer{GRAPH_T}(m, n) = NewCudaLayer{GRAPH_T}(randn(T, m, n))
+    Flux.@functor NewCudaLayer{GRAPH_T}
 
-    (l::NewCudaLayer)(g, X) = GraphNeuralNetworks.propagate(l, g, +, X)
-    GraphNeuralNetworks.compute_message(n::NewCudaLayer, x_i, x_j, e_ij) = n.weight * x_j
-    GraphNeuralNetworks.update_node(::NewCudaLayer, m, x) = m
+    (l::NewCudaLayer{GRAPH_T})(g, X) = GraphNeuralNetworks.propagate(l, g, +, X)[1]
+    GraphNeuralNetworks.compute_message(n::NewCudaLayer{GRAPH_T}, x_i, x_j, e_ij) = n.weight * x_j
+    GraphNeuralNetworks.update_node(::NewCudaLayer{GRAPH_T}, m, x) = m
 
     X = rand(T, in_channel, N) |> gpu
     g = GNNGraph(adj, ndata=X, graph_type=GRAPH_T)
-    l = NewCudaLayer(out_channel, in_channel) |> gpu
+    l = NewCudaLayer{GRAPH_T}(out_channel, in_channel) |> gpu
 
     g_ = l(g)
     @test size(node_features(g_)) == (out_channel, N)

test/msgpass.jl

Lines changed: 35 additions & 1 deletion
@@ -1,3 +1,5 @@
+import GraphNeuralNetworks: compute_message, update_node, update_edge, propagate
+
 @testset "message passing" begin
     in_channel = 10
     out_channel = 5
@@ -113,9 +115,9 @@
         GraphNeuralNetworks.compute_message(l::NewLayerW{GRAPH_T}, x_i, x_j, e_ij) = l.weight * x_j
         GraphNeuralNetworks.update_node(l::NewLayerW{GRAPH_T}, m, x) = l.weight * x + m
 
-        l = NewLayerW(in_channel, out_channel)
         (l::NewLayerW{GRAPH_T})(g) = GraphNeuralNetworks.propagate(l, g, +)
 
+        l = NewLayerW(in_channel, out_channel)
         g = GNNGraph(adj, ndata=X, edata=E, gdata=U, graph_type=GRAPH_T)
         g_ = l(g)
 
@@ -124,4 +126,36 @@
         @test edge_features(g_) === E
         @test graph_features(g_) === U
     end
+
+    @testset "NamedTuples" begin
+        struct NewLayerNT{G}
+            W
+        end
+
+        NewLayerNT(in, out) = NewLayerNT{GRAPH_T}(randn(T, out, in))
+
+        function GraphNeuralNetworks.compute_message(l::NewLayerNT{GRAPH_T}, di, dj, dij)
+            a = l.W * (di.x .+ dj.x .+ dij.e)
+            b = l.W * di.x
+            return (; a, b)
+        end
+        function GraphNeuralNetworks.update_node(l::NewLayerNT{GRAPH_T}, m, d)
+            return (α = l.W * d.x + m.a + m.b, β = m)
+        end
+        function GraphNeuralNetworks.update_edge(l::NewLayerNT{GRAPH_T}, m, e)
+            return m.a
+        end
+
+        function (l::NewLayerNT{GRAPH_T})(g, x, e)
+            x, e = propagate(l, g, mean, (; x), (; e))
+            return x.α .+ x.β.a, e
+        end
+
+        l = NewLayerNT(in_channel, out_channel)
+        g = GNNGraph(adj, graph_type=GRAPH_T)
+        X′, E′ = l(g, X, E)
+
+        @test size(X′) == (out_channel, num_V)
+        @test size(E′) == (out_channel, num_E)
+    end
 end
