
Commit 95d0c50

Commit message: work
1 parent c18a71f commit 95d0c50

7 files changed (+152, -102 lines)

docs/src/api/messagepassing.md

Lines changed: 1 addition & 1 deletion
@@ -2,6 +2,6 @@
 
 ```@docs
 GraphNeuralNetworks.message
-GraphNeuralNetworks.update
+GraphNeuralNetworks.update_node
 GraphNeuralNetworks.propagate
 ```

src/GraphNeuralNetworks.jl

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ export
     sprand, sparse,
 
     # msgpass
-    # update, update_edge, message, propagate,
+    # update_node, update_edge, message, propagate,
 
     # layers/basic
     GNNLayer,

src/layers/conv.jl

Lines changed: 12 additions & 10 deletions
@@ -45,8 +45,8 @@ function (l::GCNConv)(g::GNNGraph, x::AbstractMatrix{T}) where T
     l.σ.(l.weight * x * Ã .+ l.bias)
 end
 
-message(l::GCNConv, xi, xj) = xj
-update(l::GCNConv, x, m) = m
+message(l::GCNConv, xi, xj, eij) = xj
+update_node(l::GCNConv, m, x) = m
 
 function (l::GCNConv)(g::GNNGraph, x::CuMatrix{T}) where T
     g = add_self_loops(g)
@@ -177,11 +177,11 @@ function GraphConv(ch::Pair{Int,Int}, σ=identity, aggr=+;
 end
 
 message(l::GraphConv, x_i, x_j, e_ij) = x_j
-update(l::GraphConv, x, m) = l.σ.(l.weight1 * x .+ l.weight2 * m .+ l.bias)
+update_node(l::GraphConv, m, x) = l.σ.(l.weight1 * x .+ l.weight2 * m .+ l.bias)
 
 function (l::GraphConv)(g::GNNGraph, x::AbstractMatrix)
     check_num_nodes(g, x)
-    x, _ = propagate(l, g, +, x)
+    x, _ = propagate(l, g, l.aggr, x)
     x
 end
 
@@ -235,6 +235,7 @@ struct GATConv{T, A<:AbstractMatrix{T}, B} <: GNNLayer
 end
 
 @functor GATConv
+Flux.trainable(l::GATConv) = (l.weight, l.bias, l.a)
 
 function GATConv(ch::Pair{Int,Int};
                  heads::Int=1, concat::Bool=true, negative_slope=0.2f0,
@@ -316,6 +317,7 @@
 
 @functor GatedGraphConv
 
+
 function GatedGraphConv(out_ch::Int, num_layers::Int;
                         aggr=+, init=glorot_uniform)
     w = init(out_ch, out_ch, num_layers)
@@ -325,7 +327,7 @@
 
 
 message(l::GatedGraphConv, x_i, x_j, e_ij) = x_j
-update(l::GatedGraphConv, x, m) = m
+update_node(l::GatedGraphConv, m, x) = m
 
 # remove after https://github.com/JuliaDiff/ChainRules.jl/pull/521
 @non_differentiable fill!(x...)
@@ -340,7 +342,7 @@ function (l::GatedGraphConv)(g::GNNGraph, H::AbstractMatrix{S}) where {T<:Abstra
     end
     for i = 1:l.num_layers
         M = view(l.weight, :, :, i) * H
-        M, _ = propagate(l, g, +, M)
+        M, _ = propagate(l, g, l.aggr, M)
         H, _ = l.gru(H, M)
     end
     H
@@ -381,11 +383,11 @@ EdgeConv(nn; aggr=max) = EdgeConv(nn, aggr)
 
 message(l::EdgeConv, x_i, x_j, e_ij) = l.nn(vcat(x_i, x_j .- x_i))
 
-update(l::EdgeConv, x, m) = m
+update_node(l::EdgeConv, m, x) = m
 
 function (l::EdgeConv)(g::GNNGraph, X::AbstractMatrix)
     check_num_nodes(g, X)
-    X, _ = propagate(l, g, +, X)
+    X, _ = propagate(l, g, l.aggr, X)
     X
 end
 
@@ -424,8 +426,8 @@ function GINConv(nn; eps=0f0)
     GINConv(nn, eps)
 end
 
-message(l::GINConv, x_i, x_j) = x_j
-update(l::GINConv, x, m) = l.nn((1 + l.eps) * x + m)
+message(l::GINConv, x_i, x_j, e_ij) = x_j
+update_node(l::GINConv, m, x) = l.nn((1 + l.eps) * x + m)
 
 function (l::GINConv)(g::GNNGraph, X::AbstractMatrix)
     check_num_nodes(g, X)
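
The `propagate(l, g, l.aggr, ...)` changes above mean the aggregation chosen at construction time is now actually used in the forward pass, where `+` was previously hard-coded. Below is a minimal sketch of the effect, not part of the commit; the tiny graph and the feature sizes are made up, and the positional `aggr` argument follows the `GraphConv(ch::Pair{Int,Int}, σ=identity, aggr=+; ...)` signature visible in the hunk header:

```julia
using GraphNeuralNetworks
using Statistics: mean

# toy fully connected graph with 3 nodes and 3 features per node
adj = [0 1 1;
       1 0 1;
       1 1 0]
x = rand(Float32, 3, 3)
g = GNNGraph(adj, ndata=x)

l = GraphConv(3 => 4, identity, mean)  # aggr is the third positional argument
y = l(g, x)                            # forward now calls propagate(l, g, l.aggr, x)
@assert size(y) == (4, 3)              # 4 output features for each of the 3 nodes
```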

src/msgpass.jl

Lines changed: 99 additions & 65 deletions
@@ -1,60 +1,88 @@
-# Adapted message passing from paper
-# "Relational inductive biases, deep learning, and graph networks"
-
 """
-    propagate(l, g, aggr, [X, E]) -> X′, E
+    propagate(l, g, aggr, [x, e]) -> x′, e′
     propagate(l, g, aggr) -> g′
 
-Perform the sequence of operations implementing the message-passing scheme
-of gnn layer `l` on graph `g` .
-Updates the node, edge, and global features `X`, `E`, and `U` respectively.
+Performs the message passing for GNN layer `l` on graph `g`.
+Returns updated node and edge features `x` and `e`.
+
+If no node and edge features are given as input,
+they are extracted from `g` and the same graph
+is returned with updated features.
 
-The computation involved is the following:
+The computational steps are the following:
 
 ```julia
-M = compute_batch_message(l, g, X, E)
-M̄ = aggregate_neighbors(l, aggr, g, M)
-X′ = update(l, X, M̄)
-E′ = update_edge(l, E, M)
+m = compute_batch_message(l, g, x, e)  # calls `message`
+m̄ = aggregate_neighbors(l, g, aggr, m)
+x′ = update_node(l, m̄, x)
+e′ = update_edge(l, m, e)
 ```
 
-Custom layers typically define their own [`update`](@ref)
+Custom layers typically define their own [`update_node`](@ref)
 and [`message`](@ref) functions, then call
 this method in the forward pass:
 
-```julia
-function (l::MyLayer)(g, X)
-    ... some prepocessing if needed ...
-    propagate(l, g, +, X, E)
+# Usage example
+
+```julia
+using GraphNeuralNetworks, Flux
+
+struct GNNConv <: GNNLayer
+    W
+    b
+    σ
+end
+
+Flux.@functor GNNConv
+
+function GNNConv(ch::Pair{Int,Int}, σ=identity;
+                 init=glorot_uniform, bias::Bool=true)
+    in, out = ch
+    W = init(out, in)
+    b = Flux.create_bias(W, bias, out)
+    GNNConv(W, b, σ)
+end
+
+message(l::GNNConv, x_i, x_j, e_ij) = l.W * x_j
+update_node(l::GNNConv, m̄, x) = l.σ.(m̄ .+ l.b)
+
+function (l::GNNConv)(g::GNNGraph, x::AbstractMatrix)
+    x, _ = propagate(l, g, +, x)
+    return x
 end
 ```
 
-See also [`message`](@ref) and [`update`](@ref).
+See also [`message`](@ref) and [`update_node`](@ref).
 """
 function propagate end
 
 function propagate(l, g::GNNGraph, aggr)
-    X, E = propagate(l, g, aggr, node_features(g), edge_features(g))
-
-    return GNNGraph(g, ndata=X, edata=E)
+    x, e = propagate(l, g, aggr, node_features(g), edge_features(g))
+    return GNNGraph(g, ndata=x, edata=e)
 end
 
-function propagate(l, g::GNNGraph, aggr, X, E=nothing)
-    M = compute_batch_message(l, g, X, E)
-    M̄ = aggregate_neighbors(l, g, aggr, M)
-    X′ = update(l, X, M̄)
-    E′ = update_edge(l, E, M)
-    return X′, E′, U
+function propagate(l, g::GNNGraph, aggr, x, e=nothing)
+    m = compute_batch_message(l, g, x, e)
+    m̄ = aggregate_neighbors(l, g, aggr, m)
+    x′ = update_node(l, m̄, x)
+    e′ = update_edge(l, m, e)
+    return x′, e′
 end
 
+## Step 1.
+
 """
     message(l, x_i, x_j, [e_ij])
 
 Message function for the message-passing scheme,
 returning the message from node `j` to node `i` .
 In the message-passing scheme, the incoming messages
 from the neighborhood of `i` will later be aggregated
-in order to [`update`](@ref) the features of node `i`.
+in order to update (see [`update_node`](@ref)) the features of node `i`.
+
+The function operates on batches of edges, therefore
+`x_i`, `x_j`, and `e_ij` are tensors whose last dimension
+is the batch size.
 
 By default, the function returns `x_j`.
 Custom layer should specialize this method with the desired behavior.
@@ -66,63 +94,69 @@ Custom layer should specialize this method with the desired behavior.
 - `x_j`: Features of the neighbor `j` of node `i`.
 - `e_ij`: Features of edge `(i,j)`.
 
-See also [`update`](@ref) and [`propagate`](@ref).
+See also [`update_node`](@ref) and [`propagate`](@ref).
 """
 function message end
 
-"""
-    update(l, x, m̄)
-
-Update function for the message-passing scheme,
-returning a new set of node features `x′` based on old
-features `x` and the incoming message from the neighborhood
-aggregation `m̄`.
-
-By default, the function returns `m̄`.
-Custom layers should specialize this method with the desired behavior.
-
-# Arguments
-
-- `l`: A gnn layer.
-- `m̄`: Aggregated edge messages from the [`message`](@ref) function.
-- `x`: Node features to be updated.
-- `u`: Global features.
-
-See also [`message`](@ref) and [`propagate`](@ref).
-"""
-function update end
+@inline message(l, x_i, x_j, e_ij) = message(l, x_i, x_j)
+@inline message(l, x_i, x_j) = x_j
 
 _gather(x, i) = NNlib.gather(x, i)
 _gather(x::Nothing, i) = nothing
 
-## Step 1.
-
-function compute_batch_message(l, g, X, E)
+function compute_batch_message(l, g, x, e)
     s, t = edge_index(g)
-    Xi = _gather(X, t)
-    Xj = _gather(X, s)
-    M = message(l, Xi, Xj, E)
-    return M
+    xi = _gather(x, t)
+    xj = _gather(x, s)
+    m = message(l, xi, xj, e)
+    return m
 end
 
-@inline message(l, x_i, x_j, e_ij) = message(l, x_i, x_j)
-@inline message(l, x_i, x_j) = x_j
-
 ## Step 2
 
-function aggregate_neighbors(l, g, aggr, E)
+function aggregate_neighbors(l, g, aggr, e)
     s, t = edge_index(g)
-    NNlib.scatter(aggr, E, t)
+    NNlib.scatter(aggr, e, t)
 end
 
-aggregate_neighbors(l, g, aggr::Nothing, E) = nothing
+aggregate_neighbors(l, g, aggr::Nothing, e) = nothing
 
 ## Step 3
 
-@inline update(l, x, m̄) = m̄
+"""
+    update_node(l, m̄, x)
+
+Node update function for the GNN layer `l`,
+returning a new set of node features `x′` based on old
+features `x` and the aggregated message `m̄` from the neighborhood.
+
+By default, the function returns `m̄`.
+Custom layers should specialize this method with the desired behavior.
+
+See also [`message`](@ref), [`update_edge`](@ref), and [`propagate`](@ref).
+"""
+function update_node end
+
+@inline update_node(l, m̄, x) = m̄
 
 ## Step 4
 
-@inline update_edge(l, E, M) = E
+
+"""
+    update_edge(l, m, e)
+
+Edge update function for the GNN layer `l`,
+returning a new set of edge features `e′` based on old
+features `e` and the newly computed messages `m`
+from the [`message`](@ref) function.
+
+By default, the function returns `e`.
+Custom layers should specialize this method with the desired behavior.
+
+See also [`message`](@ref), [`update_node`](@ref), and [`propagate`](@ref).
+"""
+function update_edge end
+
+@inline update_edge(l, m, e) = e
 
 ### end steps ###
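
For layers defined outside the package, the three extension points above (`message`, `update_node`, `update_edge`) are extended with qualified method definitions, as the CUDA test below also does. A hypothetical sketch, not part of the commit; the layer name `EWConv`, the 1 × num_edges edge-weight layout, and the toy shapes are assumptions:

```julia
using GraphNeuralNetworks, Flux

struct EWConv <: GNNLayer   # hypothetical edge-weighted layer, for illustration only
    W
end
Flux.@functor EWConv

# Step 1: per-edge messages; e_ij holds the (1 × num_edges) edge weights
GraphNeuralNetworks.message(l::EWConv, x_i, x_j, e_ij) = (l.W * x_j) .* e_ij
# Step 3: node update from the aggregated message m̄
GraphNeuralNetworks.update_node(l::EWConv, m̄, x) = relu.(m̄)
# Step 4: keep the edge features unchanged (this is also the default)
GraphNeuralNetworks.update_edge(l::EWConv, m, e) = e

function (l::EWConv)(g::GNNGraph, x::AbstractMatrix, e::AbstractMatrix)
    x′, e′ = GraphNeuralNetworks.propagate(l, g, +, x, e)
    return x′, e′
end

# usage on a toy graph with one scalar weight per (directed) edge
adj = [0 1; 1 0]
x = rand(Float32, 4, 2)
g = GNNGraph(adj, ndata=x)
e = rand(Float32, 1, g.num_edges)
l = EWConv(rand(Float32, 4, 4))
xnew, enew = l(g, x, e)
```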

test/cuda/msgpass.jl

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 
 (l::NewCudaLayer)(g, X) = GraphNeuralNetworks.propagate(l, g, +, X)
 GraphNeuralNetworks.message(n::NewCudaLayer, x_i, x_j, e_ij) = n.weight * x_j
-GraphNeuralNetworks.update(::NewCudaLayer, x, m) = m
+GraphNeuralNetworks.update_node(::NewCudaLayer, m, x) = m
 
 X = rand(T, in_channel, N) |> gpu
 g = GNNGraph(adj, ndata=X, graph_type=GRAPH_T)

test/layers/conv.jl

Lines changed: 1 addition & 0 deletions
@@ -133,6 +133,7 @@
     @test size(gat.weight) == (out_channel * heads, in_channel)
     @test size(gat.bias) == (out_channel * heads,)
     @test size(gat.a) == (2*out_channel, heads)
+    @test length(Flux.trainable(gat)) == 3
 
     g_ = gat(g_gat)
     Y = node_features(g_)
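
The added test asserts that `Flux.trainable` on a `GATConv` now exposes exactly the three parameter arrays listed in `Flux.trainable(l::GATConv) = (l.weight, l.bias, l.a)`. A rough equivalent outside the test suite (the channel sizes and head count are arbitrary):

```julia
using GraphNeuralNetworks, Flux

gat = GATConv(3 => 4, heads=2)   # `heads` is a keyword, as in the constructor above
ps = Flux.trainable(gat)         # (gat.weight, gat.bias, gat.a) after this commit
@assert length(ps) == 3
```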
