Commit 4636ca3

Merge pull request #83 from CarloLucibello/cl/meg: implement MEGNetConv

2 parents 749e038 + c653600

File tree

6 files changed: +170 -31 lines

docs/src/api/messagepassing.md (1 addition, 0 deletions)

@@ -15,6 +15,7 @@ Pages = ["messagepassing.md"]
 
 ```@docs
 apply_edges
+aggregate_neighbors
 propagate
 ```
 
src/GraphNeuralNetworks.jl (14 additions, 5 deletions)

@@ -21,14 +21,21 @@ using .GNNGraphs: COO_T, ADJMAT_T, SPARSE_T,
 
 export
   # utils
-  reduce_nodes, reduce_edges,
-  softmax_nodes, softmax_edges,
-  broadcast_nodes, broadcast_edges,
+  reduce_nodes,
+  reduce_edges,
+  softmax_nodes,
+  softmax_edges,
+  broadcast_nodes,
+  broadcast_edges,
   softmax_edge_neighbors,
 
   # msgpass
-  apply_edges, propagate,
-  copy_xj, copy_xi, xi_dot_xj,
+  apply_edges,
+  aggregate_neighbors,
+  propagate,
+  copy_xj,
+  copy_xi,
+  xi_dot_xj,
 
   # layers/basic
   GNNLayer,
@@ -46,9 +53,11 @@ export
   GCNConv,
   GINConv,
   GraphConv,
+  MEGNetConv,
   NNConv,
   ResGatedGraphConv,
   SAGEConv,
+
 
   # layers/pool
   GlobalPool,

src/layers/conv.jl (70 additions, 0 deletions)

@@ -823,3 +823,73 @@ function (l::AGNNConv)(g::GNNGraph, x::AbstractMatrix)
     return x
 end
 
+@doc raw"""
+    MEGNetConv(ϕe, ϕv; aggr=mean)
+    MEGNetConv(in => out; aggr=mean)
+
+Convolution from the [Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals](https://arxiv.org/pdf/1812.05055.pdf)
+paper. In the forward pass it takes node features `x` and edge features `e` as inputs and returns
+updated features `x̄, ē` according to
+
+```math
+ē = ϕe(vcat(xi, xj, e))
+x̄ = ϕv(vcat(x, \square_{j\in \mathcal{N}(i)} ē_{j\to i}))
+```
+`aggr` defines the aggregation to be performed.
+
+If the neural networks `ϕe` and `ϕv` are not provided, they will be constructed from
+the `in` and `out` arguments instead, as multi-layer perceptrons with one hidden layer and `relu`
+activations.
+
+# Examples
+
+```julia
+g = rand_graph(10, 30)
+x = randn(3, 10)
+e = randn(3, 30)
+m = MEGNetConv(3 => 3)
+x̄, ē = m(g, x, e)
+```
+"""
+struct MEGNetConv <: GNNLayer
+    ϕe
+    ϕv
+    aggr
+end
+
+@functor MEGNetConv
+
+MEGNetConv(ϕe, ϕv; aggr=mean) = MEGNetConv(ϕe, ϕv, aggr)
+
+function MEGNetConv(ch::Pair{Int,Int}; aggr=mean)
+    nin, nout = ch
+    ϕe = Chain(Dense(3nin, nout, relu),
+               Dense(nout, nout))
+
+    ϕv = Chain(Dense(nin + nout, nout, relu),
+               Dense(nout, nout))
+
+    MEGNetConv(ϕe, ϕv; aggr)
+end
+
+function (l::MEGNetConv)(g::GNNGraph)
+    x, e = l(g, node_features(g), edge_features(g))
+    return GNNGraph(g, ndata=x, edata=e)
+end
+
+function (l::MEGNetConv)(g::GNNGraph, x::AbstractMatrix, e::AbstractMatrix)
+    check_num_nodes(g, x)
+
+    ē = apply_edges(g, xi=x, xj=x, e=e) do xi, xj, e
+        l.ϕe(vcat(xi, xj, e))
+    end
+
+    xᵉ = aggregate_neighbors(g, l.aggr, ē)
+
+    x̄ = l.ϕv(vcat(x, xᵉ))
+
+    return x̄, ē
+end

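As a usage note for the hunk above: the pair constructor `MEGNetConv(in => out)` simply builds the two MLPs shown in `MEGNetConv(ch::Pair{Int,Int})`. Here is a minimal sketch of the equivalent explicit construction; the feature sizes and random data are illustrative assumptions, not part of the commit.

```julia
using Flux, GraphNeuralNetworks
using Statistics: mean

nin, nout = 3, 3                       # illustrative feature sizes
# ϕe consumes vcat(xi, xj, e), i.e. 3nin input rows per edge.
ϕe = Chain(Dense(3nin, nout, relu), Dense(nout, nout))
# ϕv consumes vcat(x, aggregated ē), i.e. nin + nout input rows per node.
ϕv = Chain(Dense(nin + nout, nout, relu), Dense(nout, nout))

m = MEGNetConv(ϕe, ϕv; aggr=mean)      # same layer as MEGNetConv(nin => nout)

g = rand_graph(10, 30)
x̄, ē = m(g, randn(Float32, nin, 10), randn(Float32, nin, 30))
```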
src/msgpass.jl (14 additions, 2 deletions)

@@ -61,7 +61,7 @@ l = GNNConv(10 => 20)
 l(g, x)
 ```
 
-See also [`apply_edges`](@ref).
+See also [`apply_edges`](@ref) and [`aggregate_neighbors`](@ref).
 """
 function propagate end
 
@@ -103,7 +103,7 @@ such tensors.
 a batch of edges. The output of `f` has to be an array (or a named tuple of arrays)
 with the same batch size.
 
-See also [`propagate`](@ref).
+See also [`propagate`](@ref) and [`aggregate_neighbors`](@ref).
 """
 function apply_edges end
 
@@ -125,7 +125,19 @@ _gather(x::Nothing, i) = nothing
 
 
 ## AGGREGATE NEIGHBORS
+@doc raw"""
+    aggregate_neighbors(g::GNNGraph, aggr, m)
+
+Given a graph `g`, edge features `m`, and an aggregation
+operator `aggr` (e.g. `+`, `min`, `max`, `mean`), returns the new node
+features
+```math
+\mathbf{x}_i = \square_{j \in \mathcal{N}(i)} \mathbf{m}_{j\to i}
+```
 
+Neighborhood aggregation is the second step of [`propagate`](@ref),
+where it comes after [`apply_edges`](@ref).
+"""
 function aggregate_neighbors(g::GNNGraph, aggr, m)
     s, t = edge_index(g)
     return _scatter(aggr, m, t)

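Since `aggregate_neighbors` is newly exported and documented above, a small sketch of its semantics may help; the toy graph and feature values are illustrative assumptions, not part of the commit.

```julia
using GraphNeuralNetworks

# Three directed edges: 1→2, 1→3, 2→3.
g = GNNGraph([1, 1, 2], [2, 3, 3])

# Edge features: one row, one column per edge (m[:, k] belongs to edge k).
m = Float32[10 20 30]

# Sum the features of the edges entering each node:
# node 1 has no incoming edge, node 2 receives 10, node 3 receives 20 + 30.
aggregate_neighbors(g, +, m)   # expected: [0.0 10.0 50.0]
```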
test/layers/conv.jl (10 additions, 0 deletions)

@@ -184,4 +184,14 @@
         test_layer(l, g, rtol=1e-5, outsize=(in_channel, g.num_nodes))
     end
 end
+
+@testset "MEGNetConv" begin
+    l = MEGNetConv(in_channel => out_channel, aggr=+)
+    for g in test_graphs
+        g = GNNGraph(g, edata=rand(T, in_channel, g.num_edges))
+        test_layer(l, g, rtol=1e-5,
+                   outtype=:node_edge,
+                   outsize=((out_channel, g.num_nodes), (out_channel, g.num_edges)))
+    end
+end
 end

test/test_utils.jl (61 additions, 24 deletions)

@@ -30,66 +30,103 @@ function test_layer(l, g::GNNGraph; atol = 1e-6, rtol = 1e-5,
 
     x = node_features(g)
     e = edge_features(g)
+    use_edge_feat = !isnothing(e)
 
     x64, e64, l64, g64 = to64.([x, e, l, g]) # needed for accurate FiniteDifferences' grad
    xgpu, egpu, lgpu, ggpu = gpu.([x, e, l, g])
 
     f(l, g::GNNGraph) = l(g)
-    f(l, g::GNNGraph, x::AbstractArray{Float32}) = isnothing(e) ? l(g, x) : l(g, x, e)
-    f(l, g::GNNGraph, x::AbstractArray{Float64}) = isnothing(e64) ? l(g, x) : l(g, x, e64)
-    f(l, g::GNNGraph, x::CuArray) = isnothing(e64) ? l(g, x) : l(g, x, egpu)
+    f(l, g::GNNGraph, x, e) = use_edge_feat ? l(g, x, e) : l(g, x)
 
     loss(l, g::GNNGraph) = if outtype == :node
             sum(node_features(f(l, g)))
         elseif outtype == :edge
             sum(edge_features(f(l, g)))
         elseif outtype == :graph
             sum(graph_features(f(l, g)))
+        elseif outtype == :node_edge
+            gnew = f(l, g)
+            sum(node_features(gnew)) + sum(edge_features(gnew))
         end
 
-    loss(l, g::GNNGraph, x) = sum(f(l, g, x))
-    loss(l, g::GNNGraph, x, e) = sum(l(g, x, e))
+    function loss(l, g::GNNGraph, x, e)
+        y = f(l, g, x, e)
+        if outtype == :node_edge
+            return sum(y[1]) + sum(y[2])
+        else
+            return sum(y)
+        end
+    end
 
 
     # TEST OUTPUT
-    y = f(l, g, x)
-    @test eltype(y) == eltype(x)
-    @test all(isfinite, y)
-    if !isnothing(outsize)
-        @test size(y) == outsize
+    y = f(l, g, x, e)
+    if outtype == :node_edge
+        @assert y isa Tuple
+        @test eltype(y[1]) == eltype(x)
+        @test eltype(y[2]) == eltype(e)
+        @test all(isfinite, y[1])
+        @test all(isfinite, y[2])
+        if !isnothing(outsize)
+            @test size(y[1]) == outsize[1]
+            @test size(y[2]) == outsize[2]
+        end
+    else
+        @test eltype(y) == eltype(x)
+        @test all(isfinite, y)
+        if !isnothing(outsize)
+            @test size(y) == outsize
+        end
     end
 
     # test same output on different graph formats
     gcoo = GNNGraph(g, graph_type=:coo)
-    ycoo = f(l, gcoo, x)
-    @test ycoo ≈ y
-
+    ycoo = f(l, gcoo, x, e)
+    if outtype == :node_edge
+        @test ycoo[1] ≈ y[1]
+        @test ycoo[2] ≈ y[2]
+    else
+        @test ycoo ≈ y
+    end
+
     g′ = f(l, g)
     if outtype == :node
         @test g′.ndata.x ≈ y
     elseif outtype == :edge
         @test g′.edata.e ≈ y
     elseif outtype == :graph
         @test g′.gdata.u ≈ y
+    elseif outtype == :node_edge
+        @test g′.ndata.x ≈ y[1]
+        @test g′.edata.e ≈ y[2]
     else
         @error "wrong outtype $outtype"
     end
     if test_gpu
-        ygpu = f(lgpu, ggpu, xgpu)
-        @test ygpu isa CuArray
-        @test eltype(ygpu) == eltype(xgpu)
-        @test Array(ygpu) ≈ y
+        ygpu = f(lgpu, ggpu, xgpu, egpu)
+        if outtype == :node_edge
+            @test ygpu[1] isa CuArray
+            @test eltype(ygpu[1]) == eltype(xgpu)
+            @test Array(ygpu[1]) ≈ y[1]
+            @test ygpu[2] isa CuArray
+            @test eltype(ygpu[2]) == eltype(xgpu)
+            @test Array(ygpu[2]) ≈ y[2]
+        else
+            @test ygpu isa CuArray
+            @test eltype(ygpu) == eltype(xgpu)
+            @test Array(ygpu) ≈ y
+        end
    end
 
 
     # TEST x INPUT GRADIENT
-    x̄ = gradient(x -> loss(l, g, x), x)[1]
-    x̄_fd = FiniteDifferences.grad(fdm, x64 -> loss(l64, g64, x64), x64)[1]
+    x̄ = gradient(x -> loss(l, g, x, e), x)[1]
+    x̄_fd = FiniteDifferences.grad(fdm, x64 -> loss(l64, g64, x64, e64), x64)[1]
     @test eltype(x̄) == eltype(x)
     @test x̄ ≈ x̄_fd atol=atol rtol=rtol
 
     if test_gpu
-        x̄gpu = gradient(xgpu -> loss(lgpu, ggpu, xgpu), xgpu)[1]
+        x̄gpu = gradient(xgpu -> loss(lgpu, ggpu, xgpu, egpu), xgpu)[1]
         @test x̄gpu isa CuArray
         @test eltype(x̄gpu) == eltype(x)
         @test Array(x̄gpu) ≈ x̄ atol=atol rtol=rtol
@@ -112,13 +149,13 @@ function test_layer(l, g::GNNGraph; atol = 1e-6, rtol = 1e-5,
     end
 
 
-    # TEST LAYER GRADIENT - l(g, x)
-    l̄ = gradient(l -> loss(l, g, x), l)[1]
-    l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64, x64), l64)[1]
+    # TEST LAYER GRADIENT - l(g, x, e)
+    l̄ = gradient(l -> loss(l, g, x, e), l)[1]
+    l̄_fd = FiniteDifferences.grad(fdm, l64 -> loss(l64, g64, x64, e64), l64)[1]
     test_approx_structs(l, l̄, l̄_fd; atol, rtol, broken_grad_fields, exclude_grad_fields, verbose)
 
     if test_gpu
-        l̄gpu = gradient(lgpu -> loss(lgpu, ggpu, xgpu), lgpu)[1]
+        l̄gpu = gradient(lgpu -> loss(lgpu, ggpu, xgpu, egpu), lgpu)[1]
         test_approx_structs(lgpu, l̄gpu, l̄; atol, rtol, broken_grad_fields, exclude_grad_fields, verbose)
     end
