
Commit 4587f7f

propagate redesign

1 parent 40154c7 commit 4587f7f

File tree: 5 files changed (+54 −48 lines)


Project.toml

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LearnBase = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

src/deprecations.jl

Lines changed: 4 additions & 0 deletions
@@ -4,3 +4,7 @@
 
 # TO Deprecate
 # x, _ = propagate(l, g, l.aggr, x, e)
+
+# # TODO deprecate
+# propagate(l, g::GNNGraph, aggr, x, e=nothing) = propagate(l, g, aggr; x, e)
+

src/layers/conv.jl

Lines changed: 19 additions & 31 deletions
@@ -39,26 +39,22 @@ function GCNConv(ch::Pair{Int,Int}, σ=identity;
     GCNConv(W, b, σ, add_self_loops)
 end
 
-## Matrix operations are more performant,
-## but cannot compute the normalized adjacency of sparse cuda matrices yet,
-## therefore fallback to message passing framework on gpu for the time being
-
 function (l::GCNConv)(g::GNNGraph, x::AbstractMatrix{T}) where T
-    Ã = normalized_adjacency(g, T; dir=:out, l.add_self_loops)
-    l.σ.(l.weight * x * Ã .+ l.bias)
-end
-
-compute_message(l::GCNConv, xi, xj, eij) = xj
-
-function (l::GCNConv)(g::GNNGraph, x::CuMatrix{T}) where T
     if l.add_self_loops
         g = add_self_loops(g)
     end
+    Dout, Din = size(l.weight)
+    if Dout < Din
+        x = l.weight * x
+    end
     c = 1 ./ sqrt.(degree(g, T, dir=:in))
     x = x .* c'
-    x = propagate(l, g, +, xj=x)
+    x = propagate(copyxj, g, +, xj=x)
     x = x .* c'
-    return l.σ.(l.weight * x .+ l.bias)
+    if Dout >= Din
+        x = l.weight * x
+    end
+    return l.σ.(x .+ l.bias)
 end
 
 function Base.show(io::IO, l::GCNConv)
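
Reviewer note (not part of the diff): the new `Dout < Din` branch exploits the fact that propagation cost scales linearly with the feature dimension, so the dense projection is applied on whichever side of the message passing leaves fewer channels flowing through the graph. A minimal standalone sketch of the ordering logic, with made-up sizes:

W = rand(Float32, 16, 64)    # layer weight, Dout = 16, Din = 64
x = rand(Float32, 64, 1000)  # 64 features for 1000 nodes

Dout, Din = size(W)
if Dout < Din
    x = W * x                # project early: only 16×1000 values traverse the graph
end
# ... message passing runs here on min(Dout, Din) × N features ...
if Dout >= Din
    x = W * x                # project late when the input side is already smaller
end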
@@ -180,11 +176,9 @@ function GraphConv(ch::Pair{Int,Int}, σ=identity; aggr=+,
     GraphConv(W1, W2, b, σ, aggr)
 end
 
-compute_message(l::GraphConv, x_i, x_j, e_ij) = x_j
-
 function (l::GraphConv)(g::GNNGraph, x::AbstractMatrix)
     check_num_nodes(g, x)
-    m = propagate(l, g, l.aggr, xj=x)
+    m = propagate(copyxj, g, l.aggr, xj=x)
     x = l.σ.(l.weight1 * x .+ l.weight2 * m .+ l.bias)
     return x
 end
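
Reviewer note: the per-layer trivial methods like `compute_message(l::GraphConv, x_i, x_j, e_ij) = x_j` removed in this file are all replaced by the shared `copyxj` function, which is what makes the fused `propagate(::typeof(copyxj), g, ::typeof(+), ...)` specialization in src/msgpass.jl reachable. A hedged usage sketch on a tiny graph (assuming this package's `GNNGraph` constructor from source/target vectors):

using GraphNeuralNetworks

g = GNNGraph([1, 2, 3], [2, 3, 1])  # directed triangle
x = rand(Float32, 4, 3)             # 4 features per node
m = propagate(copyxj, g, +, xj=x)   # 4×3 sums of in-neighbor features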
@@ -272,7 +266,7 @@ function (l::GATConv)(g::GNNGraph, x::AbstractMatrix)
     Wx = l.weight * x
     Wx = reshape(Wx, chout, heads, :)              # chout × nheads × nnodes
 
-    m̄ = propagate(l, g, +; x=Wx)                   ## chout × nheads × nnodes
+    m̄ = propagate(l, g, +; xi=Wx, xj=Wx)           ## chout × nheads × nnodes
     x = m̄.m ./ m̄.α
 
     if !l.concat
@@ -330,8 +324,6 @@ function GatedGraphConv(out_ch::Int, num_layers::Int;
     GatedGraphConv(w, gru, out_ch, num_layers, aggr)
 end
 
-compute_message(l::GatedGraphConv, x_i, x_j, e_ij) = x_j
-
 # remove after https://github.com/JuliaDiff/ChainRules.jl/pull/521
 @non_differentiable fill!(x...)

@@ -345,7 +337,7 @@ function (l::GatedGraphConv)(g::GNNGraph, H::AbstractMatrix{S}) where {S<:Real}
     end
     for i = 1:l.num_layers
         M = view(l.weight, :, :, i) * H
-        M = propagate(l, g, l.aggr; xj=M)
+        M = propagate(copyxj, g, l.aggr; xj=M)
         H, _ = l.gru(H, M)
     end
     H
@@ -387,8 +379,8 @@ EdgeConv(nn; aggr=max) = EdgeConv(nn, aggr)
 compute_message(l::EdgeConv, x_i, x_j, e_ij) = l.nn(vcat(x_i, x_j .- x_i))
 
 function (l::EdgeConv)(g::GNNGraph, x::AbstractMatrix)
-    check_num_nodes(g, X)
-    x = propagate(l, g, l.aggr; x)
+    check_num_nodes(g, x)
+    x = propagate(l, g, l.aggr, xi=x, xj=x)
     return x
 end

@@ -426,11 +418,9 @@ Flux.trainable(l::GINConv) = (l.nn,)
 
 GINConv(nn, ϵ; aggr=+) = GINConv(nn, ϵ, aggr)
 
-compute_message(l::GINConv, x_i, x_j, e_ij) = x_j
-
 function (l::GINConv)(g::GNNGraph, x::AbstractMatrix)
     check_num_nodes(g, x)
-    m = propagate(l, g, l.aggr, xj=x)
+    m = propagate(copyxj, g, l.aggr, xj=x)
     l.nn((1 + ofeltype(x, l.ϵ)) * x + m)
 end
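
For reference (reviewer annotation), the unchanged `l.nn((1 + ofeltype(x, l.ϵ)) * x + m)` line is the GIN update rule, x′ᵢ = nn((1 + ϵ)·xᵢ + Σⱼ∈N(i) xⱼ), with `m` now produced by the `copyxj` path. A toy evaluation with stand-in values (`nn` and `ϵ` are hypothetical here):

nn = x -> 2 .* x               # stand-in for the layer's MLP
ϵ  = 0.1f0
xi = Float32[1, 2]             # one node's features
m  = Float32[3, 4]             # aggregated neighbor sum for that node
out = nn((1 + ϵ) .* xi .+ m)   # Float32[8.2, 12.4]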

@@ -549,11 +539,9 @@ function SAGEConv(ch::Pair{Int,Int}, σ=identity; aggr=mean,
     SAGEConv(W, b, σ, aggr)
 end
 
-compute_message(l::SAGEConv, x_i, x_j, e_ij) = x_j
-
 function (l::SAGEConv)(g::GNNGraph, x::AbstractMatrix)
     check_num_nodes(g, x)
-    m = propagate(l, g, l.aggr, xj=x)
+    m = propagate(copyxj, g, l.aggr, xj=x)
     x = l.σ.(l.weight * vcat(x, m) .+ l.bias)
     return x
 end
@@ -613,9 +601,9 @@ function ResGatedGraphConv(ch::Pair{Int,Int}, σ=identity;
     return ResGatedGraphConv(A, B, U, V, b, σ)
 end
 
-function compute_message(l::ResGatedGraphConv, di, dj)
-    η = sigmoid.(di.Ax .+ dj.Bx)
-    return η .* dj.Vx
+function compute_message(l::ResGatedGraphConv, xi, xj, e)
+    η = sigmoid.(xi.Ax .+ xj.Bx)
+    η .* xj.Vx
 end
 
 function (l::ResGatedGraphConv)(g::GNNGraph, x::AbstractMatrix)
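
Reviewer note: `compute_message` now uniformly takes `(layer, xi, xj, e)`, even when edge features go unused, matching the `apply_edges` closure in src/msgpass.jl. A minimal sketch of a custom layer under the new signature (`MyConv` is hypothetical):

struct MyConv <: GNNLayer
    weight::Matrix{Float32}
end

# Always accept (xi, xj, e); ignore e when the layer carries no edge features.
compute_message(l::MyConv, xi, xj, e) = l.weight * (xj .- xi)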

src/msgpass.jl

Lines changed: 29 additions & 16 deletions
@@ -53,21 +53,15 @@ See also [`compute_message`](@ref) and [`update_node`](@ref).
 """
 function propagate end
 
+propagate(l, g::GNNGraph, aggr; xi=nothing, xj=nothing, e=nothing) =
+    propagate(l, g, aggr, xi, xj, e)
 
-function propagate(l, g::GNNGraph, aggr; x=nothing, xi=nothing, xj=nothing, e=nothing)
-    if !isnothing(x)
-        @assert isnothing(xi)
-        @assert isnothing(xj)
-        xi, xj = x, x
-    end
+function propagate(l, g::GNNGraph, aggr, xi, xj, e)
     m = apply_edges(l, g, xi, xj, e)
     m̄ = aggregate_neighbors(g, aggr, m)
     return m̄
 end
 
-# TODO deprecate
-propagate(l, g::GNNGraph, aggr, x, e=nothing) = propagate(l, g, aggr; x, e)
-
 ## Step 1.
 
 """
@@ -106,8 +100,11 @@ _gather(x::Tuple, i) = map(x -> _gather(x, i), x)
 _gather(x::AbstractArray, i) = NNlib.gather(x, i)
 _gather(x::Nothing, i) = nothing
 
+apply_edges(l, g::GNNGraph; xi=nothing, xj=nothing, e=nothing) =
+    apply_edges(l, g, xi, xj, e)
+
 apply_edges(l::GNNLayer, g::GNNGraph, xi, xj, e) =
-    apply_edges((a...) -> compute_message(l, a...), g::GNNGraph, xi, xj, e)
+    apply_edges((xi, xj, e) -> compute_message(l, xi, xj, e), g, xi, xj, e)
 
 function apply_edges(f, g::GNNGraph, xi, xj, e)
     s, t = edge_index(g)
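
The kwarg shim added here also makes `apply_edges` convenient with plain anonymous functions; a hedged usage sketch (assuming a `GNNGraph` `g` and node feature matrix `x` are in scope):

# per-edge differences of endpoint features; e is accepted but unused
edge_feats = apply_edges((xi, xj, e) -> xj .- xi, g, xi=x, xj=x)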
@@ -120,16 +117,32 @@ end
 
 ## Step 2
 
-_scatter(aggr, e::NamedTuple, t) = map(e -> _scatter(aggr, e, t), e)
-_scatter(aggr, e::Tuple, t) = map(e -> _scatter(aggr, e, t), e)
-_scatter(aggr, e::AbstractArray, t) = NNlib.scatter(aggr, e, t)
-_scatter(aggr, e::Nothing, t) = nothing
+_scatter(aggr, m::NamedTuple, t) = map(m -> _scatter(aggr, m, t), m)
+_scatter(aggr, m::Tuple, t) = map(m -> _scatter(aggr, m, t), m)
+_scatter(aggr, m::AbstractArray, t) = NNlib.scatter(aggr, m, t)
 
-function aggregate_neighbors(g::GNNGraph, aggr, e)
+function aggregate_neighbors(g::GNNGraph, aggr, m)
     s, t = edge_index(g)
-    return _scatter(aggr, e, t)
+    return _scatter(aggr, m, t)
 end
 
 aggregate_neighbors(g::GNNGraph, aggr::Nothing, e) = nothing
 
 ### end steps ###
+
+
+
+### SPECIALIZATIONS OF PROPAGATE ###
+copyxi(xi, xj, e) = xi
+copyxj(xi, xj, e) = xj
+ximulxj(xi, xj, e) = xi .* xj
+xiaddxj(xi, xj, e) = xi .+ xj
+
+function propagate(::typeof(copyxj), g::GNNGraph, ::typeof(+), xi, xj, e)
+    A = adjacency_matrix(g)
+    return xj * A
+end
+
+# TODO divide by degree
+# propagate(::typeof(copyxj), g::GNNGraph, ::typeof(mean), xi, xj, e)
+
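
Reviewer note on the `copyxj`/`+` specialization above: gathering `xj` along source nodes and scatter-summing onto targets yields, for node i, Σⱼ:ⱼ→ᵢ xⱼ, which is exactly right-multiplication of the column-major feature matrix by the adjacency matrix. A hedged sanity check on a tiny graph (assuming this package's API at this commit):

using GraphNeuralNetworks, NNlib

s, t = [1, 2, 3], [2, 3, 1]     # directed triangle
g = GNNGraph(s, t)
x = rand(Float32, 4, 3)

m_scatter = NNlib.scatter(+, NNlib.gather(x, s), t)  # generic gather/scatter path
m_fused   = x * adjacency_matrix(g)                  # fused path
m_scatter ≈ m_fused                                  # expected: true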

test/examples/node_classification_cora.jl

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ for (layer, Layer) in [
     ]
 
     @show layer
-    @time train_res, test_res = train(Layer, verbose=false)
+    @time train_res, test_res = train(Layer, verbose=true)
     @test train_res.acc > 95
     @test test_res.acc > 70
 end
