
Commit 96df778

improve degree and various fixes
1 parent d409d90 commit 96df778

File tree

10 files changed, +157 -78 lines changed


Project.toml

Lines changed: 1 addition & 0 deletions

@@ -18,6 +18,7 @@ MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
+ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

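The only change here is the new ProfileView dependency, presumably added to profile the layers while working on performance. A minimal usage sketch (illustrative only, not part of the diff):

using ProfileView
# Collect a profile of the call and open an interactive flame graph.
ProfileView.@profview [sum(rand(10^4)) for _ in 1:10^3]
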
perf/perf.jl

Lines changed: 17 additions & 5 deletions

@@ -15,15 +15,27 @@ function run_single_benchmark(N, c, D, CONV; gtype=:lg)
     g_gpu = g |> gpu
 
     m = CONV(D => D)
+    ps = Flux.params(m)
+
     m_gpu = m |> gpu
+    ps_gpu = Flux.params(m_gpu)
+
 
     res = Dict()
-    res["CPU"] = @benchmark $m($g)
+
+    res["CPU_FWD"] = @benchmark $m($g)
+    res["CPU_GRAD"] = @benchmark gradient(() -> sum($m($g).ndata.x), $ps)
 
-    try [GCNConv, GraphConv, GATConv]
-        res["GPU"] = @benchmark CUDA.@sync($m_gpu($g_gpu)) teardown=(GC.gc(); CUDA.reclaim())
+    try
+        res["GPU_FWD"] = @benchmark CUDA.@sync($m_gpu($g_gpu)) teardown=(GC.gc(); CUDA.reclaim())
+    catch
+        res["GPU_FWD"] = missing
+    end
+
+    try
+        res["GPU_GRAD"] = @benchmark CUDA.@sync(gradient(() -> sum($m_gpu($g_gpu).ndata.x), $ps_gpu)) teardown=(GC.gc(); CUDA.reclaim())
     catch
-        res["GPU"] = missing
+        res["GPU_GRAD"] = missing
    end
 
    return res
@@ -45,7 +57,7 @@ function run_benchmarks(;
     Ns = [10, 100, 1000, 10000],
     c = 6,
     D = 100,
-    layers = [GCNConv, GraphConv, GATConv],
+    layers = [GCNConv, GATConv],
     gtypes = [:coo, :sparse, :dense],
 )
 

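The benchmark now times the forward pass and the gradient separately, on CPU and GPU, using Flux's implicit parameters. A minimal sketch of the same pattern with a plain Dense layer standing in for CONV(D => D) (the model and input below are placeholders, assuming a recent Flux and BenchmarkTools):

using Flux, BenchmarkTools

m  = Dense(16 => 16)             # stand-in for CONV(D => D)
x  = randn(Float32, 16, 32)      # stand-in for the graph features
ps = Flux.params(m)              # implicit parameters, as in run_single_benchmark

fwd  = @benchmark $m($x)                            # forward only
grad = @benchmark gradient(() -> sum($m($x)), $ps)  # forward + backward
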
src/gnngraph.jl

Lines changed: 14 additions & 14 deletions

@@ -102,7 +102,7 @@ struct GNNGraph{T<:Union{COO_T,ADJMAT_T}}
     num_nodes::Int
     num_edges::Int
     num_graphs::Int
-    graph_indicator
+    graph_indicator # vector of ints or nothing
     ndata::NamedTuple
     edata::NamedTuple
     gdata::NamedTuple
@@ -216,10 +216,12 @@ s, t = edge_index(g)
 """
 edge_index(g::GNNGraph{<:COO_T}) = g.graph[1:2]
 
-edge_index(g::GNNGraph{<:ADJMAT_T}) = to_coo(g.graph)[1][1:2]
+edge_index(g::GNNGraph{<:ADJMAT_T}) = to_coo(g.graph, num_nodes=g.num_nodes)[1][1:2]
 
 edge_weight(g::GNNGraph{<:COO_T}) = g.graph[3]
 
+edge_weight(g::GNNGraph{<:ADJMAT_T}) = to_coo(g.graph, num_nodes=g.num_nodes)[1][3]
+
 LightGraphs.edges(g::GNNGraph) = zip(edge_index(g)...)
 
 LightGraphs.edgetype(g::GNNGraph) = Tuple{Int, Int}
@@ -278,6 +280,7 @@ end
 
 function LightGraphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType=Int; dir=:out)
     if g.graph[1] isa CuVector
+        # TODO revisit after https://github.com/JuliaGPU/CUDA.jl/pull/1152
         A, n, m = to_dense(g.graph, T, num_nodes=g.num_nodes)
     else
         A, n, m = to_sparse(g.graph, T, num_nodes=g.num_nodes)
@@ -293,17 +296,18 @@ function LightGraphs.adjacency_matrix(g::GNNGraph{<:ADJMAT_T}, T::DataType=eltyp
     return dir == :out ? A : A'
 end
 
-function LightGraphs.degree(g::GNNGraph{<:COO_T}, T=Int; dir=:out)
+function LightGraphs.degree(g::GNNGraph{<:COO_T}, T=nothing; dir=:out)
     s, t = edge_index(g)
+    T = isnothing(T) ? eltype(s) : T
     degs = fill!(similar(s, T, g.num_nodes), 0)
-    o = fill!(similar(s, Int, g.num_edges), 1)
+    src = 1
     if dir ∈ [:out, :both]
-        NNlib.scatter!(+, degs, o, s)
+        NNlib.scatter!(+, degs, src, s)
     end
     if dir ∈ [:in, :both]
-        NNlib.scatter!(+, degs, o, t)
+        NNlib.scatter!(+, degs, src, t)
     end
-    return degs
+    return degs
 end
 
 function LightGraphs.degree(g::GNNGraph{<:ADJMAT_T}, T=Int; dir=:out)
@@ -318,6 +322,7 @@ function LightGraphs.laplacian_matrix(g::GNNGraph, T::DataType=Int; dir::Symbol=
     return D - A
 end
 
+
 """
     normalized_laplacian(g, T=Float32; add_self_loops=false, dir=:out)
 
@@ -406,14 +411,15 @@ end
 function add_self_loops(g::GNNGraph{<:ADJMAT_T})
     A = g.graph
     @assert g.edata === (;)
+    num_edges = g.num_edges + g.num_nodes
     A = A + I
-    num_edges = g.num_edges + g.num_nodes
     GNNGraph(A,
              g.num_nodes, num_edges, g.num_graphs,
              g.graph_indicator,
              g.ndata, g.edata, g.gdata)
 end
 
+
 function remove_self_loops(g::GNNGraph{<:COO_T})
     s, t = edge_index(g)
     # TODO remove these constraints
@@ -572,9 +578,3 @@ end
 @non_differentiable degree(x...)
 @non_differentiable add_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
 @non_differentiable remove_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
-
-# # delete when https://github.com/JuliaDiff/ChainRules.jl/pull/472 is merged
-# function ChainRulesCore.rrule(::typeof(copy), x)
-#     copy_pullback(ȳ) = (NoTangent(), ȳ)
-#     return copy(x), copy_pullback
-# end

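The degree rewrite drops the per-edge vector of ones and instead scatters the scalar 1 over the source (or target) indices, relying on the new scalar-src scatter! methods added in src/utils.jl below; the element type now also defaults to the eltype of the edge index rather than Int. The idea in isolation, as a minimal dependency-free sketch:

# Out-degree = how many times each node appears as an edge source.
s = [1, 1, 2, 3, 3, 3]      # source node of each edge
degs = zeros(Int, 4)        # one counter per node, 4 nodes in total
for i in s
    degs[i] += 1            # what NNlib.scatter!(+, degs, 1, s) does element-wise
end
@assert degs == [2, 1, 3, 0]
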
src/graph_conversions.jl

Lines changed: 11 additions & 0 deletions

@@ -12,6 +12,17 @@ function to_coo(coo::COO_T; dir=:out, num_nodes=nothing)
     return coo, num_nodes, num_edges
 end
 
+function to_coo(A::SPARSE_T; dir=:out, num_nodes=nothing)
+    s, t, v = findnz(A)
+    if dir == :in
+        s, t = t, s
+    end
+    num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes
+    num_edges = length(s)
+
+    return (s, t, nothing), num_nodes, num_edges
+end
+
 function to_coo(A::ADJMAT_T; dir=:out, num_nodes=nothing)
     nz = findall(!=(0), A) # vec of cartesian indexes
     s, t = ntuple(i -> map(t->t[i], nz), 2)

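The new SPARSE_T method maps a sparse adjacency matrix straight to COO form: findnz returns the row indices, column indices, and stored values, which become sources, targets, and (discarded) weights. A quick sketch using only SparseArrays:

using SparseArrays

# Adjacency matrix of the directed cycle 1→2→3→1, rows as sources, columns as targets.
A = sparse([1, 2, 3], [2, 3, 1], [1, 1, 1], 3, 3)
s, t, v = findnz(A)    # row indices, column indices, stored values (in column-major order)
@assert sort(collect(zip(s, t))) == [(1, 2), (2, 3), (3, 1)]
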
src/msgpass.jl

Lines changed: 16 additions & 7 deletions

@@ -146,16 +146,25 @@ copyxj(xi, xj, e) = xj
 # ximulxj(xi, xj, e) = xi .* xj
 # xiaddxj(xi, xj, e) = xi .+ xj
 
-# function propagate(::typeof(copyxj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e)
-#     A = adjacency_matrix(g)
-#     return xj * A
-# end
+
+function propagate(::typeof(copyxj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e)
+    A = adjacency_matrix(g)
+    return xj * A
+end
+
+## avoid the fast path on gpu until we have better cuda support
+function propagate(::typeof(copyxj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
+    propagate((xi,xj,e)->copyxj(xi,xj,e), g, +, xi, xj, e)
+end
 
 # function propagate(::typeof(copyxj), g::GNNGraph, ::typeof(mean), xi, xj::AbstractMatrix, e)
 #     A = adjacency_matrix(g)
-#     degs = vec(sum(A; dims=2))
-#     D = Diagonal(ofeltype(xj, 1) ./ degs)
-#     # A, D = _aa(g, xj)
+#     D = compute_degree(A)
 #     return xj * A * D
 # end
 
+# # Zygote bug. Error with sparse matrix without nograd
+# compute_degree(A) = Diagonal(1f0 ./ vec(sum(A; dims=2)))
+
+# Flux.Zygote.@nograd compute_degree
+

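The re-enabled fast path works because copying each neighbor's features and sum-aggregating them is exactly a matrix product: column i of xj * A is the sum of xj[:, j] over all j with A[j, i] = 1, i.e. over the in-neighbors of i. The second method routes CuArray inputs back to the generic gather/scatter implementation, as the comment in the diff notes. A small sketch of the identity, independent of the package:

# A[j, i] = 1 if there is an edge j → i (the dir = :out convention of adjacency_matrix).
X = [1.0 2.0 3.0;      # 2×3 feature matrix, one column per node
     4.0 5.0 6.0]
A = [0 1 0;            # edges 1→2, 2→3, 3→1
     0 0 1;
     1 0 0]
# Message into node i = sum of X[:, j] over the in-neighbors j of i.
agg = hcat([sum(X[:, j] for j in 1:3 if A[j, i] == 1) for i in 1:3]...)
@assert agg == X * A
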
src/utils.jl

Lines changed: 65 additions & 1 deletion

@@ -68,5 +68,69 @@ function normalize_graphdata(data::NamedTuple; default_name, n, duplicate_if_nee
     return data
 end
 
+ofeltype(x, y) = convert(float(eltype(x)), y)
 
-ofeltype(x, y) = convert(float(eltype(x)), y)
+# TODO move to flux. fix for https://github.com/FluxML/Flux.jl/issues/1720
+Flux._cpu_array(x::AbstractSparseArray) = Flux.adapt(SparseMatrixCSC, x)
+
+# TODO. FIX THIS HACK. CUDA.jl support for sparse matrices is very bad, convert to dense
+# Revisit after https://github.com/JuliaGPU/CUDA.jl/pull/1152
+Flux._gpu_array(x::AbstractSparseArray) = CuMatrix(x)
+
+
+# Considers the src a zero dimensional object.
+# Useful for implementing `StatsBase.counts`, `degree`, etc...
+# function NNlib.scatter!(op, dst::AbstractArray, src::Number, idx::AbstractArray)
+#     for k in CartesianIndices(idx)
+#         # dst_v = NNlib._view(dst, idx[k])
+#         # dst_v .= (op).(dst_v, src)
+#         dst[idx[k]] .= (op).(dst[idx[k]], src)
+#     end
+#     dst
+# end
+
+# 10 times faster than the generic version above.
+# All the speedup comes from not broadcasting `op`, I don't know why.
+function NNlib.scatter!(op, dst::AbstractVector, src::Number, idx::AbstractVector{<:Integer})
+    for i in idx
+        dst[i] = op(dst[i], src)
+    end
+end
+
+# NNlib._view(X, k) = view(X, k...)
+# NNlib._view(X, k::Union{Integer, CartesianIndex}) = view(X, k)
+
+# Considers src as a zero dimensional object to be scattered
+# function NNlib.scatter(op,
+#                        src::Tsrc,
+#                        idx::AbstractArray{Tidx,Nidx};
+#                        init = nothing, dstsize = nothing) where {Tsrc<:Number,Tidx,Nidx}
+
+#     dstsz = isnothing(dstsize) ? maximum_dims(idx) : dstsize
+#     dst = similar(src, Tsrc, dstsz)
+#     xinit = isnothing(init) ? scatter_empty(op, Tsrc) : init
+#     fill!(dst, xinit)
+#     scatter!(op, dst, src, idx)
+# end
+
+
+function scatter_scalar_kernel!(op, dst, src, idx)
+    index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
+
+    @inbounds if index <= length(idx)
+        CUDA.@atomic dst[idx[index]...] = op(dst[idx[index]...], src)
+    end
+    return nothing
+end
+
+function NNlib.scatter!(op, dst::AnyCuArray, src::Number, idx::AnyCuArray)
+    max_idx = length(idx)
+    args = op, dst, src, idx
+
+    kernel = @cuda launch=false scatter_scalar_kernel!(args...)
+    config = launch_configuration(kernel.fun; max_threads=256)
+    threads = min(max_idx, config.threads)
+    blocks = cld(max_idx, threads)
+    kernel(args...; threads=threads, blocks=blocks)
+    return dst
+end

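The CUDA method follows the standard CUDA.jl occupancy pattern: compile with @cuda launch=false, ask launch_configuration for a thread count, then launch with threads/blocks derived from the problem size. A hedged, self-contained sketch of the same pattern on a toy kernel (all names below are illustrative and assume a functional GPU):

using CUDA

# Atomically add `val` into dst at every position listed in idx.
function add_at_kernel!(dst, val, idx)
    i = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    @inbounds if i <= length(idx)
        CUDA.@atomic dst[idx[i]] += val
    end
    return nothing
end

dst = CUDA.zeros(Int32, 4)
idx = cu(Int32[1, 1, 2, 4, 4, 4])
kernel  = @cuda launch=false add_at_kernel!(dst, Int32(1), idx)
config  = launch_configuration(kernel.fun)   # occupancy-based suggestion
threads = min(length(idx), config.threads)
blocks  = cld(length(idx), threads)
kernel(dst, Int32(1), idx; threads, blocks)
@assert Array(dst) == [2, 1, 0, 3]           # counts of 1, 2, 3, 4 in idx
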
test/cuda/gnngraph.jl

Lines changed: 7 additions & 0 deletions

@@ -32,6 +32,13 @@ const ACUMatrix{T} = Union{CuMatrix{T}, CUDA.CUSPARSE.CuSparseMatrix{T}}
         @test Array(mat_gpu) == mat
     end
 
+    @testset "degree" begin
+        d = degree(g)
+        d_gpu = degree(g_gpu)
+        @test d_gpu isa CuVector
+        @test Array(d_gpu) == d
+    end
+
     @testset "scaled_laplacian" begin
         @test_broken begin
             mat = scaled_laplacian(g)

test/examples/node_classification_cora.jl

Lines changed: 25 additions & 17 deletions

@@ -65,7 +65,7 @@ function train(Layer; verbose=false, kws...)
     end
 
     verbose && report(0)
-    for epoch in 1:args.epochs
+    @time for epoch in 1:args.epochs
         gs = Flux.gradient(ps) do
             ŷ = model(g, X)
             logitcrossentropy(ŷ[:,train_ids], ytrain)
@@ -79,21 +79,29 @@ function train(Layer; verbose=false, kws...)
     return train_res, test_res
 end
 
-for (layer, Layer) in [
-        ("GCNConv", (nin, nout) -> GCNConv(nin => nout, relu)),
-        ("GraphConv", (nin, nout) -> GraphConv(nin => nout, relu, aggr=mean)),
-        ("SAGEConv", (nin, nout) -> SAGEConv(nin => nout, relu)),
-        ("GATConv", (nin, nout) -> GATConv(nin => nout, relu)),
-        ("GINConv", (nin, nout) -> GINConv(Dense(nin, nout, relu), 0.01, aggr=mean)),
-        ("ChebConv", (nin, nout) -> ChebConv(nin => nout, 2)),
-        ("ResGatedGraphConv", (nin, nout) -> ResGatedGraphConv(nin => nout, relu)),
-        # (nin, nout) -> NNConv(nin => nout), # needs edge features
-        # (nin, nout) -> GatedGraphConv(nout, 2), # needs nin = nout
-        # (nin, nout) -> EdgeConv(Dense(2nin, nout, relu)), # Fits the training set but does not generalize well
-    ]
+function train_many(; usecuda=false)
+    for (layer, Layer) in [
+            ("GCNConv", (nin, nout) -> GCNConv(nin => nout, relu)),
+            ("ResGatedGraphConv", (nin, nout) -> ResGatedGraphConv(nin => nout, relu)),
+            ("GraphConv", (nin, nout) -> GraphConv(nin => nout, relu, aggr=mean)),
+            ("SAGEConv", (nin, nout) -> SAGEConv(nin => nout, relu)),
+            ("GATConv", (nin, nout) -> GATConv(nin => nout, relu)),
+            ("GINConv", (nin, nout) -> GINConv(Dense(nin, nout, relu), 0.01, aggr=mean)),
+            ## ("ChebConv", (nin, nout) -> ChebConv(nin => nout, 2)), # not working on gpu
+            ## ("NNConv", (nin, nout) -> NNConv(nin => nout)), # needs edge features
+            ## ("GatedGraphConv", (nin, nout) -> GatedGraphConv(nout, 2)), # needs nin = nout
+            ## ("EdgeConv",(nin, nout) -> EdgeConv(Dense(2nin, nout, relu))), # Fits the training set but does not generalize well
+        ]
 
-    @show layer
-    @time train_res, test_res = train(Layer, verbose=false)
-    @test train_res.acc > 95
-    @test test_res.acc > 70
+        @show layer
+        @time train_res, test_res = train(Layer; usecuda, verbose=false)
+        @test train_res.acc > 94
+        @test test_res.acc > 70
+    end
+end
+
+## if GRAPH_T != :dense # some erratic errors with :dense
+train_many(usecuda=false)
+if TEST_GPU
+    train_many(usecuda=true)
 end

test/msgpass.jl

Lines changed: 0 additions & 33 deletions

@@ -57,37 +57,4 @@
 
     @test m.a == ones(out_channel, num_E)
 end
-
-
-# @testset "NamedTuples" begin
-#     struct NewLayerNT{G}
-#         W
-#     end
-
-#     NewLayerNT(in, out) = NewLayerNT{GRAPH_T}(randn(T, out, in))
-
-#     function GraphNeuralNetworks.compute_message(l::NewLayerNT{GRAPH_T}, di, dj, dij)
-#         a = l.W * (di.x .+ dj.x .+ dij.e)
-#         b = l.W * di.x
-#         return (; a, b)
-#     end
-#     function GraphNeuralNetworks.update_node(l::NewLayerNT{GRAPH_T}, m, d)
-#         return (α=l.W * d.x + m.a + m.b, β=m)
-#     end
-#     function GraphNeuralNetworks.update_edge(l::NewLayerNT{GRAPH_T}, m, e)
-#         return m.a
-#     end
-
-#     function (::NewLayerNT{GRAPH_T})(g, x, e)
-#         x, e = propagate(l, g, mean, (; x), (; e))
-#         return x.α .+ x.β.a, e
-#     end
-
-#     l = NewLayerNT(in_channel, out_channel)
-#     g = GNNGraph(adj, graph_type=GRAPH_T)
-#     X′, E′ = l(g, X, E)
-
-#     @test size(X′) == (out_channel, num_V)
-#     @test size(E′) == (out_channel, num_E)
-# end
 end

test/runtests.jl

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ tests = [
 @testset "GraphNeuralNetworks: graph format $graph_type" for graph_type in (:coo,:sparse,:dense)
 
     global GRAPH_T = graph_type
-    global TEST_GPU = CUDA.functional() && GRAPH_T != :sparse
+    global TEST_GPU = CUDA.functional()# && GRAPH_T != :sparse
 
     for t in tests
         include("$t.jl")
