Commit 4510d98

Merge pull request #8 from CarloLucibello/cl/bench

add benchmarks

2 parents: 8022b4d + a7f260c

7 files changed: +80 additions, -42 deletions

docs/make.jl
Lines changed: 1 addition & 0 deletions

@@ -8,6 +8,7 @@ makedocs(;
     sitename = "GraphNeuralNetworks.jl",
     pages=[
         "Home" => "index.md",
+        "Developer Notes" => "dev.md",
     ],
 )
 

docs/src/dev.md
Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+# Developer Notes
+
+## Benchmarking
+
+You can benchmark the performance impact of your commits using the script `perf/perf.jl`.
+
+First, check out and benchmark the master branch:
+
+```julia
+julia> include("perf.jl")
+
+julia> df = run_benchmarks()
+
+# observe results
+julia> for g in groupby(df, :layer); println(g, "\n"); end
+
+julia> @save "perf_master_20210803_mymachine.jld2" dfmaster=df
+```
+
+Now check out your branch and do the same:
+
+```julia
+julia> df = run_benchmarks()
+
+julia> @save "perf_pr_20210803_mymachine.jld2" dfpr=df
+```
+
+Finally, compare the results:
+
+```julia
+julia> @load "perf_master_20210803_mymachine.jld2"
+
+julia> @load "perf_pr_20210803_mymachine.jld2"
+
+julia> compare(dfpr, dfmaster)
+```
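The workflow above relies on JLD2 storing each value under the name given on the left of the `=`, so the two runs reload as distinct variables (`dfmaster`, `dfpr`) ready for `compare`. A minimal sketch of that round trip, using a toy table rather than real benchmark output:

```julia
using JLD2, DataFrames

df = DataFrame(layer=["GCNConv"], N=[10], time_cpu=[1.0])

@save "demo_master.jld2" dfmaster=df   # store `df` under the name `dfmaster`
@save "demo_pr.jld2" dfpr=df           # store `df` under the name `dfpr`

@load "demo_master.jld2"   # defines `dfmaster` in the current global scope
@load "demo_pr.jld2"       # defines `dfpr`
```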

perf/Project.toml
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
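Before running the script, one would typically activate this environment. A hypothetical session from the repository root (note that `Flux` and `CUDA`, which `perf/perf.jl` also loads, are not listed above and would have to come from Julia's stacked default environment):

```julia
using Pkg
Pkg.activate("perf")     # use perf/Project.toml
Pkg.instantiate()        # install the dependencies listed above
include("perf/perf.jl")
```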

perf/perf.jl
Lines changed: 28 additions & 35 deletions

@@ -1,81 +1,74 @@
 using Flux, GraphNeuralNetworks, LightGraphs, BenchmarkTools, CUDA
 using DataFrames, Statistics, JLD2, SparseArrays
+CUDA.device!(2)
+CUDA.allowscalar(false)
 
 BenchmarkTools.ratio(::Missing, x) = Inf
 BenchmarkTools.ratio(x, ::Missing) = 0.0
 BenchmarkTools.ratio(::Missing, ::Missing) = missing
 
-adjlist(g) = [neighbors(g, i) for i in 1:nv(g)]
-
 function run_single_benchmark(N, c, D, CONV; gtype=:lg)
-    @assert gtype ∈ [:lightgraph, :adjlist, :dense, :sparse]
     g = erdos_renyi(N, c / (N-1), seed=17)
-    if gtype == :adjlist
-        g = adjlist(g)
-    elseif gtype == :dense
-        g = Array(adjacency_matrix(g))
-    elseif gtype == :sparse
-        g = adjacency_matrix(g) # lightgraphs returns sparse adj mats
-    end
-
-    res = Dict()
     X = randn(Float32, D, N)
-    res["FG"] = @benchmark FeaturedGraph($g, nf=$X)
 
-    fg = FeaturedGraph(g, nf=X)
+    fg = FeaturedGraph(g; nf=X, graph_type=gtype)
     fg_gpu = fg |> gpu
 
     m = CONV(D => D)
     m_gpu = m |> gpu
-    try
-        res["CPU"] = @benchmark $m($fg)
-    catch
-        res["CPU"] = missing
-    end
-
-    try
+
+    res = Dict()
+    res["CPU"] = @benchmark $m($fg)
+
+    try
         res["GPU"] = @benchmark CUDA.@sync($m_gpu($fg_gpu)) teardown=(GC.gc(); CUDA.reclaim())
     catch
         res["GPU"] = missing
     end
 
     return res
 end
+
 """
     run_benchmarks(;
        Ns = [10, 100, 1000, 10000],
        c = 6,
-       D = 100)
+       D = 100,
+       layers = [GCNConv, GraphConv, GATConv]
+       )
 
-Benchmark GNN layers on random regular graphs
-of mean connectivity `c` and number of nodes in the list `Ns`.
+Benchmark GNN layers on Erdos-Renyi random graphs
+with average degree `c`. Benchmarks are performed for each graph size in the list `Ns`.
 `D` is the number of node features.
 """
 function run_benchmarks(;
        Ns = [10, 100, 1000, 10000],
-       c = 6.0,
-       D = 100)
+       c = 6,
+       D = 100,
+       layers = [GCNConv, GraphConv, GATConv],
+       gtypes = [:coo, :sparse, :dense],
+       )
 
     df = DataFrame(N=Int[], c=Float64[], layer=String[], gtype=Symbol[],
-                   time_fg=Any[], time_cpu=Any[], time_gpu=Any[]) |> allowmissing
+                   time_cpu=Any[], time_gpu=Any[]) |> allowmissing
 
-    for gtype in [:lightgraph, :adjlist, :dense, :sparse]
+    for gtype in gtypes
         for N in Ns
             println("## GRAPH_TYPE = $gtype N = $N")
-            for CONV in [GCNConv, GraphConv, GATConv]
+            for CONV in layers
                 res = run_single_benchmark(N, c, D, CONV; gtype)
-                row = (; N = N,
+                row = (;layer = "$CONV",
+                        N = N,
                         c = c,
-                        layer = "$CONV",
                         gtype = gtype,
-                        time_fg = median(res["FG"]),
                         time_cpu = ismissing(res["CPU"]) ? missing : median(res["CPU"]),
                         time_gpu = ismissing(res["GPU"]) ? missing : median(res["GPU"]),
                     )
                 push!(df, row)
             end
         end
     end
+
     df.gpu_to_cpu = ratio.(df.time_gpu, df.time_cpu)
     sort!(df, [:layer, :N, :c, :gtype])
     return df

@@ -89,11 +82,11 @@ end
 # @save "perf/perf_pr.jld2" dfpr=df
 
 
-function compare(dfpr, dfmaster; on=[:N, :c, :layer])
+function compare(dfpr, dfmaster; on=[:N, :c, :gtype, :layer])
     df = outerjoin(dfpr, dfmaster; on=on, makeunique=true, renamecols = :_pr => :_master)
     df.pr_to_master_cpu = ratio.(df.time_cpu_pr, df.time_cpu_master)
-    df.pr_to_master_gpu = ratio.(df.time_cpu_pr, df.time_gpu_master)
-    return df[:,[:N, :c, :gtype_pr, :gtype_master, :layer, :pr_to_master_cpu, :pr_to_master_gpu]]
+    df.pr_to_master_gpu = ratio.(df.time_gpu_pr, df.time_gpu_master)
+    return df[:,[:N, :c, :gtype, :layer, :pr_to_master_cpu, :pr_to_master_gpu]]
 end
 
 # @load "perf/perf_pr.jld2" dfpr
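Two details worth noting. The `BenchmarkTools.ratio` methods added at the top let `compare` handle layers that only ran on one side: a `missing` timing compares as `Inf` or `0.0` instead of raising an error. And the new keyword arguments make reduced runs easy; a hypothetical smoke test, with argument names as defined above:

```julia
# Small graphs, a subset of layers, one graph type:
df = run_benchmarks(Ns=[10, 100], layers=[GCNConv, GraphConv], gtypes=[:coo])

# Per-layer view, as suggested in docs/src/dev.md:
for g in groupby(df, :layer)
    println(g, "\n")
end
```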
Binary file (29.3 KB) not shown.

src/graph_conversions.jl
Lines changed: 0 additions & 1 deletion

@@ -114,7 +114,6 @@ function to_sparse(A::ADJMAT_T, T::DataType=eltype(A); dir=:out, num_nodes=nothi
     return sparse(A), num_nodes, num_edges
 end
 
-
 function to_sparse(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     coo, num_nodes, num_edges = to_coo(adj_list; dir, num_nodes)
     to_sparse(coo; dir, num_nodes)

src/layers/conv.jl
Lines changed: 9 additions & 6 deletions

@@ -30,16 +30,19 @@ function GCNConv(ch::Pair{Int,Int}, σ=identity;
     GCNConv(W, b, σ)
 end
 
-
-# function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
-#     L̃ = normalized_laplacian(fg, eltype(x); selfloop=true)
-#     l.σ.(l.weight * x * L̃ .+ l.bias)
-# end
+## Matrix operations are more performant,
+## but cannot compute the normalized laplacian of sparse cuda matrices yet,
+## therefore fallback to message passing framework on gpu for the time being
+
+function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
+    L̃ = normalized_laplacian(fg, eltype(x); selfloop=true)
+    l.σ.(l.weight * x * L̃ .+ l.bias)
+end
 
 message(l::GCNConv, xi, xj) = xj
 update(l::GCNConv, m, x) = m
 
-function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
+function (l::GCNConv)(fg::FeaturedGraph, x::CuMatrix)
     fg = add_self_loops(fg)
     T = eltype(l.weight)
     # cout = sqrt.(degree(fg, dir=:out))
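The split above is plain multiple dispatch: since `CuMatrix <: AbstractMatrix`, GPU feature matrices select the more specific message-passing method while CPU matrices take the faster Laplacian route. A stripped-down illustration of the pattern (`Demo` is a made-up type, not part of the package):

```julia
using CUDA  # provides CuMatrix

struct Demo end

# Generic path: stands in for the dense/sparse Laplacian multiply.
(d::Demo)(x::AbstractMatrix) = "matrix path"

# More specific method wins for GPU arrays: stands in for message passing.
(d::Demo)(x::CuMatrix) = "message-passing path"

Demo()(rand(Float32, 2, 2))           # -> "matrix path"
# Demo()(CUDA.rand(Float32, 2, 2))    # -> "message-passing path" (needs a GPU)
```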
