Commit 4510d98

Merge pull request #8 from CarloLucibello/cl/bench

add benchmarks

2 parents: 8022b4d + a7f260c

7 files changed: +80 additions, -42 deletions

docs/make.jl
Lines changed: 1 addition & 0 deletions

@@ -8,6 +8,7 @@ makedocs(;
     sitename = "GraphNeuralNetworks.jl",
     pages=[
         "Home" => "index.md",
+        "Developer Notes" => "dev.md",
     ],
 )
 

docs/src/dev.md
Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+# Developer Notes
+
+## Benchmarking
+
+You can benchmark the performance impact of your commits using the script `perf/perf.jl`.
+
+First, check out and benchmark the master branch:
+
+```julia
+julia> include("perf.jl")
+
+julia> df = run_benchmarks()
+
+# observe results
+julia> for g in groupby(df, :layer); println(g, "\n"); end
+
+julia> @save "perf_master_20210803_mymachine.jld2" dfmaster=df
+```
+
+Now check out your branch and do the same:
+
+```julia
+julia> df = run_benchmarks()
+
+julia> @save "perf_pr_20210803_mymachine.jld2" dfpr=df
+```
+
+Finally, compare the results:
+
+```julia
+julia> @load "perf_master_20210803_mymachine.jld2"
+
+julia> @load "perf_pr_20210803_mymachine.jld2"
+
+julia> compare(dfpr, dfmaster)
+```
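The workflow above relies on JLD2 storing each value under the name given on the left of the `=`, so the two runs reload as distinct variables (`dfmaster`, `dfpr`) ready for `compare`. A minimal sketch of that round trip, using a toy table rather than real benchmark output:

```julia
using JLD2, DataFrames

df = DataFrame(layer=["GCNConv"], N=[10], time_cpu=[1.0])

@save "demo_master.jld2" dfmaster=df   # store `df` under the name `dfmaster`
@save "demo_pr.jld2" dfpr=df           # store `df` under the name `dfpr`

@load "demo_master.jld2"   # defines `dfmaster` in the current global scope
@load "demo_pr.jld2"       # defines `dfpr`
```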

perf/Project.toml
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
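Before running the script, one would typically activate this environment. A hypothetical session from the repository root (note that `Flux` and `CUDA`, which `perf/perf.jl` also loads, are not listed above and would have to come from Julia's stacked default environment):

```julia
using Pkg
Pkg.activate("perf")     # use perf/Project.toml
Pkg.instantiate()        # install the dependencies listed above
include("perf/perf.jl")
```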

perf/perf.jl
Lines changed: 28 additions & 35 deletions

@@ -1,81 +1,74 @@
 using Flux, GraphNeuralNetworks, LightGraphs, BenchmarkTools, CUDA
 using DataFrames, Statistics, JLD2, SparseArrays
+CUDA.device!(2)
+CUDA.allowscalar(false)
 
 BenchmarkTools.ratio(::Missing, x) = Inf
 BenchmarkTools.ratio(x, ::Missing) = 0.0
 BenchmarkTools.ratio(::Missing, ::Missing) = missing
 
-adjlist(g) = [neighbors(g, i) for i in 1:nv(g)]
-
 function run_single_benchmark(N, c, D, CONV; gtype=:lg)
-    @assert gtype ∈ [:lightgraph, :adjlist, :dense, :sparse]
     g = erdos_renyi(N, c / (N-1), seed=17)
-    if gtype == :adjlist
-        g = adjlist(g)
-    elseif gtype == :dense
-        g = Array(adjacency_matrix(g))
-    elseif gtype == :sparse
-        g = adjacency_matrix(g) # lightgraphs returns sparse adj mats
-    end
-
-    res = Dict()
     X = randn(Float32, D, N)
-    res["FG"] = @benchmark FeaturedGraph($g, nf=$X)
 
-    fg = FeaturedGraph(g, nf=X)
+    fg = FeaturedGraph(g; nf=X, graph_type=gtype)
     fg_gpu = fg |> gpu
 
     m = CONV(D => D)
     m_gpu = m |> gpu
-    try
-        res["CPU"] = @benchmark $m($fg)
-    catch
-        res["CPU"] = missing
-    end
-
-    try
+
+    res = Dict()
+    res["CPU"] = @benchmark $m($fg)
+
+    try
         res["GPU"] = @benchmark CUDA.@sync($m_gpu($fg_gpu)) teardown=(GC.gc(); CUDA.reclaim())
     catch
         res["GPU"] = missing
     end
 
     return res
 end
+
 """
     run_benchmarks(;
        Ns = [10, 100, 1000, 10000],
        c = 6,
-       D = 100)
+       D = 100,
+       layers = [GCNConv, GraphConv, GATConv]
+       )
 
-Benchmark GNN layers on random regular graphs
-of mean connectivity `c` and number of nodes in the list `Ns`.
+Benchmark GNN layers on Erdos-Renyi random graphs
+with average degree `c`. Benchmarks are performed for each graph size in the list `Ns`.
 `D` is the number of node features.
 """
 function run_benchmarks(;
        Ns = [10, 100, 1000, 10000],
-       c = 6.0,
-       D = 100)
+       c = 6,
+       D = 100,
+       layers = [GCNConv, GraphConv, GATConv],
+       gtypes = [:coo, :sparse, :dense],
+       )
 
     df = DataFrame(N=Int[], c=Float64[], layer=String[], gtype=Symbol[],
-                   time_fg=Any[], time_cpu=Any[], time_gpu=Any[]) |> allowmissing
+                   time_cpu=Any[], time_gpu=Any[]) |> allowmissing
 
-    for gtype in [:lightgraph, :adjlist, :dense, :sparse]
+    for gtype in gtypes
         for N in Ns
             println("## GRAPH_TYPE = $gtype N = $N")
-            for CONV in [GCNConv, GraphConv, GATConv]
+            for CONV in layers
                 res = run_single_benchmark(N, c, D, CONV; gtype)
-                row = (; N = N,
+                row = (;layer = "$CONV",
+                        N = N,
                         c = c,
-                        layer = "$CONV",
                         gtype = gtype,
-                        time_fg = median(res["FG"]),
                         time_cpu = ismissing(res["CPU"]) ? missing : median(res["CPU"]),
                         time_gpu = ismissing(res["GPU"]) ? missing : median(res["GPU"]),
                     )
                 push!(df, row)
             end
         end
     end
+
     df.gpu_to_cpu = ratio.(df.time_gpu, df.time_cpu)
     sort!(df, [:layer, :N, :c, :gtype])
     return df

@@ -89,11 +82,11 @@ end
 # @save "perf/perf_pr.jld2" dfpr=df
 
 
-function compare(dfpr, dfmaster; on=[:N, :c, :layer])
+function compare(dfpr, dfmaster; on=[:N, :c, :gtype, :layer])
     df = outerjoin(dfpr, dfmaster; on=on, makeunique=true, renamecols = :_pr => :_master)
     df.pr_to_master_cpu = ratio.(df.time_cpu_pr, df.time_cpu_master)
-    df.pr_to_master_gpu = ratio.(df.time_cpu_pr, df.time_gpu_master)
-    return df[:,[:N, :c, :gtype_pr, :gtype_master, :layer, :pr_to_master_cpu, :pr_to_master_gpu]]
+    df.pr_to_master_gpu = ratio.(df.time_gpu_pr, df.time_gpu_master)
+    return df[:,[:N, :c, :gtype, :layer, :pr_to_master_cpu, :pr_to_master_gpu]]
 end
 
 # @load "perf/perf_pr.jld2" dfpr
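Two details worth noting. The `BenchmarkTools.ratio` methods added at the top let `compare` handle layers that only ran on one side: a `missing` timing compares as `Inf` or `0.0` instead of raising an error. And the new keyword arguments make reduced runs easy; a hypothetical smoke test, with argument names as defined above:

```julia
# Small graphs, a subset of layers, one graph type:
df = run_benchmarks(Ns=[10, 100], layers=[GCNConv, GraphConv], gtypes=[:coo])

# Per-layer view, as suggested in docs/src/dev.md:
for g in groupby(df, :layer)
    println(g, "\n")
end
```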
Binary file (29.3 KB) not shown.

src/graph_conversions.jl
Lines changed: 0 additions & 1 deletion

@@ -114,7 +114,6 @@ function to_sparse(A::ADJMAT_T, T::DataType=eltype(A); dir=:out, num_nodes=nothi
     return sparse(A), num_nodes, num_edges
 end
 
-
 function to_sparse(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     coo, num_nodes, num_edges = to_coo(adj_list; dir, num_nodes)
     to_sparse(coo; dir, num_nodes)

src/layers/conv.jl
Lines changed: 9 additions & 6 deletions

@@ -30,16 +30,19 @@ function GCNConv(ch::Pair{Int,Int}, σ=identity;
     GCNConv(W, b, σ)
 end
 
-
-# function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
-#     L̃ = normalized_laplacian(fg, eltype(x); selfloop=true)
-#     l.σ.(l.weight * x * L̃ .+ l.bias)
-# end
+## Matrix operations are more performant,
+## but cannot compute the normalized laplacian of sparse cuda matrices yet,
+## therefore fallback to message passing framework on gpu for the time being
+
+function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
+    L̃ = normalized_laplacian(fg, eltype(x); selfloop=true)
+    l.σ.(l.weight * x * L̃ .+ l.bias)
+end
 
 message(l::GCNConv, xi, xj) = xj
 update(l::GCNConv, m, x) = m
 
-function (l::GCNConv)(fg::FeaturedGraph, x::AbstractMatrix)
+function (l::GCNConv)(fg::FeaturedGraph, x::CuMatrix)
     fg = add_self_loops(fg)
     T = eltype(l.weight)
     # cout = sqrt.(degree(fg, dir=:out))
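The split above is plain multiple dispatch: since `CuMatrix <: AbstractMatrix`, GPU feature matrices select the more specific message-passing method while CPU matrices take the faster Laplacian route. A stripped-down illustration of the pattern (`Demo` is a made-up type, not part of the package):

```julia
using CUDA  # provides CuMatrix

struct Demo end

# Generic path: stands in for the dense/sparse Laplacian multiply.
(d::Demo)(x::AbstractMatrix) = "matrix path"

# More specific method wins for GPU arrays: stands in for message passing.
(d::Demo)(x::CuMatrix) = "message-passing path"

Demo()(rand(Float32, 2, 2))           # -> "matrix path"
# Demo()(CUDA.rand(Float32, 2, 2))    # -> "message-passing path" (needs a GPU)
```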
