diff --git a/GNNlib/ext/GNNlibCUDAExt.jl b/GNNlib/ext/GNNlibCUDAExt.jl
index 7cf5adceb..afe22c3f0 100644
--- a/GNNlib/ext/GNNlibCUDAExt.jl
+++ b/GNNlib/ext/GNNlibCUDAExt.jl
@@ -10,7 +10,7 @@ using GNNGraphs: GNNGraph, COO_T, SPARSE_T
 ## COPY_XJ
 
 ## avoid the fast path on gpu until we have better cuda support
-function GNNlib.propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T, SPARSE_T}}, ::typeof(+),
+function GNNlib.propagate(::typeof(copy_xj), g::GNNGraph{COO_T}, ::typeof(+),
                           xi, xj::AnyCuMatrix, e)
     propagate((xi, xj, e) -> copy_xj(xi, xj, e), g, +, xi, xj, e)
 end
diff --git a/GNNlib/src/msgpass.jl b/GNNlib/src/msgpass.jl
index 7bbe2ab58..7b7685e1b 100644
--- a/GNNlib/src/msgpass.jl
+++ b/GNNlib/src/msgpass.jl
@@ -213,7 +213,7 @@ end
 ## COPY_XJ
 
 function propagate(::typeof(copy_xj), g::GNNGraph, ::typeof(+),
                    xi, xj::AbstractMatrix, e)
-    A = adjacency_matrix(g, weighted = false)
+    A = adjacency_matrix(g, eltype(xj); weighted = false)
     return xj * A
 end
diff --git a/GraphNeuralNetworks/perf/Project.toml b/GraphNeuralNetworks/perf/Project.toml
index ddbb1be6e..c09a51049 100644
--- a/GraphNeuralNetworks/perf/Project.toml
+++ b/GraphNeuralNetworks/perf/Project.toml
@@ -1,6 +1,10 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+GNNGraphs = "aed8fd31-079b-4b5a-b342-a13352159b8c"
+GNNlib = "a6a84749-d869-43f8-aacc-be26a1996e48"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
+Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
diff --git a/GraphNeuralNetworks/perf/sparse_propagate_cuda.jl b/GraphNeuralNetworks/perf/sparse_propagate_cuda.jl
new file mode 100644
index 000000000..fee5372d6
--- /dev/null
+++ b/GraphNeuralNetworks/perf/sparse_propagate_cuda.jl
@@ -0,0 +1,48 @@
+# # Activate the perf environment
+# using Pkg
+# Pkg.activate(@__DIR__)
+# Pkg.develop(path=joinpath(@__DIR__, "..", "..", "GNNGraphs"))
+# Pkg.develop(path=joinpath(@__DIR__, "..", "..", "GNNlib"))
+# Pkg.develop(path=joinpath(@__DIR__, ".."))
+# Pkg.instantiate()
+using SparseArrays
+using GraphNeuralNetworks
+using BenchmarkTools
+import Random: seed!
+using LinearAlgebra
+using Flux, CUDA
+
+# ENV["JULIA_DEBUG"] = "GraphNeuralNetworks,GNNlib,GNNlibCUDAExt,GNNGraphs,GNNGraphsCUDAExt,CUDA" # packages with debug logging enabled; don't put whitespace between the package names
+
+function prop_copy_xj(graph_type, sp_p, n, feat_size)
+    A = sprand(n, n, sp_p)
+    b = rand(1, n)
+    B = rand(feat_size, n)
+    g = GNNGraph(A,
+                 ndata = (; b = b, B = B),
+                 edata = (; A = reshape(A.nzval, 1, :)),
+                 graph_type = graph_type) |> dev
+    printstyled("propagate copy_xj for graph type: $graph_type", "\n", color=:yellow)
+    CUDA.@sync propagate(copy_xj, g, +; xj = g.ndata.B) # run once to compile before benchmarking
+    # @profview for _ in 1:1000
+    #     propagate(copy_xj, g, +; xj = g.ndata.B)
+    # end
+    @btime CUDA.@sync propagate($copy_xj, $g, +; xj = $g.ndata.B) # using spmm for :sparse
+    printstyled("gather/scatter propagate copy_xj for graph type: $graph_type", "\n", color=:yellow)
+    CUDA.@sync propagate((xi, xj, e) -> xj, g, +; xj = g.ndata.B) # run once to compile before benchmarking
+    @btime CUDA.@sync propagate((xi, xj, e) -> xj, $g, +; xj = $g.ndata.B) # using gather/scatter
+    return nothing
+end
+
+seed!(0)
+dev = gpu_device()
+println("Device: ", dev)
+feat_size = 128
+# test for :sparse graph_type
+for n in (32, 128, 1024)
+    for sp_p in (0.01, 0.1, 0.9)
+        printstyled("n = $n, feat_size = $feat_size, sparsity = $sp_p\n", color=:blue)
+        prop_copy_xj(:sparse, sp_p, n, feat_size)
+        println()
+    end
+end
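
Note (not part of the patch): the msgpass.jl change matters because the copy_xj/+ fast path computes the aggregation as a single sparse matrix product xj * A, and passing eltype(xj) builds the adjacency matrix in the features' element type, so Float32 features are not promoted (e.g. to Float64) by the multiplication. Below is a minimal CPU sketch of the equivalence the benchmark script measures; the graph and names (g, xj) are illustrative, not from the patch:

using GraphNeuralNetworks

g = rand_graph(10, 40)                                # small random graph: 10 nodes, 40 edges
xj = rand(Float32, 8, 10)                             # 8 features per node, one column per node

# fast path: one sparse-dense matrix product, in the feature eltype
A = adjacency_matrix(g, eltype(xj); weighted = false)
y_spmm = xj * A
@assert eltype(y_spmm) == Float32                     # no accidental promotion

# generic path: gather messages along edges, scatter-add onto target nodes
y_gs = propagate((xi, xj, e) -> xj, g, +; xj = xj)
@assert y_spmm ≈ y_gs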