diff --git a/GNNGraphs/ext/GNNGraphsCUDAExt.jl b/GNNGraphs/ext/GNNGraphsCUDAExt.jl
index af9e9f820..2b38ee739 100644
--- a/GNNGraphs/ext/GNNGraphsCUDAExt.jl
+++ b/GNNGraphs/ext/GNNGraphsCUDAExt.jl
@@ -5,8 +5,10 @@ using Random, Statistics, LinearAlgebra
 using GNNGraphs
 using GNNGraphs: COO_T, ADJMAT_T, SPARSE_T
 using SparseArrays
+using Graphs
 
 const CUMAT_T = Union{CUDA.AnyCuMatrix, CUDA.CUSPARSE.CuSparseMatrix}
+const CUDA_COO_T = Tuple{T, T, V} where {T <: AnyCuArray{<:Integer}, V <: Union{Nothing, AnyCuArray}} # COO tuple (s, t, eweight) stored on CUDA arrays; eweight may be `nothing`
 
 # Query
 
@@ -35,5 +37,31 @@ function sort_edge_index(u::AnyCuArray, v::AnyCuArray)
     sort_edge_index(u, v) |> dev
 end
 
+# Convert: CUDA COO edge list -> sparse adjacency matrix. Returns (A, num_nodes, num_edges).
+
+function GNNGraphs.to_sparse(coo::CUDA_COO_T, T = nothing; dir = :out, num_nodes = nothing,
+                             weighted = true, is_coalesced = false)
+    s, t, eweight = coo
+    T = T === nothing ? (eweight === nothing ? eltype(s) : eltype(eweight)) : T # default eltype: the weights' if present, otherwise the indices'
+
+    if eweight === nothing || !weighted
+        eweight = fill!(similar(s, T), 1) # unit weights, allocated like `s`
+    end
+
+    num_nodes::Int = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes # infer the node count from the largest index when not given
+
+    # If the caller states the edges are coalesced, wrap (s, t, eweight) directly in a sparse COO matrix.
+    if is_coalesced
+        A = CUDA.CUSPARSE.CuSparseMatrixCOO{T,eltype(s)}(s, t, eweight, (num_nodes, num_nodes))
+    else
+        A = sparse(s, t, eweight, num_nodes, num_nodes)
+    end
+
+    num_edges::Int = nnz(A)
+    if eltype(A) != T
+        A = T.(A) # convert the values to the requested eltype
+    end
+    return A, num_nodes, num_edges
+end
 
 end #module
diff --git a/GNNGraphs/src/gnngraph.jl b/GNNGraphs/src/gnngraph.jl
index b90cfc032..e59c8f1e0 100644
--- a/GNNGraphs/src/gnngraph.jl
+++ b/GNNGraphs/src/gnngraph.jl
@@ -113,7 +113,7 @@ struct GNNGraph{T <: Union{COO_T, ADJMAT_T}} <: AbstractGNNGraph{T}
     ndata::DataStore
     edata::DataStore
     gdata::DataStore
-    is_coalesced::Bool # only for :coo, true if the graph is coalesced, i.e., indices ordered by row and no multi edges
+    is_coalesced::Bool # only for :coo; true if the graph is coalesced, i.e., it has no multi-edges and its indices are sorted by target, then by source
 end
 
 # GNNGraph constructor setting the is_coalesced field to false
diff --git a/GNNGraphs/src/query.jl b/GNNGraphs/src/query.jl
index e2e5f9bdb..482502b4a 100644
--- a/GNNGraphs/src/query.jl
+++ b/GNNGraphs/src/query.jl
@@ -231,13 +231,7 @@ If `weighted=true`, the `A` will contain the edge weights if any, otherwise the
 """
 function Graphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType = eltype(g); dir = :out,
                                  weighted = true)
-    if iscuarray(g.graph[1])
-        # Revisit after
-        # https://github.com/JuliaGPU/CUDA.jl/issues/1113
-        A, n, m = to_dense(g.graph, T; num_nodes = g.num_nodes, weighted)
-    else
-        A, n, m = to_sparse(g.graph, T; num_nodes = g.num_nodes, weighted)
-    end
+    A, n, m = to_sparse(g.graph, T; num_nodes = g.num_nodes, weighted) # single sparse path; the CUDA-specific dense fallback above is removed
     @assert size(A) == (n, n)
     return dir == :out ? A : A'
 end
diff --git a/GNNGraphs/src/transform.jl b/GNNGraphs/src/transform.jl
index ce8d90b6a..1c8cffaa9 100644
--- a/GNNGraphs/src/transform.jl
+++ b/GNNGraphs/src/transform.jl
@@ -148,7 +148,7 @@ end
 """
     coalesce(g::GNNGraph; aggr=+)
 
-Return a new GNNGraph where all multiple edges between the same pair of nodes are merged (using aggr for edge weights and features), and the edge indices are sorted lexicographically (by source, then target).
+Return a new GNNGraph where all multiple edges between the same pair of nodes are merged (using `aggr` for edge weights and features), and the edge indices are sorted lexicographically (by target, then by source).
 This method is only applicable to graphs of type `:coo`.
 
 `aggr` can take value `+`,`min`, `max` or `mean`.
@@ -158,7 +158,8 @@ function Base.coalesce(g::GNNGraph{<:COO_T}; aggr = +)
     w = get_edge_weight(g)
     edata = g.edata
     num_edges = g.num_edges
-    idxs, idxmax = edge_encoding(s, t, g.num_nodes)
+    # Sort by target first and then by source, as a workaround for CUDA.jl issue https://github.com/JuliaGPU/CUDA.jl/issues/2820
+    idxs, idxmax = edge_encoding(t, s, g.num_nodes)
 
     perm = sortperm(idxs)
     idxs = idxs[perm]
diff --git a/GNNGraphs/test/gnngraph.jl b/GNNGraphs/test/gnngraph.jl
index 2b18fe7b7..1d6d27b77 100644
--- a/GNNGraphs/test/gnngraph.jl
+++ b/GNNGraphs/test/gnngraph.jl
@@ -99,13 +99,14 @@ end
             mat_gpu = adjacency_matrix(g_gpu)
             @test mat_gpu isa AbstractMatrix{Int}
             @test get_device(mat_gpu) isa AbstractGPUDevice
-            @test Array(mat_gpu) == adj_mat
+            # Convert to Float32 before comparing: Int support in CUSPARSE is poor and the Int version throws an error
+            @test Array(Float32.(mat_gpu)) == Float32.(adj_mat)
         end
     end
 
     @testset "normalized_laplacian" begin
         mat = normalized_laplacian(g)
-        if TEST_GPU && !(dev isa MetalDevice) && GRAPH_T != :sparse
+        if TEST_GPU && !(dev isa MetalDevice) && GRAPH_T != :sparse && GRAPH_T != :coo # the GPU check is now skipped for :coo as well
             mat_gpu = normalized_laplacian(g_gpu)
             @test mat_gpu isa AbstractMatrix{Float32}
             @test get_device(mat_gpu)isa AbstractGPUDevice
@@ -114,7 +115,7 @@ end
     end
 
     @testset "scaled_laplacian" begin
-        if TEST_GPU && !(dev isa MetalDevice) && GRAPH_T != :sparse
+        if TEST_GPU && !(dev isa MetalDevice) && GRAPH_T != :sparse && GRAPH_T != :coo # the GPU check is now skipped for :coo as well
             mat = scaled_laplacian(g)
             mat_gpu = scaled_laplacian(g_gpu)
             @test mat_gpu isa AbstractMatrix{Float32}
diff --git a/GNNGraphs/test/transform.jl b/GNNGraphs/test/transform.jl
index 256d851bf..fb7e95bf4 100644
--- a/GNNGraphs/test/transform.jl
+++ b/GNNGraphs/test/transform.jl
@@ -456,8 +456,10 @@ end
         s2, t2 = edge_index(g2)
         w2 = get_edge_weight(g2)
 
-        @test s2 == [1, 2, 2, 3, 3, 4, 4]
-        @test t2 == [2, 1, 3, 2, 4, 3, 4]
+        # `coalesce` now sorts the edges by target first, then by source,
+        # so the expected index vectors differ from the former (source, target) ordering.
+        @test s2 == [2, 1, 3, 2, 4, 3, 4]
+        @test t2 == [1, 2, 2, 3, 3, 4, 4]
         @test w2 == [1, 1, 2, 2, 3.5, 3.5, 5]
         @test g2.edata.e == [10.0, 10.0, 20.0, 20.0, 35.0, 35.0, 50.0]
     end
diff --git a/GNNlib/ext/GNNlibCUDAExt.jl b/GNNlib/ext/GNNlibCUDAExt.jl
index 56a6738e9..f745d51da 100644
--- a/GNNlib/ext/GNNlibCUDAExt.jl
+++ b/GNNlib/ext/GNNlibCUDAExt.jl
@@ -3,7 +3,10 @@ module GNNlibCUDAExt
 using CUDA
 using Random, Statistics, LinearAlgebra
 using GNNlib: GNNlib, propagate, copy_xj, e_mul_xj, w_mul_xj
-using GNNGraphs: GNNGraph, COO_T, SPARSE_T
+using GNNGraphs: GNNGraph, COO_T, SPARSE_T, to_dense, to_sparse
+using ChainRulesCore: @non_differentiable
+
+const CUDA_COO_T = Tuple{T, T, V} where {T <: AnyCuArray{<:Integer}, V <: Union{Nothing, AnyCuArray}} # COO tuple whose index arrays (and optional third entry) live on CUDA arrays
 
 ###### PROPAGATE SPECIALIZATIONS ####################
 
@@ -12,7 +15,9 @@ using GNNGraphs: GNNGraph, COO_T, SPARSE_T
 ## avoid the fast path on gpu until we have better cuda support
 function GNNlib.propagate(::typeof(copy_xj), g::GNNGraph{<:COO_T}, ::typeof(+),
                           xi, xj::AnyCuMatrix, e)
-    propagate((xi, xj, e) -> copy_xj(xi, xj, e), g, +, xi, xj, e)
+    A = _adjacency_matrix(g, eltype(xj); weighted = false) # unweighted adjacency matrix with the eltype of xj
+
+    return xj * A # copy_xj with `+` aggregation expressed as one matrix product
 end
 
 ## E_MUL_XJ
@@ -42,4 +47,21 @@ end
 
 # Flux.Zygote.@nograd compute_degree
 
+## Custom adjacency_matrix for CUDA COO graphs: dense matrix when the graph is not coalesced, sparse COO matrix when it is
+
+function _adjacency_matrix(g::GNNGraph{<:CUDA_COO_T}, T::DataType = eltype(g); dir = :out,
+                           weighted = true)
+    if !g.is_coalesced
+        # Revisit after
+        # https://github.com/JuliaGPU/CUDA.jl/issues/1113
+        A, n, m = to_dense(g.graph, T; num_nodes = g.num_nodes, weighted) # building the sparse matrix is slow when the graph is not coalesced
+    else
+        A, n, m = to_sparse(g.graph, T; num_nodes = g.num_nodes, weighted, is_coalesced = true)
+    end
+    @assert size(A) == (n, n)
+    return dir == :out ? A : A'
+end
+
+@non_differentiable _adjacency_matrix(x...) # treat the adjacency construction as a constant for AD
+
 end #module
diff --git a/GNNlib/test/test_module.jl b/GNNlib/test/test_module.jl
index b6894cdfa..075881af8 100644
--- a/GNNlib/test/test_module.jl
+++ b/GNNlib/test/test_module.jl
@@ -150,7 +150,7 @@ function test_gradients(
     return true
 end
 
-function generate_test_graphs(graph_type)
+function generate_test_graphs(graph_type; do_coalesce=false)
     adj1 = [0 1 0 1
             1 0 1 0
             0 1 0 1
@@ -168,12 +168,18 @@ function generate_test_graphs(graph_type)
     g_single_vertex = GNNGraph(adj_single_vertex,
                                ndata = rand(Float32, D_IN, 4);
                                graph_type)
+
+    if graph_type == :coo && do_coalesce # coalescing is only requested for :coo graphs
+        g1 = coalesce(g1)
+        g_single_vertex = coalesce(g_single_vertex)
+    end
 
     return (g1, g_single_vertex)
 end
 
 GRAPH_TYPES = [:coo, :dense, :sparse]
 TEST_GRAPHS = [generate_test_graphs(:coo)...,
+               generate_test_graphs(:coo, do_coalesce=true)..., # coalesced :coo variants
                generate_test_graphs(:dense)...,
                generate_test_graphs(:sparse)...]
 
diff --git a/GraphNeuralNetworks/test/layers/conv.jl b/GraphNeuralNetworks/test/layers/conv.jl
index 16e9b2fd5..97cc7a355 100644
--- a/GraphNeuralNetworks/test/layers/conv.jl
+++ b/GraphNeuralNetworks/test/layers/conv.jl
@@ -108,7 +108,7 @@ end
 
         if gpu_backend() == "AMDGPU"
             broken = true
-        elseif gpu_backend() == "CUDA" && get_graph_type(g) == :sparse
+        elseif gpu_backend() == "CUDA" && get_graph_type(g) in [:coo, :sparse] # NOTE(review): :coo is newly marked broken on CUDA; the reason is not visible here, confirm
             broken = true
         else
             broken = false
diff --git a/GraphNeuralNetworks/test/test_module.jl b/GraphNeuralNetworks/test/test_module.jl
index 8f7a0446b..74c25a555 100644
--- a/GraphNeuralNetworks/test/test_module.jl
+++ b/GraphNeuralNetworks/test/test_module.jl
@@ -157,7 +157,7 @@ function test_gradients(
 end
 
 
-function generate_test_graphs(graph_type)
+function generate_test_graphs(graph_type; do_coalesce=false)
     adj1 = [0 1 0 1
             1 0 1 0
             0 1 0 1
@@ -175,12 +175,18 @@ function generate_test_graphs(graph_type)
     g_single_vertex = GNNGraph(adj_single_vertex,
                                ndata = rand(Float32, D_IN, 4);
                                graph_type)
+
+    if graph_type == :coo && do_coalesce # coalescing is only requested for :coo graphs
+        g1 = coalesce(g1)
+        g_single_vertex = coalesce(g_single_vertex)
+    end
 
     return (g1, g_single_vertex)
 end
 
 GRAPH_TYPES = [:coo, :dense, :sparse]
 TEST_GRAPHS = [generate_test_graphs(:coo)...,
+               generate_test_graphs(:coo, do_coalesce=true)..., # coalesced :coo variants
                generate_test_graphs(:dense)...,
                generate_test_graphs(:sparse)...]
 