From dac292628a5324a43748c18644612036c0036ba0 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 23 Jun 2025 06:59:50 +0000 Subject: [PATCH 1/3] Support sparse matrices from rocSPARSE --- Project.toml | 9 +- ext/SparseMatrixColoringsAMDGPUExt.jl | 155 ++++++++++++++++++++++++++ test/rocm.jl | 62 +++++++++++ test/runtests.jl | 10 +- 4 files changed, 232 insertions(+), 4 deletions(-) create mode 100644 ext/SparseMatrixColoringsAMDGPUExt.jl create mode 100644 test/rocm.jl diff --git a/Project.toml b/Project.toml index 6656e125..04d0832e 100644 --- a/Project.toml +++ b/Project.toml @@ -12,23 +12,26 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" [extensions] +SparseMatrixColoringsAMDGPUExt = "AMDGPU" SparseMatrixColoringsCUDAExt = "CUDA" SparseMatrixColoringsCliqueTreesExt = "CliqueTrees" SparseMatrixColoringsColorsExt = "Colors" [compat] ADTypes = "1.2.1" +AMDGPU = "1.3.3" CUDA = "5.8.2" CliqueTrees = "1" Colors = "0.12.11, 0.13" DocStringExtensions = "0.8,0.9" -LinearAlgebra = "<0.0.1, 1" +LinearAlgebra = "1.10" PrecompileTools = "1.2.1" -Random = "<0.0.1, 1" -SparseArrays = "<0.0.1, 1" +Random = "1.10" +SparseArrays = "1.10" julia = "1.10" diff --git a/ext/SparseMatrixColoringsAMDGPUExt.jl b/ext/SparseMatrixColoringsAMDGPUExt.jl new file mode 100644 index 00000000..007a60b4 --- /dev/null +++ b/ext/SparseMatrixColoringsAMDGPUExt.jl @@ -0,0 +1,155 @@ +module SparseMatrixColoringsAMDGPUExt + +import SparseMatrixColorings as SMC +using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange +using AMDGPU: ROCVector, ROCMatrix +using AMDGPU.rocSPARSE: AbstractROCSparseMatrix, ROCSparseMatrixCSC, ROCSparseMatrixCSR + +SMC.matrix_versions(A::AbstractROCSparseMatrix) = (A,) + +## Compression (slow, through CPU) + +function SMC.compress( + A::AbstractROCSparseMatrix, result::SMC.AbstractColoringResult{structure,:column} +) where {structure} + return ROCMatrix(SMC.compress(SparseMatrixCSC(A), result)) +end + +function SMC.compress( + A::AbstractROCSparseMatrix, result::SMC.AbstractColoringResult{structure,:row} +) where {structure} + return ROCMatrix(SMC.compress(SparseMatrixCSC(A), result)) +end + +## CSC Result + +function SMC.ColumnColoringResult( + A::ROCSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.column_csc_indices(bg, color) + additional_info = (; compressed_indices_gpu_csc=ROCVector(compressed_indices)) + return SMC.ColumnColoringResult( + A, bg, color, group, compressed_indices, additional_info + ) +end + +function SMC.RowColoringResult( + A::ROCSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.row_csc_indices(bg, color) + additional_info = (; compressed_indices_gpu_csc=ROCVector(compressed_indices)) + return SMC.RowColoringResult(A, bg, color, group, compressed_indices, additional_info) +end + +function SMC.StarSetColoringResult( + A::ROCSparseMatrixCSC, + ag::SMC.AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::SMC.StarSet{<:Integer}, +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.star_csc_indices(ag, color, star_set) + additional_info = (; compressed_indices_gpu_csc=ROCVector(compressed_indices)) + return SMC.StarSetColoringResult( + A, ag, color, group, compressed_indices, additional_info + ) +end + +## CSR Result + +function SMC.ColumnColoringResult( + A::ROCSparseMatrixCSR, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.column_csc_indices(bg, color) + compressed_indices_csr = SMC.column_csr_indices(bg, color) + additional_info = (; compressed_indices_gpu_csr=ROCVector(compressed_indices_csr)) + return SMC.ColumnColoringResult( + A, bg, color, group, compressed_indices, additional_info + ) +end + +function SMC.RowColoringResult( + A::ROCSparseMatrixCSR, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.row_csc_indices(bg, color) + compressed_indices_csr = SMC.row_csr_indices(bg, color) + additional_info = (; compressed_indices_gpu_csr=ROCVector(compressed_indices_csr)) + return SMC.RowColoringResult(A, bg, color, group, compressed_indices, additional_info) +end + +function SMC.StarSetColoringResult( + A::ROCSparseMatrixCSR, + ag::SMC.AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::SMC.StarSet{<:Integer}, +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.star_csc_indices(ag, color, star_set) + additional_info = (; compressed_indices_gpu_csr=ROCVector(compressed_indices)) + return SMC.StarSetColoringResult( + A, ag, color, group, compressed_indices, additional_info + ) +end + +## Decompression + +for R in (:ColumnColoringResult, :RowColoringResult) + # loop to avoid method ambiguity + @eval function SMC.decompress!( + A::ROCSparseMatrixCSC, B::ROCMatrix, result::SMC.$R{<:ROCSparseMatrixCSC} + ) + compressed_indices = result.additional_info.compressed_indices_gpu_csc + copyto!(A.nzVal, view(B, compressed_indices)) + return A + end + + @eval function SMC.decompress!( + A::ROCSparseMatrixCSR, B::ROCMatrix, result::SMC.$R{<:ROCSparseMatrixCSR} + ) + compressed_indices = result.additional_info.compressed_indices_gpu_csr + copyto!(A.nzVal, view(B, compressed_indices)) + return A + end +end + +function SMC.decompress!( + A::ROCSparseMatrixCSC, + B::ROCMatrix, + result::SMC.StarSetColoringResult{<:ROCSparseMatrixCSC}, + uplo::Symbol=:F, +) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end + compressed_indices = result.additional_info.compressed_indices_gpu_csc + copyto!(A.nzVal, view(B, compressed_indices)) + return A +end + +function SMC.decompress!( + A::ROCSparseMatrixCSR, + B::ROCMatrix, + result::SMC.StarSetColoringResult{<:ROCSparseMatrixCSR}, + uplo::Symbol=:F, +) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end + compressed_indices = result.additional_info.compressed_indices_gpu_csr + copyto!(A.nzVal, view(B, compressed_indices)) + return A +end + +end diff --git a/test/rocm.jl b/test/rocm.jl new file mode 100644 index 00000000..e2aa7840 --- /dev/null +++ b/test/rocm.jl @@ -0,0 +1,62 @@ +using AMDGPU.rocSPARSE: ROCSparseMatrixCSC, ROCSparseMatrixCSR +using LinearAlgebra +using SparseArrays +using SparseMatrixColorings +import SparseMatrixColorings as SMC +using StableRNGs +using Test + +include("utils.jl") + +rng = StableRNG(63) + +asymmetric_params = vcat( + [(10, 20, p) for p in (0.0:0.2:0.5)], + [(20, 10, p) for p in (0.0:0.2:0.5)], + [(100, 200, p) for p in (0.01:0.02:0.05)], + [(200, 100, p) for p in (0.01:0.02:0.05)], +) + +symmetric_params = vcat( + [(10, p) for p in (0.0:0.2:0.5)], # + [(100, p) for p in (0.01:0.02:0.05)], +) + +@testset verbose = true "Column coloring & decompression" begin + problem = ColoringProblem(; structure=:nonsymmetric, partition=:column) + algo = GreedyColoringAlgorithm(; decompression=:direct) + @testset for T in (ROCSparseMatrixCSC, ROCSparseMatrixCSR) + @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params + A0 = T(sprand(rng, m, n, p)) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + end +end; + +@testset verbose = true "Row coloring & decompression" begin + problem = ColoringProblem(; structure=:nonsymmetric, partition=:row) + algo = GreedyColoringAlgorithm(; decompression=:direct) + @testset for T in (ROCSparseMatrixCSC, ROCSparseMatrixCSR) + @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params + A0 = T(sprand(rng, m, n, p)) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + end +end; + +@testset verbose = true "Symmetric coloring & direct decompression" begin + problem = ColoringProblem(; structure=:symmetric, partition=:column) + algo = GreedyColoringAlgorithm(; postprocessing=false, decompression=:direct) + @testset for T in (ROCSparseMatrixCSC, ROCSparseMatrixCSR) + @testset "$((; n, p))" for (n, p) in symmetric_params + A0 = T(sparse(Symmetric(sprand(rng, n, n, p)))) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + A0 = T(sparse(Diagonal(ones(10)))) + result = coloring(A0, problem, algo) + B = compress(A0, result) + @test_throws SMC.UnsupportedDecompressionError decompress!( + similar(A0), B, result, :U + ) + end +end; diff --git a/test/runtests.jl b/test/runtests.jl index 81c4ecc9..d2a08601 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,15 @@ include("utils.jl") if get(ENV, "JULIA_SMC_TEST_GROUP", nothing) == "GPU" @testset "CUDA" begin using CUDA - include("cuda.jl") + if CUDA.functional() + include("cuda.jl") + end + end + @testset "ROCm" begin + using AMDGPU + if AMDGPU.functional() + include("rocm.jl") + end end else @testset verbose = true "Code quality" begin From 56fd6af4bf56e7b00db68a1828836b7b8e9341d5 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 23 Jun 2025 07:18:02 +0000 Subject: [PATCH 2/3] Update test/Project.toml --- test/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Project.toml b/test/Project.toml index ab595aba..a0f4510d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" BandedMatrices = "aae01518-5342-5314-be14-df237901396f" From 82fa780cfb786901cfe6a294ede04c0cf376568a Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 18 Aug 2025 09:21:39 +0000 Subject: [PATCH 3/3] Update tests for AMDGPU.jl --- Project.toml | 2 +- test/utils.jl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 04d0832e..70a5e062 100644 --- a/Project.toml +++ b/Project.toml @@ -25,7 +25,7 @@ SparseMatrixColoringsColorsExt = "Colors" [compat] ADTypes = "1.2.1" -AMDGPU = "1.3.3" +AMDGPU = "2.0.1" CUDA = "5.8.2" CliqueTrees = "1" Colors = "0.12.11, 0.13" diff --git a/test/utils.jl b/test/utils.jl index bb80f95f..1dc17e9c 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -62,10 +62,10 @@ function test_coloring_decompression( end @testset "Full decompression" begin - @test decompress(B, result) ≈ A0 - @test decompress(B, result) ≈ A0 # check result wasn't modified - @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 - @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 + @test collect(decompress(B, result)) ≈ collect(A0) + @test collect(decompress(B, result)) ≈ collect(A0) # check result wasn't modified + @test collect(decompress!(respectful_similar(A, eltype(B)), B, result)) ≈ collect(A0) + @test collect(decompress!(respectful_similar(A, eltype(B)), B, result)) ≈ collect(A0) end if gpu