@@ -27,3 +27,82 @@ using CUDA.CUSPARSE: CuSparseDeviceVector, CuSparseDeviceMatrixCSC, CuSparseDevi
2727 cuA = CuSparseMatrixBSR (A, 2 )
2828 @test cudaconvert (cuA) isa CuSparseDeviceMatrixBSR{Float64, Cint, AS. Global}
2929end
30+
31+ @testset " device SparseArrays api" begin
@testset " nnz per column" begin
    # Device path: each thread takes a view of one column of the sparse matrix
    # and stores that column's `SparseArrays.nnz` into the output vector.
    function nnz_per_column(A::CuSparseMatrixCSC{Tv, Ti}) where {Tv, Ti}
        ncols = size(A, 2)
        counts = CuVector{Ti}(undef, ncols)

        function nnz_per_column_kernel(dest, mat)
            # Global 1-based thread index, used directly as the column index.
            j = threadIdx().x + (blockIdx().x - 1) * blockDim().x
            column = @view mat[:, j]
            dest[j] = SparseArrays.nnz(column)
            return nothing
        end

        # Launched with exactly one thread per column, so every index is valid.
        @cuda threads=ncols nnz_per_column_kernel(counts, A)
        return counts
    end

    # Host reference: the same query answered by the stock SparseArrays API.
    nnz_per_column(A::SparseMatrixCSC) = [SparseArrays.nnz(c) for c in eachcol(A)]

    host = sprand(10, 10, 0.5)
    dev = CuSparseMatrixCSC(host)

    @test nnz_per_column(host) == Vector(nnz_per_column(dev))
end
53+
@testset " sum per column" begin
    # Device path: one block per column. The threads of a block stride over the
    # column's stored values, and their partial sums are combined with
    # `CUDA.reduce_warp`.
    function sum_per_column(A::CuSparseMatrixCSC{Tv, Ti}) where {Tv, Ti}
        function sum_per_column_kernel(dest, mat)
            lane = threadIdx().x
            colidx = blockIdx().x
            column = @view mat[:, colidx]

            # Thread `lane` accumulates entries lane, lane + blockDim().x, ...
            partial = zero(Tv)
            k = lane
            while k <= SparseArrays.nnz(column)
                partial += nonzeros(column)[k]
                k += blockDim().x
            end
            total = CUDA.reduce_warp(+, partial)

            # Only the first thread of each block writes the column's result.
            if lane == 1
                dest[colidx] = total
            end
            return nothing
        end

        ncols = size(A, 2)
        sums = CuVector{Tv}(undef, ncols)
        # 32 threads per block (presumably a single warp, matching the
        # warp-level reduction above), one block per column.
        @cuda threads=32 blocks=ncols sum_per_column_kernel(sums, A)
        return sums
    end

    # Host reference: column sums computed on the CPU, flattened to a vector.
    function sum_per_column(A::SparseMatrixCSC)
        return vec(sum(A; dims=1))
    end

    host = sprand(10, 10, 0.5)
    dev = CuSparseMatrixCSC(host)

    @test sum_per_column(host) ≈ Vector(sum_per_column(dev))
end
86+
@testset " last nonzero per column" begin
    # Device path: each thread takes a view of one column and records the row
    # index of its last stored entry. A column without stored entries reports 0,
    # which can never be a valid 1-based row index.
    function last_nz_per_column(A::CuSparseMatrixCSC{Tv, Ti}) where {Tv, Ti}
        function last_nz_per_column_kernel(out, A)
            i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
            col = @view A[:, i]
            # Calling `last` on an empty index list would read out of bounds,
            # so check the stored-entry count before touching `rowvals`.
            out[i] = SparseArrays.nnz(col) == 0 ?
                zero(eltype(out)) :
                last(SparseArrays.rowvals(col))
            return nothing
        end

        out = CuVector{Ti}(undef, size(A, 2))
        # Launched with exactly one thread per column, so every index is valid.
        @cuda threads=size(A, 2) last_nz_per_column_kernel(out, A)
        return out
    end

    # Host reference with the same convention: 0 for a column that stores no
    # entries, otherwise the row index of the column's last stored entry.
    function last_nz_per_column(A::SparseMatrixCSC)
        return map(eachcol(A)) do col
            rows = SparseArrays.rowvals(col)
            isempty(rows) ? zero(eltype(rows)) : last(rows)
        end
    end

    # `sprand` alone yields an empty column only occasionally, which used to
    # make this test fail intermittently. Prepending a guaranteed-empty column
    # exercises that edge case on every run.
    A = hcat(spzeros(10, 1), sprand(10, 9, 0.5))
    cuA = CuSparseMatrixCSC(A)

    @test last_nz_per_column(A) == Vector(last_nz_per_column(cuA))
end
108+ end
0 commit comments