Skip to content

fix conversion of 0x0 CuSparseMatrixCSC <-> CSR #2806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 37 additions & 21 deletions lib/cusparse/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,13 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)
@eval begin
function CuSparseMatrixCSC{$elty}(csr::CuSparseMatrixCSR{$elty}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1)
m,n = size(csr)
colPtr = CUDA.zeros(Cint, n+1)
rowVal = CUDA.zeros(Cint, nnz(csr))
nzVal = CUDA.zeros($elty, nnz(csr))
colPtr = CuArray{Cint}(undef, n+1)
rowVal = CuArray{Cint}(undef, nnz(csr))
nzVal = CuArray{$elty}(undef, nnz(csr))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
colPtr .= (index == 'O' ? 1 : 0)
end
function bufferSize()
out = Ref{Csize_t}(1)
cusparseCsr2cscEx2_bufferSize(handle(), m, n, nnz(csr), nonzeros(csr),
Expand All @@ -352,9 +356,13 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64)

function CuSparseMatrixCSR{$elty}(csc::CuSparseMatrixCSC{$elty}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1)
m,n = size(csc)
rowPtr = CUDA.zeros(Cint,m+1)
colVal = CUDA.zeros(Cint,nnz(csc))
nzVal = CUDA.zeros($elty,nnz(csc))
rowPtr = CuArray{Cint}(undef, m+1)
colVal = CuArray{Cint}(undef, nnz(csc))
nzVal = CuArray{$elty}(undef, nnz(csc))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
rowPtr .= (index == 'O' ? 1 : 0)
end
function bufferSize()
out = Ref{Csize_t}(1)
cusparseCsr2cscEx2_bufferSize(handle(), n, m, nnz(csc), nonzeros(csc),
Expand All @@ -379,9 +387,13 @@ for (elty, welty) in ((:Float16, :Float32),
@eval begin
function CuSparseMatrixCSC{$elty}(csr::CuSparseMatrixCSR{$elty}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1)
m,n = size(csr)
colPtr = CUDA.zeros(Cint, n+1)
rowVal = CUDA.zeros(Cint, nnz(csr))
nzVal = CUDA.zeros($elty, nnz(csr))
colPtr = CuArray{Cint}(undef, n+1)
rowVal = CuArray{Cint}(undef, nnz(csr))
nzVal = CuArray{$elty}(undef, nnz(csr))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
colPtr .= (index == 'O' ? 1 : 0)
end
if $elty == Float16 #broken for ComplexF16?
function bufferSize()
out = Ref{Csize_t}(1)
Expand All @@ -405,9 +417,13 @@ for (elty, welty) in ((:Float16, :Float32),

function CuSparseMatrixCSR{$elty}(csc::CuSparseMatrixCSC{$elty}; index::SparseChar='O', action::cusparseAction_t=CUSPARSE_ACTION_NUMERIC, algo::cusparseCsr2CscAlg_t=CUSPARSE_CSR2CSC_ALG1)
m,n = size(csc)
rowPtr = CUDA.zeros(Cint,m+1)
colVal = CUDA.zeros(Cint,nnz(csc))
nzVal = CUDA.zeros($elty,nnz(csc))
rowPtr = CuArray{Cint}(undef, m+1)
colVal = CuArray{Cint}(undef, nnz(csc))
nzVal = CuArray{$elty}(undef, nnz(csc))
if version() <= v"12.6-"
# JuliaGPU/CUDA.jl#2806 (NVBUG 5384319)
rowPtr .= (index == 'O' ? 1 : 0)
end
if $elty == Float16 #broken for ComplexF16?
function bufferSize()
out = Ref{Csize_t}(1)
Expand Down Expand Up @@ -523,9 +539,9 @@ for (fname,elty) in ((:cusparseSbsr2csr, :Float32),
nb = cld(n, bsr.blockDim)
cudesca = CuMatrixDescriptor('G', 'L', 'N', index)
cudescc = CuMatrixDescriptor('G', 'L', 'N', indc)
csrRowPtr = CUDA.zeros(Cint, m + 1)
csrColInd = CUDA.zeros(Cint, nnz(bsr))
csrNzVal = CUDA.zeros($elty, nnz(bsr))
csrRowPtr = CuArray{Cint}(undef, m + 1)
csrColInd = CuArray{Cint}(undef, nnz(bsr))
csrNzVal = CuArray{$elty}(undef, nnz(bsr))
$fname(handle(), bsr.dir, mb, nb,
cudesca, nonzeros(bsr), bsr.rowPtr, bsr.colVal,
bsr.blockDim, cudescc, csrNzVal, csrRowPtr,
Expand Down Expand Up @@ -586,10 +602,10 @@ end
## CSR to COO and vice-versa

function CuSparseMatrixCSR{Tv}(coo::CuSparseMatrixCOO{Tv}; index::SparseChar='O') where {Tv}
m,n = size(coo)
nnz(coo) == 0 && return CuSparseMatrixCSR{Tv}(CUDA.ones(Cint, m+1), coo.colInd, nonzeros(coo), size(coo))
m, n = size(coo)
csrRowPtr = (index == 'O') ? CUDA.ones(Cint, m + 1) : CUDA.zeros(Cint, m + 1)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just stumbled across these lines. For consistency, the initialization should probably be treated similar as in the CSC/CSR case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? See my latest comment and commit, this was really a bug, so I don't think we should apply this here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise the case nnz(coo) == 0 and index == 'Z' would produce an invalid CSR-Matrix. If a user explicitly asks for zero-based indexing, an empty COO-Matrix should be converted into a CSR-Matrix with

rowPtr = [0, 0, 0, ...] (number of rows + 1)
colVal = []
nzVal = []

Alternatively, the shortcut nnz(coo) == 0 && return ... could be removed, then the cusparseXcoo2csr should correctly initialize the rowPtr

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes. Let's just rely on cusparseXcoo2csr.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out cusparseXcoo2csr has similar issues: If size of the dimension is zero, the method does not change the indices of the row/col-ptr. Here's a MWE:

julia> using CUDA
julia> csrRowPtr = CuVector{Cint}(undef, 1);
julia> csrRowPtr .= -10;
julia> rowIdx = CuVector{Cint}(undef, 0);
julia> n = 0;
julia> CUDA.CUSPARSE.cusparseXcoo2csr(CUDA.CUSPARSE.handle(), rowIdx, 0, n, csrRowPtr, 'O');
julia> csrRowPtr # should be 1
1-element CuArray{Int32, 1, CUDA.DeviceMemory}:
 -10
julia> @assert collect(csrRowPtr)[1] == 1
ERROR: AssertionError: (collect(csrRowPtr))[1] == 1
[...]

julia> csrRowPtr .= -10;
julia> n = 1;
julia> CUDA.CUSPARSE.cusparseXcoo2csr(CUDA.CUSPARSE.handle(), rowIdx, 0, n, csrRowPtr, 'O');
julia> @assert collect(csrRowPtr)[1] == 1 # with n==1 the method works

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not surprising, I guess we'll have to extend the workaround then.

nnz(coo) == 0 && return CuSparseMatrixCSR{Tv}(csrRowPtr, coo.colInd, nonzeros(coo), size(coo))
coo = sort_coo(coo, 'R')
csrRowPtr = CuVector{Cint}(undef, m+1)
cusparseXcoo2csr(handle(), coo.rowInd, nnz(coo), m, csrRowPtr, index)
CuSparseMatrixCSR{Tv}(csrRowPtr, coo.colInd, nonzeros(coo), size(coo))
end
Expand All @@ -605,10 +621,10 @@ end
### CSC to COO and viceversa

function CuSparseMatrixCSC{Tv}(coo::CuSparseMatrixCOO{Tv}; index::SparseChar='O') where {Tv}
m,n = size(coo)
nnz(coo) == 0 && return CuSparseMatrixCSC{Tv}(CUDA.ones(Cint, n+1), coo.rowInd, nonzeros(coo), size(coo))
m, n = size(coo)
cscColPtr = (index == 'O') ? CUDA.ones(Cint, n + 1) : CUDA.zeros(Cint, n + 1)
nnz(coo) == 0 && return CuSparseMatrixCSC{Tv}(cscColPtr, coo.rowInd, nonzeros(coo), size(coo))
coo = sort_coo(coo, 'C')
cscColPtr = CuVector{Cint}(undef, n+1)
cusparseXcoo2csr(handle(), coo.colInd, nnz(coo), n, cscColPtr, index)
CuSparseMatrixCSC{Tv}(cscColPtr, coo.rowInd, nonzeros(coo), size(coo))
end
Expand Down
4 changes: 3 additions & 1 deletion test/libraries/cusparse/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,19 @@ if !(v"12.0" <= CUSPARSE.version() < v"12.1")
end
end

for (n, bd, p) in [(100, 5, 0.02), (5, 1, 0.8), (4, 2, 0.5)]
for (n, bd, p) in [(100, 5, 0.02), (5, 1, 0.8), (4, 2, 0.5), (0, 1, 0.0)]
v"12.0" <= CUSPARSE.version() < v"12.1" && n == 4 && continue
@testset "conversions between CuSparseMatrices (n, bd, p) = ($n, $bd, $p)" begin
A = sprand(n, n, p)
blockdim = bd
for CuSparseMatrixType1 in (CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixCOO, CuSparseMatrixBSR)
if CuSparseMatrixType1 == CuSparseMatrixBSR && n == 0 continue end # TODO: conversion to CuSparseMatrixBSR breaks with (0x0) matrices
dA1 = CuSparseMatrixType1 == CuSparseMatrixBSR ? CuSparseMatrixType1(A, blockdim) : CuSparseMatrixType1(A)
@testset "conversion $CuSparseMatrixType1 --> SparseMatrixCSC" begin
@test SparseMatrixCSC(dA1) ≈ A
end
for CuSparseMatrixType2 in (CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixCOO, CuSparseMatrixBSR)
if CuSparseMatrixType2 == CuSparseMatrixBSR && n == 0 continue end # TODO: conversion to CuSparseMatrixBSR breaks with (0x0) matrices
CuSparseMatrixType1 == CuSparseMatrixType2 && continue
dA2 = CuSparseMatrixType2 == CuSparseMatrixBSR ? CuSparseMatrixType2(dA1, blockdim) : CuSparseMatrixType2(dA1)
@testset "conversion $CuSparseMatrixType1 --> $CuSparseMatrixType2" begin
Expand Down