Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 25 additions & 63 deletions lib/cusparse/generic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export bmm!
mv!(transa::SparseChar, alpha::Number, A::CuSparseMatrix, X::CuVector, beta::Number, Y::CuVector, index::SparseChar)

Performs `Y = alpha * op(A) * X + beta * Y`, where `op` can be nothing (`transa = N`),
tranpose (`transa = T`) or conjugate transpose (`transa = C`).
transpose (`transa = T`) or conjugate transpose (`transa = C`).
`X` and `Y` are dense vectors.
"""
function mv! end
Expand All @@ -18,7 +18,7 @@ function mv! end
mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuMatrix, B::Union{CuSparseMatrixCSC,CuSparseMatrixCSR,CuSparseMatrixCOO}, beta::Number, C::CuMatrix, index::SparseChar)

Performs `C = alpha * op(A) * op(B) + beta * C`, where `op` can be nothing (`transa = N`),
tranpose (`transa = T`) or conjugate transpose (`transa = C`).
transpose (`transa = T`) or conjugate transpose (`transa = C`).
"""
function mm! end

Expand Down Expand Up @@ -273,7 +273,7 @@ function mv!(transa::SparseChar, alpha::Number, A::Union{CuSparseMatrixCSC{TA},C
end

function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseMatrix{T},
B::DenseCuMatrix{T}, beta::Number, C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T}
B::Union{DenseCuMatrix{T}, Transpose{T, <:DenseCuMatrix{T}}}, beta::Number, C::Union{DenseCuMatrix{T}, Transpose{T, <:DenseCuMatrix{T}}}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T}

(A isa CuSparseMatrixBSR) && (CUSPARSE.version() < v"12.5.1") && throw(ErrorException("This operation is not supported by the current CUDA version."))

Expand All @@ -298,12 +298,6 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseM
descB = CuDenseMatrixDescriptor(B)
descC = CuDenseMatrixDescriptor(C)

# cusparseDnMatSetStridedBatch(descB, size(B,3), size(B,1)*size(B,2))
# cusparseDnMatSetStridedBatch(descB, size(B,3), size(B,1)*size(B,2))
# batchsize = length(nonzeros(A)) ÷ nnz(A)
# if batchsize > 1
# cusparseCsrSetStridedBatch(obj, batchsize, 0, nnz(A))
# end

# Set default buffer for small matrices (1000 chosen arbitrarly)
# Otherwise tries to allocate 120TB of memory (see #2296)
Expand All @@ -327,6 +321,28 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseM
return C
end

function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMatrix{T}, B::CuSparseMatrixCSC{T}, beta::Number, C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where T
_B = CuSparseMatrixCSR{T}(B.colPtr, B.rowVal, B.nzVal, reverse(B.dims))
mm!(transb, transa, alpha, _B, transpose(A), beta, transpose(C), index, algo)
return C
end

function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMatrix{T}, B::CuSparseMatrixCSR{T}, beta::Number, C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where T
_B = CuSparseMatrixCSC{T}(B.rowPtr, B.colVal, B.nzVal, reverse(B.dims))
mm!(transb, transa, alpha, _B, transpose(A), beta, transpose(C), index, algo)
return C
end

function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMatrix{T}, B::CuSparseMatrixCOO{T}, beta::Number, C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where T
if T <: Real || transb == 'N' || transb == 'T'
mm!((transb == 'N') ? 'T' : 'N', transa, alpha, B, transpose(A), beta, transpose(C), index, algo)
else # transb == 'C'
_B = CuSparseMatrixCOO{T}(B.rowInd, B.colInd, conj.(B.nzVal), B.dims)
mm!('N', transa, alpha, _B, transpose(A), beta, transpose(C), index, algo)
end
return C
end

function bmm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseArrayCSR{T,Ti,N},
B::DenseCuArray{T,N}, beta::Number, C::DenseCuArray{T,N}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T,Ti,N}
Ar = reshape(A, :, :, :)
Expand Down Expand Up @@ -401,60 +417,6 @@ function bmm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparse
return C
end

function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMatrix{T},
B::Union{CuSparseMatrixCSC{T},CuSparseMatrixCSR{T},CuSparseMatrixCOO{T}}, beta::Number,
C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T}

# Support transa = 'C' and `transb = 'C' for real matrices
transa = T <: Real && transa == 'C' ? 'T' : transa
transb = T <: Real && transb == 'C' ? 'T' : transb

# cusparseSpMM can be also used to perform the multiplication of a dense matrix and a sparse matrix by switching the dense matrices layout:
# Cc = α * Ac * B + β * Cc → α * Bᵀ * Ar + β * Cr
# Cc = α * Ac * Bᵀ + β * Cc → α * B * Ar + β * Cr
# Cc = α * Ac * Bᴴ + β * Cc → α * B̅ * Ar + β * Cr
# where B is a sparse matrix, Ac and Cc indicate column-major layout, while Ar and Cr refer to row-major layout.

m,k = size(A)
n = size(C)[2]

if transa == 'N' && transb == 'N'
chkmmdims(B,C,k,n,m,n)
elseif transa == 'N' && transb != 'N'
chkmmdims(B,C,n,k,m,n)
elseif transa != 'N' && transb == 'N'
chkmmdims(B,C,m,n,k,n)
elseif transa != 'N' && transb != 'N'
chkmmdims(B,C,n,m,k,n)
end

descA = CuDenseMatrixDescriptor(A, transposed=true)
descB = CuSparseMatrixDescriptor(B, index, transposed=true)
descC = CuDenseMatrixDescriptor(C, transposed=true)

# Set default buffer for small matrices (1000 chosen arbitrarly)
# Otherwise tries to allocate 120TB of memory (see #2296)
function bufferSize()
out = Ref{Csize_t}(1000)
cusparseSpMM_bufferSize(
handle(), transb, transa, Ref{T}(alpha), descB, descA, Ref{T}(beta),
descC, T, algo, out)
return out[]
end
with_workspace(bufferSize) do buffer
# We should find a way to reuse the buffer (issue #1362)
if !(B isa CuSparseMatrixCOO)
cusparseSpMM_preprocess(
handle(), transb, transa, Ref{T}(alpha), descB, descA, Ref{T}(beta),
descC, T, algo, buffer)
end
cusparseSpMM(
handle(), transb, transa, Ref{T}(alpha), descB, descA, Ref{T}(beta),
descC, T, algo, buffer)
end
return C
end

# AB and C must have the same sparsity pattern if β ≠ 0.
function gemm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseMatrixCSR{T}, B::CuSparseMatrixCSR{T},
beta::Number, C::CuSparseMatrixCSR{T}, index::SparseChar, algo::cusparseSpGEMMAlg_t=CUSPARSE_SPGEMM_DEFAULT) where {T}
Expand Down
2 changes: 2 additions & 0 deletions lib/cusparse/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ mutable struct CuDenseMatrixDescriptor
obj
end

CuDenseMatrixDescriptor(At::Transpose{<:Any, <:DenseCuMatrix}) = CuDenseMatrixDescriptor(parent(At), transposed=true)

function CuDenseMatrixDescriptor(A::DenseCuArray{T, 3}; transposed::Bool=false) where T
desc_ref = Ref{cusparseDnMatDescr_t}()
if transposed
Expand Down
14 changes: 1 addition & 13 deletions lib/cusparse/interfaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,6 @@ function mv_wrapper(transa::SparseChar, alpha::Number, A::CuSparseMatrix, X::Den
mv!(transa, alpha, A, X, beta, Y, 'O')
end

function mm_wrapper(transa::SparseChar, transb::SparseChar, alpha::Number,
A::CuSparseMatrix{T}, B::CuMatrix{T}, beta::Number, C::CuMatrix{T}) where {T}
n_A, m_A = (transa != 'N') ? reverse(size(A)) : size(A)
n_B, m_B = (transb != 'N') ? reverse(size(B)) : size(B)
n_C, m_C = size(C)
m_A == n_B || throw(DimensionMismatch())
n_A == n_C || throw(DimensionMismatch())
m_B == m_C || throw(DimensionMismatch())
isempty(B) && return CUDA.zeros(eltype(B), size(A, 1), 0)
mm!(transa, transb, alpha, A, B, beta, C, 'O')
end

LinearAlgebra.dot(x::CuSparseVector{T}, y::DenseCuVector{T}) where {T <: BlasReal} = vv!('N', x, y, 'O')
LinearAlgebra.dot(x::DenseCuVector{T}, y::CuSparseVector{T}) where {T <: BlasReal} = dot(y, x)

Expand Down Expand Up @@ -82,7 +70,7 @@ end
function LinearAlgebra.generic_matmatmul!(C::CuMatrix{T}, tA, tB, A::CuSparseMatrix{T}, B::DenseCuMatrix{T}, alpha::Number, beta::Number) where {T <: Union{Float16, ComplexF16, BlasFloat}}
tA = tA in ('S', 's', 'H', 'h') ? 'N' : tA
tB = tB in ('S', 's', 'H', 'h') ? 'N' : tB
mm_wrapper(tA, tB, alpha, A, B, beta, C)
mm!(tA, tB, alpha, A, B, beta, C, 'O')
end

for (wrapa, transa, unwrapa) in op_wrappers
Expand Down
36 changes: 26 additions & 10 deletions test/libraries/cusparse/generic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,38 @@ for SparseMatrixType in keys(SPMM_ALGOS)
@testset "mm! $T" for T in [Float32, Float64, ComplexF32, ComplexF64]
@testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
@testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
CUSPARSE.version() < v"12.0" && SparseMatrixType == CuSparseMatrixCSC && T <: Complex && transa == 'C' && continue
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && (transa != 'N' || transb != 'N') && continue
CUSPARSE.version() < v"12.5.8" && algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && (transa != 'N' || transb != 'N') && continue
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && transa != 'N' && continue # https://docs.nvidia.com/cuda/cusparse/index.html#cusparsespmm: CSR_ALG3 supports only 'NON_TRANSPOSE'
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && transb == 'C' && continue # https://docs.nvidia.com/cuda/cusparse/index.html#cusparsespmm: CSR_ALG3 does not support 'CONJUGATE_TRANSPOSE'
(SparseMatrixType == CuSparseMatrixBSR) && (transa != 'N') && continue
A = sprand(T, 10, 10, 0.1)
B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10)
Bt = collect(transpose(B))
C = rand(T, 10, 2)
Ct = collect(transpose(C))
dA = SparseMatrixType == CuSparseMatrixBSR ? SparseMatrixType(A,1) : SparseMatrixType(A)
dB = CuArray(B)
dC = CuArray(C)
dBt = CuArray(Bt)

alpha = rand(T)
beta = rand(T)

dC = CuArray(C)
mm!(transa, transb, alpha, dA, dB, beta, dC, 'O', algo)
@test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC)

dCt = CuArray(Ct)
mm!(transa, transb, alpha, dA, transpose(dBt), beta, transpose(dCt), 'O', algo)
@test alpha * opa(A) * opb(B) + beta * C ≈ transpose(collect(dCt))

CUSPARSE.version() < v"12.5.8" && continue
dC = CuArray(C)
mm!(transa, transb, alpha, dA, transpose(dBt), beta, dC, 'O', algo)
@test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC)

dCt = CuArray(Ct)
mm!(transa, transb, alpha, dA, dB, beta, transpose(dCt), 'O', algo)
@test alpha * opa(A) * opb(B) + beta * C ≈ transpose(collect(dCt))
end
end
end
Expand All @@ -122,16 +140,14 @@ for SparseMatrixType in keys(SPMM_ALGOS)
@testset "$T" for T in [Float32, Float64, ComplexF32, ComplexF64]
@testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)]
@testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)]
CUSPARSE.version() < v"12.0" && SparseMatrixType == CuSparseMatrixCSR && T <: Complex && transb == 'C' && continue
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && (transa != 'N' || transb != 'N') && continue
CUSPARSE.version() < v"12.5.8" && algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && continue
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && transb != 'N' && continue # https://docs.nvidia.com/cuda/cusparse/index.html#cusparsespmm: CSR_ALG3 supports only 'NON_TRANSPOSE'
algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && transa == 'C' && continue # https://docs.nvidia.com/cuda/cusparse/index.html#cusparsespmm: CSR_ALG3 does not support 'CONJUGATE_TRANSPOSE'
A = rand(T, 10, 10)
B = transb == 'N' ? sprand(T, 10, 5, 0.5) : sprand(T, 5, 10, 0.5)
C = rand(T, 10, 5)
dA = CuArray(A)
dB = SparseMatrixType(B)
if SparseMatrixType == CuSparseMatrixCOO
dB = sort_coo(dB, 'C')
end
dC = CuArray(C)

alpha = rand(T)
Expand All @@ -141,8 +157,8 @@ for SparseMatrixType in keys(SPMM_ALGOS)

# add tests for very small matrices (see #2296)
# skip conjugate transpose - causes errors with 1x1 matrices
# CUSPARSE_SPMM_CSR_ALG3 also fails
(algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 || transa == 'C') && continue
CUSPARSE.version() < v"12.6.3" && algo == CUSPARSE.CUSPARSE_SPMM_CSR_ALG3 && continue
transa == 'C' && continue
A = rand(T, 1, 1)
B = sprand(T, 1, 1, 1.)
C = rand(T, 1, 1)
Expand Down
3 changes: 0 additions & 3 deletions test/libraries/cusparse/interfaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ nB = 2
B = opb == identity ? sprand(elty, k, n, 0.2) : sprand(elty, n, k, 0.2)
for SparseMatrixType in (CuSparseMatrixCSC, CuSparseMatrixCSR, CuSparseMatrixCOO)
dB = SparseMatrixType(B)
if SparseMatrixType == CuSparseMatrixCOO
dB = sort_coo(dB, 'C')
end
@testset "CuMatrix * $SparseMatrixType" begin
@testset "A * B" begin
C = opa(A) * opb(B)
Expand Down