diff --git a/README.md b/README.md index 94be9c13..8b87ccc6 100644 --- a/README.md +++ b/README.md @@ -302,5 +302,3 @@ The discovered paths will be written to a global file with preferences, typicall `$HOME/.julia/environments/vX.Y/LocalPreferences.toml` (where `vX.Y` refers to the Julia version you are using). You can modify this file, or remove it when you want to revert to default set of binaries. - -# bump buildkite diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl index 360c00b7..9a0f5170 100644 --- a/lib/mkl/wrappers_sparse.jl +++ b/lib/mkl/wrappers_sparse.jl @@ -122,6 +122,14 @@ for SparseMatrix in (:oneSparseMatrixCSR, :oneSparseMatrixCOO) end for SparseMatrix in (:oneSparseMatrixCSC,) + # CSC(A) is represented by storing CSR(A^T). Map operations accordingly: + # - trans = 'N': want A*x -> use op(S)='T' with S=A^T. + # - trans = 'T': want A^T*x -> use op(S)='N' with S=A^T. + # - trans = 'C': want A^H*x. + # * For real eltypes, A^H == A^T -> use op(S)='N'. + # * For complex eltypes, we cannot express A^H using a single op(S). + # Use identity: conj(y_new) = conj(alpha) * A * conj(x) + conj(beta) * conj(y) + # and compute with op(S)='T' (since S^T = A), conjugating x and y around the call. for (fname, elty) in ((:onemklSsparse_gemv, :Float32), (:onemklDsparse_gemv, :Float64)) @eval begin @@ -139,8 +147,47 @@ for SparseMatrix in (:oneSparseMatrixCSC,) end end + # Special handling for CSC matrices since they are stored as transposed CSR + for (fname, elty) in ( + (:onemklCsparse_gemv, :ComplexF32), + (:onemklZsparse_gemv, :ComplexF64), + ) + @eval begin + function sparse_gemv!( + trans::Char, + alpha::Number, + A::$SparseMatrix{$elty}, + x::oneStridedVector{$elty}, + beta::Number, + y::oneStridedVector{$elty} + ) + + # Compute A^H*x via identity: + # conj(y_new) = conj(alpha) * (A^T) * conj(x) + conj(beta) * conj(y) + # Since S=A^T and op='N' computes S*x = A^T*x, we can realize this with one call. + + if trans == 'C' + y .= conj.(y) + x .= conj.(x) + alpha = conj(alpha) + beta = conj(beta) + end + + queue = global_queue(context(x), device()) + $fname(sycl_queue(queue), flip_trans(trans), alpha, A.handle, x, beta, y) + + if trans == 'C' + y .= conj.(y) + # Restore x + x .= conj.(x) + end + return y + end + end + end @eval begin function sparse_optimize_gemv!(trans::Char, A::$SparseMatrix) + # complex 'C' case is implemented using op='N' on S=A^T with conjugation trick queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemv(sycl_queue(queue), flip_trans(trans), A.handle) return A @@ -149,17 +196,19 @@ for SparseMatrix in (:oneSparseMatrixCSC,) end for (fname, elty) in ((:onemklSsparse_gemm, :Float32), - (:onemklDsparse_gemm, :Float64), - (:onemklCsparse_gemm, :ComplexF32), - (:onemklZsparse_gemm, :ComplexF64)) + (:onemklDsparse_gemm, :Float64), + (:onemklCsparse_gemm, :ComplexF32), + (:onemklZsparse_gemm, :ComplexF64), + ) @eval begin function sparse_gemm!(transa::Char, - transb::Char, - alpha::Number, - A::oneSparseMatrixCSR{$elty}, - B::oneStridedMatrix{$elty}, - beta::Number, - C::oneStridedMatrix{$elty}) + transb::Char, + alpha::Number, + A::oneSparseMatrixCSR{$elty}, + B::oneStridedMatrix{$elty}, + beta::Number, + C::oneStridedMatrix{$elty} + ) mB, nB = size(B) mC, nC = size(C) @@ -212,6 +261,80 @@ for (fname, elty) in ((:onemklSsparse_gemm, :Float32), end end +# Special handling for CSC matrices since they are stored as transposed CSR (S = A^T) +for (fname, elty) in ( + (:onemklCsparse_gemm, :ComplexF32), + (:onemklZsparse_gemm, :ComplexF64), + ) + @eval begin + function sparse_gemm!( + transa::Char, + transb::Char, + alpha::Number, + A::oneSparseMatrixCSC{$elty}, + B::oneStridedMatrix{$elty}, + beta::Number, + C::oneStridedMatrix{$elty} + ) + + # Map op(A) to op(S) where S = A^T stored as CSR in the handle + # transa: 'N' -> op(S)='T'; 'T' -> op(S)='N'; 'C' -> + # real: op(S)='N' (since A^H == A^T) + # complex: use conjugation identity on B and C with op(S)='N' + + mB, nB = size(B) + mC, nC = size(C) + (nB != nC) && (transb == 'N') && throw(ArgumentError("B and C must have the same number of columns.")) + (mB != nC) && (transb != 'N') && throw(ArgumentError("Bᵀ and C must have the same number of columns.")) + nrhs = size(B, 2) + ldb = max(1, stride(B, 2)) + ldc = max(1, stride(C, 2)) + queue = global_queue(context(C), device()) + + # Use identity: conj(C_new) = conj(alpha) * S * conj(opB(B)) + conj(beta) * conj(C) + # Prepare conj(C) in-place and conj(B) into a temporary if needed + + # Determine how to supply opB under conjugation + # - transb == 'N': pass transb='N' and use conj(B) + # - transb == 'T': pass transb='T' and use conj(B) + # - transb == 'C': since conj(B^H) = B^T, pass transb='T' and use B as-is + local transb_eff + local Beff + if transa == 'C' + C .= conj.(C) + alpha = conj(alpha) + beta = conj(beta) + if transb == 'N' + transb_eff = 'N' + # Beff = similar(B) + B .= conj.(B) + elseif transb == 'T' + transb_eff = 'T' + # Beff = similar(B) + B .= conj.(B) + else + # transb == 'C' + transb_eff = 'T' + end + else + transb_eff = transb + end + + $fname(sycl_queue(queue), 'C', flip_trans(transa), transb_eff, alpha, A.handle, B, nrhs, ldb, beta, C, ldc) + + # Undo conjugation to obtain C_new + if transa == 'C' + C .= conj.(C) + if transb == 'N' || transb == 'T' + # Restore B + B .= conj.(B) + end + end + return C + end + end +end + function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSC) queue = global_queue(context(A.nzVal), device(A.nzVal)) onemklXsparse_optimize_gemm(sycl_queue(queue), flip_trans(trans), A.handle) @@ -244,7 +367,10 @@ for (fname, elty) in ((:onemklSsparse_symv, :Float32), end for (fname, elty) in ((:onemklSsparse_symv, :Float32), - (:onemklDsparse_symv, :Float64)) + (:onemklDsparse_symv, :Float64), + (:onemklCsparse_symv, :ComplexF32), + (:onemklZsparse_symv, :ComplexF64), + ) @eval begin function sparse_symv!(uplo::Char, alpha::Number, @@ -287,33 +413,54 @@ function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparse return A end -# Only trans = 'N' is supported with oneSparseMatrixCSR. -# We can't use any trick to support sparse "trmv" for oneSparseMatrixCSC. -# -# for (fname, elty) in ((:onemklSsparse_trmv, :Float32), -# (:onemklDsparse_trmv, :Float64)) -# @eval begin -# function sparse_trmv!(uplo::Char, -# trans::Char, -# diag::Char, -# alpha::Number, -# A::oneSparseMatrixCSC{$elty}, -# x::oneStridedVector{$elty}, -# beta::Number, -# y::oneStridedVector{$elty}) -# -# queue = global_queue(context(y), device()) -# $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y) -# y -# end -# end -# end -# -# function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) -# queue = global_queue(context(A.nzVal), device(A.nzVal)) -# onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) -# return A -# end +# Special handling for CSC matrices since they are stored as transposed CSR +for (fname, elty) in ( + (:onemklSsparse_trmv, :Float32), + (:onemklDsparse_trmv, :Float64), + (:onemklCsparse_trmv, :ComplexF32), + (:onemklZsparse_trmv, :ComplexF64), + ) + @eval begin + function sparse_trmv!( + uplo::Char, + trans::Char, + diag::Char, + alpha::Number, + A::oneSparseMatrixCSC{$elty}, + x::oneStridedVector{$elty}, + beta::Number, + y::oneStridedVector{$elty} + ) + + # Intel oneAPI sparse trmv only supports nontrans operations. + # Since CSC(A) is stored as CSR(A^T), we cannot map CSC operations + # to CSR operations for triangular operations without transpose support. + throw( + ArgumentError( + "sparse_trmv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(y), device()) + $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, beta, y) + return y + end + end +end + +function sparse_optimize_trmv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) + throw( + ArgumentError( + "sparse_optimize_trmv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_trmv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) + return A +end for (fname, elty) in ((:onemklSsparse_trsv, :Float32), (:onemklDsparse_trsv, :Float64), @@ -341,32 +488,49 @@ function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparse return A end -# Only trans = 'N' is supported with oneSparseMatrixCSR. -# We can't use any trick to support sparse "trsv" for oneSparseMatrixCSC. -# -# for (fname, elty) in ((:onemklSsparse_trsv, :Float32), -# (:onemklDsparse_trsv, :Float64)) -# @eval begin -# function sparse_trsv!(uplo::Char, -# trans::Char, -# diag::Char, -# alpha::Number, -# A::oneSparseMatrixCSC{$elty}, -# x::oneStridedVector{$elty}, -# y::oneStridedVector{$elty}) -# -# queue = global_queue(context(y), device()) -# $fname(sycl_queue(queue), uplo, flip_trans(trans), diag, alpha, A.handle, x, y) -# y -# end -# end -# end -# -# function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) -# queue = global_queue(context(A.nzVal), device(A.nzVal)) -# onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) -# return A -# end +for (fname, elty) in ( + (:onemklSsparse_trsv, :Float32), + (:onemklDsparse_trsv, :Float64), + (:onemklCsparse_trsv, :ComplexF32), + (:onemklZsparse_trsv, :ComplexF64), + ) + @eval begin + function sparse_trsv!( + uplo::Char, + trans::Char, + diag::Char, + alpha::Number, + A::oneSparseMatrixCSC{$elty}, + x::oneStridedVector{$elty}, + y::oneStridedVector{$elty} + ) + + throw( + ArgumentError( + "sparse_trsv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(y), device()) + onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) + return A + end + end +end + +function sparse_optimize_trsv!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) + throw( + ArgumentError( + "sparse_optimize_trsv! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_trsv(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) + return A +end for (fname, elty) in ((:onemklSsparse_trsm, :Float32), (:onemklDsparse_trsm, :Float64), @@ -412,43 +576,73 @@ end # Only transA = 'N' is supported with oneSparseMatrixCSR. # We can't use any trick to support sparse "trsm" for oneSparseMatrixCSC. -# -# for (fname, elty) in ((:onemklSsparse_trsm, :Float32), -# (:onemklDsparse_trsm, :Float64)) -# @eval begin -# function sparse_trsm!(uplo::Char, -# transA::Char, -# transX::Char, -# diag::Char, -# alpha::Number, -# A::oneSparseMatrixCSC{$elty}, -# X::oneStridedMatrix{$elty}, -# Y::oneStridedMatrix{$elty}) -# -# mX, nX = size(X) -# mY, nY = size(Y) -# (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows.")) -# (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns.")) -# (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows.")) -# (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns.")) -# nrhs = size(X, 2) -# ldx = max(1,stride(X,2)) -# ldy = max(1,stride(Y,2)) -# queue = global_queue(context(Y), device()) -# $fname(sycl_queue(queue), 'C', flip_trans(transA), transX, uplo, diag, alpha, A.handle, X, nrhs, ldx, Y, ldy) -# Y -# end -# end -# end -# -# function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) -# queue = global_queue(context(A.nzVal), device(A.nzVal)) -# onemklXsparse_optimize_trsm(sycl_queue(queue), uplo, flip_trans(trans), diag, A.handle) -# return A -# end -# -# function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC) -# queue = global_queue(context(A.nzVal), device(A.nzVal)) -# onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, flip_trans(trans), diag, A.handle, nrhs) -# return A -# end +for (fname, elty) in ( + (:onemklSsparse_trsm, :Float32), + (:onemklDsparse_trsm, :Float64), + (:onemklCsparse_trsm, :ComplexF32), + (:onemklZsparse_trsm, :ComplexF64), + ) + @eval begin + function sparse_trsm!( + uplo::Char, + transA::Char, + transX::Char, + diag::Char, + alpha::Number, + A::oneSparseMatrixCSC{$elty}, + X::oneStridedMatrix{$elty}, + Y::oneStridedMatrix{$elty} + ) + + # Intel oneAPI sparse trsm only supports nontrans operations for the matrix A. + # Since CSC(A) is stored as CSR(A^T), we cannot map CSC operations + # to CSR operations for triangular solve operations without transpose support. + throw( + ArgumentError( + "sparse_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + + mX, nX = size(X) + mY, nY = size(Y) + (mX != mY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of rows.")) + (nX != nY) && (transX == 'N') && throw(ArgumentError("X and Y must have the same number of columns.")) + (nX != mY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of rows.")) + (mX != nY) && (transX != 'N') && throw(ArgumentError("Xᵀ and Y must have the same number of columns.")) + nrhs = size(X, 2) + ldx = max(1, stride(X, 2)) + ldy = max(1, stride(Y, 2)) + queue = global_queue(context(Y), device()) + $fname(sycl_queue(queue), 'C', flip_trans(transA), transX, uplo, diag, alpha, A.handle, X, nrhs, ldx, Y, ldy) + return Y + end + end +end + +function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, A::oneSparseMatrixCSC) + throw( + ArgumentError( + "sparse_optimize_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_trsm(sycl_queue(queue), uplo, trans, diag, A.handle) + return A +end + +function sparse_optimize_trsm!(uplo::Char, trans::Char, diag::Char, nrhs::Int, A::oneSparseMatrixCSC) + throw( + ArgumentError( + "sparse_optimize_trsm! is not supported for oneSparseMatrixCSC due to Intel oneAPI limitations. " * + "Intel sparse library only supports nontrans operations for triangular matrix operations. " * + "Convert to oneSparseMatrixCSR format instead." + ) + ) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_trsm_advanced(sycl_queue(queue), 'C', uplo, trans, diag, A.handle, nrhs) + return A +end diff --git a/test/onemkl.jl b/test/onemkl.jl index 8b691e93..8881401c 100644 --- a/test/onemkl.jl +++ b/test/onemkl.jl @@ -3,7 +3,7 @@ if Sys.iswindows() else using oneAPI -using oneAPI.oneMKL: band, bandex, oneSparseMatrixCSR, oneSparseMatrixCOO, oneSparseMatrixCSC + using oneAPI.oneMKL: band, bandex, oneSparseMatrixCSR, oneSparseMatrixCOO, oneSparseMatrixCSC using SparseArrays using LinearAlgebra @@ -1082,9 +1082,7 @@ end end @testset "SPARSE" begin - @testset "$T" for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) - ε = sqrt(eps(real(T))) - + @testset "$T" for T in intersect(eltypes, [Float32, Float64, ComplexF32, ComplexF64]) @testset "oneSparseMatrixCSR" begin for S in (Int32, Int64) A = sprand(T, 20, 10, 0.5) @@ -1095,16 +1093,16 @@ end end end - @testset "oneSparseMatrixCSC" begin - (T isa Complex) && continue - for S in (Int32, Int64) - A = sprand(T, 20, 10, 0.5) - A = SparseMatrixCSC{T, S}(A) - B = oneSparseMatrixCSC(A) - A2 = SparseMatrixCSC(B) - @test A == A2 + @testset "oneSparseMatrixCSC" begin + (T isa Complex) && continue + for S in (Int32, Int64) + A = sprand(T, 20, 10, 0.5) + A = SparseMatrixCSC{T, S}(A) + B = oneSparseMatrixCSC(A) + A2 = SparseMatrixCSC(B) + @test A == A2 + end end - end @testset "oneSparseMatrixCOO" begin for S in (Int32, Int64) @@ -1117,9 +1115,8 @@ end end @testset "sparse gemv" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR, oneSparseMatrixCSC) - @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCOO, oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] A = sprand(T, 20, 10, 0.5) x = transa == 'N' ? rand(T, 10) : rand(T, 20) y = transa == 'N' ? rand(T, 20) : rand(T, 10) @@ -1132,41 +1129,39 @@ end beta = rand(T) oneMKL.sparse_optimize_gemv!(transa, dA) oneMKL.sparse_gemv!(transa, alpha, dA, dx, beta, dy) - @test isapprox(alpha * opa(A) * x + beta * y, collect(dy), atol=ε) + @test alpha * opa(A) * x + beta * y ≈ collect(dy) end end end @testset "sparse gemm" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) - @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] - @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)] - (transb == 'N') || continue - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue - A = sprand(T, 10, 10, 0.5) - B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10) - C = rand(T, 10, 2) - - dA = SparseMatrix(A) - dB = oneMatrix{T}(B) - dC = oneMatrix{T}(C) + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] + @testset "transb = $transb" for (transb, opb) in [('N', identity), ('T', transpose), ('C', adjoint)] + (transb == 'N') || continue + A = sprand(T, 10, 10, 0.5) + B = transb == 'N' ? rand(T, 10, 2) : rand(T, 2, 10) + C = rand(T, 10, 2) + + dA = SparseMatrix(A) + dB = oneMatrix{T}(B) + dC = oneMatrix{T}(C) - alpha = rand(T) - beta = rand(T) - oneMKL.sparse_optimize_gemm!(transa, dA) - oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC) - @test isapprox(alpha * opa(A) * opb(B) + beta * C, collect(dC), atol=ε) + alpha = rand(T) + beta = rand(T) + oneMKL.sparse_optimize_gemm!(transa, dA) + oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC) - oneMKL.sparse_optimize_gemm!(transa, transb, 2, dA) - end + @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC) + oneMKL.sparse_optimize_gemm!(transa, transb, 2, dA) + end end end end @testset "sparse symv" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) - @testset "uplo = $uplo" for uplo in ('L', 'U') - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "uplo = $uplo" for uplo in ('L', 'U') A = sprand(T, 10, 10, 0.5) A = A + transpose(A) x = rand(T, 10) @@ -1179,95 +1174,116 @@ end alpha = rand(T) beta = rand(T) oneMKL.sparse_symv!(uplo, alpha, dA, dx, beta, dy) - @test isapprox(alpha * A * x + beta * y, collect(dy), atol=ε) + @test alpha * A * x + beta * y ≈ collect(dy) end end end - @testset "sparse trmv" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,) - @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] - for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), - ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)] - (transa == 'N') || continue - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue - A = sprand(T, 10, 10, 0.5) - x = rand(T, 10) - y = rand(T, 10) - - B = uplo == 'L' ? tril(A) : triu(A) - B = diag == 'U' ? B - Diagonal(B) + I : B - dA = SparseMatrix(B) - dx = oneVector{T}(x) - dy = oneVector{T}(y) + @testset "sparse trmv" begin + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] + for (uplo, diag, wrapper) in [ + ('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), + ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), + ] + (transa == 'N') || continue + A = sprand(T, 10, 10, 0.5) + x = rand(T, 10) + y = rand(T, 10) - alpha = rand(T) - beta = rand(T) + B = uplo == 'L' ? tril(A) : triu(A) + B = diag == 'U' ? B - Diagonal(B) + I : B + dA = SparseMatrix(B) + dx = oneVector{T}(x) + dy = oneVector{T}(y) - oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA) - oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy) - @test isapprox(alpha * wrapper(opa(A)) * x + beta * y, collect(dy), atol=ε) - end + alpha = rand(T) + beta = rand(T) + + if SparseMatrix == oneSparseMatrixCSC + @test_broken sparse_optimize_trmv!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported + @test_throws ArgumentError oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA) + @test_throws ArgumentError oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy) + else + oneMKL.sparse_optimize_trmv!(uplo, transa, diag, dA) + oneMKL.sparse_trmv!(uplo, transa, diag, alpha, dA, dx, beta, dy) + @test alpha * wrapper(opa(A)) * x + beta * y ≈ collect(dy) + end + end end end end - @testset "sparse trsv" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,) - @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] + @testset "sparse trsv" begin + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), - ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)] - (transa == 'N') || continue - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue + ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), + ] + (transa == 'N') || continue alpha = rand(T) A = rand(T, 10, 10) + I A = sparse(A) - x = rand(T, 10) - y = rand(T, 10) + x = rand(T, 10) + y = rand(T, 10) B = uplo == 'L' ? tril(A) : triu(A) B = diag == 'U' ? B - Diagonal(B) + I : B - dA = SparseMatrix(B) - dx = oneVector{T}(x) - dy = oneVector{T}(y) - - oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA) - oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy) - y = wrapper(opa(A)) \ (alpha * x) - @test isapprox(y, collect(dy), atol=ε) + dA = SparseMatrix(B) + dx = oneVector{T}(x) + dy = oneVector{T}(y) + + if SparseMatrix == oneSparseMatrixCSC + @test_broken sparse_optimize_trsv!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported + @test_throws ArgumentError oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA) + @test_throws ArgumentError oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy) + else + oneMKL.sparse_optimize_trsv!(uplo, transa, diag, dA) + oneMKL.sparse_trsv!(uplo, transa, diag, alpha, dA, dx, dy) + y = wrapper(opa(A)) \ (alpha * x) + @test y ≈ collect(dy) + end + end end end end - end - @testset "sparse trsm" begin - @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR,) - @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] - @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)] - (transx != 'N') && continue - for (uplo, diag, wrapper) in [('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), - ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular)] - (transa == 'N') || continue - (T <: Complex) && (SparseMatrix == oneSparseMatrixCSC) && continue - alpha = rand(T) - A = rand(T, 10, 10) + I - A = sparse(A) - X = transx == 'N' ? rand(T, 10, 4) : rand(T, 4, 10) - Y = rand(T, 10, 4) - - B = uplo == 'L' ? tril(A) : triu(A) - B = diag == 'U' ? B - Diagonal(B) + I : B - dA = SparseMatrix(B) - dX = oneMatrix{T}(X) - dY = oneMatrix{T}(Y) - - oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA) - oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY) - Y = wrapper(opa(A)) \ (alpha * opx(X)) - @test isapprox(Y, collect(dY), atol=ε) - - oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA) - end + @testset "sparse trsm" begin + @testset "$SparseMatrix" for SparseMatrix in (oneSparseMatrixCSR, oneSparseMatrixCSC) + @testset "transa = $transa" for (transa, opa) in [('N', identity), ('T', transpose), ('C', adjoint)] + @testset "transx = $transx" for (transx, opx) in [('N', identity), ('T', transpose), ('C', adjoint)] + (transx != 'N') && continue + for (uplo, diag, wrapper) in [ + ('L', 'N', LowerTriangular), ('L', 'U', UnitLowerTriangular), + ('U', 'N', UpperTriangular), ('U', 'U', UnitUpperTriangular), + ] + (transa == 'N') || continue + alpha = rand(T) + A = rand(T, 10, 10) + I + A = sparse(A) + X = transx == 'N' ? rand(T, 10, 4) : rand(T, 4, 10) + Y = rand(T, 10, 4) + + B = uplo == 'L' ? tril(A) : triu(A) + B = diag == 'U' ? B - Diagonal(B) + I : B + dA = SparseMatrix(B) + dX = oneMatrix{T}(X) + dY = oneMatrix{T}(Y) + + if SparseMatrix == oneSparseMatrixCSC + @test_broken sparse_optimize_trsm!(uplo, transa, diag, dA) # Intel oneAPI limitation: CSC triangular operations not supported + @test_throws ArgumentError oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA) + @test_throws ArgumentError oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY) + @test_throws ArgumentError oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA) + else + oneMKL.sparse_optimize_trsm!(uplo, transa, diag, dA) + oneMKL.sparse_trsm!(uplo, transa, transx, diag, alpha, dA, dX, dY) + Y = wrapper(opa(A)) \ (alpha * opx(X)) + @test Y ≈ collect(dY) + + oneMKL.sparse_optimize_trsm!(uplo, transa, diag, 4, dA) + end + end end end end