Skip to content

Commit 18ad424

Browse files
alystAlexey Stukalov
andauthored
More sparse operations support (#50)
* README: link to most recent Intel MKL docs * add top-level comments to clarify code structure * tests: ntries constant * tests: rework random matrices generation - use randn() instead of rand() to include negative values - variable sparsity rate for random sparse matrices * tests: fix matdescra use + to generate symmetric matrix; the result of matrix multiplication may have different eltype * tests: increase atol to reduce spurious failures * check_map_op_sizes(): allow C=nothing * check_map_op_sizes(): allow disabling specific checks this is required to support checking dimensions for X*A*X^T * matrix_descr(): edit specific fields * use LazyString for exceptions * rename typealias MKLSparseMat to SparseMat to distringuish from MKLSparseMatrix * tweak typealiases to improve precompilation times * fix \ support in v1.9-v1.11 * add check_nzpattern() method * convert(CSC/CSR, MKLSparseMtx): fix for empty mtx * COO, CSR: overloads necessary for unit tests * copy!(CSC/CSR, MKLSparseMatrix) * CSR/COO: improve dense conversion copyto!(Matrix, CSR) that works with unordered colval * convert(CSR, a::CSC) * add fastcopytri!() method * dual_opcode(op) * mul!(dense, sparse, sparse) support (sp2md!()) * mul!(sparse, sparse, sparse) support (sp2m()) * dense := A * A^T support (syrkd!()) * sparse := A * A^T support (syrk()) * dense := A * B * A^T support (syprd!()) * spmm() & spmmd!() (untested) * fixup ws --------- Co-authored-by: Alexey Stukalov <[email protected]>
1 parent ac243ee commit 18ad424

File tree

9 files changed

+1104
-115
lines changed

9 files changed

+1104
-115
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
[![codecov](https://codecov.io/gh/JuliaSparse/MKLSparse.jl/graph/badge.svg?token=j3KoKBEIt1)](https://codecov.io/gh/JuliaSparse/MKLSparse.jl)
44

5-
`MKLSparse.jl` is a Julia package to seamlessly use the sparse functionality in MKL to speed up operations on sparse arrays in Julia.
6-
In order to use `MKLSparse.jl` you do not need to install Intel's MKL library nor build Julia with MKL. `MKLSparse.jl` will automatically download and use the MKL library for you when installed.
5+
*MKLSparse.jl* is a Julia package to seamlessly use the [sparse BLAS routines from Intel's Math Kernel Library (MKL)](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c)
6+
to speed up operations on sparse arrays in Julia.
7+
In order to use *MKLSparse.jl* you do not need to install Intel's MKL library nor build Julia with MKL. *MKLSparse.jl* will automatically download and use the MKL library for you when installed.
78

89
### Matrix multiplications
910

gen/wrapper.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ function wrapper(name::String, headers::Vector{String}, optimized::Bool=false)
2424

2525
args = get_default_args()
2626
push!(args, "-I$include_dir")
27-
27+
2828
ctx = create_context(headers, args, options)
2929
build!(ctx)
3030

src/MKLSparse.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ function _log_mklsparse_call(fname)
1313
__mklsparse_calls_count[] += 1
1414
end
1515

16+
# common MKL definitions from mkl_service.h, see also MKL.jl
17+
1618
@enum Threading begin
1719
THREADING_INTEL
1820
THREADING_SEQUENTIAL
@@ -39,13 +41,16 @@ function set_interface_layer(interface::Interface = INTERFACE_LP64)
3941
return nothing
4042
end
4143

44+
# initialize the MKL interface upon module initialization
45+
# NOTE: this sets the interface for all MKL API calls, not just the sparse ones
4246
function __init__()
4347
set_interface_layer(INTERFACE_LP64)
4448
end
4549

4650
# Wrappers generated by Clang.jl
4751
include("libmklsparse.jl")
4852
include("types.jl")
53+
include("utils.jl")
4954
include("mklsparsematrix.jl")
5055

5156
# TODO: BLAS1

src/generic.jl

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,255 @@ function mm!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_d
5656
return C
5757
end
5858

59+
# C := op(A) * B, where C is sparse
60+
function spmm(transA::Char, A::S, B::S) where {S <: AbstractSparseMatrix}
61+
check_trans(transA)
62+
check_mat_op_sizes(nothing, A, transA, B, 'N')
63+
Cout = Ref{sparse_matrix_t}()
64+
hA = MKLSparseMatrix(A)
65+
hB = MKLSparseMatrix(B)
66+
res = mkl_call(Val{:mkl_sparse_spmmI}(), S,
67+
transA, hA, hB, Cout)
68+
destroy(hA)
69+
destroy(hB)
70+
check_status(res)
71+
# NOTE: we are guessing what is the storage format of C
72+
hC = MKLSparseMatrix{S}(Cout[])
73+
C = convert(S, hC)
74+
destroy(hC)
75+
return C
76+
end
77+
78+
# C := op(A) * B, where C is dense
79+
function spmmd!(transa::Char, A::S, B::S,
80+
C::StridedMatrix{T};
81+
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
82+
) where {S <: AbstractSparseMatrix{T}} where T
83+
check_trans(transa)
84+
check_mat_op_sizes(C, A, transa, B, 'N')
85+
ldC = stride(C, 2)
86+
hA = MKLSparseMatrix(A)
87+
hB = MKLSparseMatrix(B)
88+
res = mkl_call(Val{:mkl_sparse_T_spmmdI}(), S,
89+
transa, hA, hB, dense_layout, C, ldC)
90+
destroy(hA)
91+
destroy(hB)
92+
check_status(res)
93+
return C
94+
end
95+
96+
# C := opA(A) * opB(B), where C is sparse
97+
function sp2m(transA::Char, A::S, descrA::matrix_descr,
98+
transB::Char, B::S, descrB::matrix_descr
99+
) where S <: AbstractSparseMatrix
100+
check_trans(transA)
101+
check_trans(transB)
102+
check_mat_op_sizes(nothing, A, transA, B, transB)
103+
Cout = Ref{sparse_matrix_t}()
104+
hA = MKLSparseMatrix(A)
105+
hB = MKLSparseMatrix(B)
106+
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
107+
transA, descrA, hA, transB, descrB, hB,
108+
SPARSE_STAGE_FULL_MULT, Cout)
109+
destroy(hA)
110+
destroy(hB)
111+
check_status(res)
112+
# NOTE: we are guessing what is the storage format of C
113+
hC = MKLSparseMatrix{S}(Cout[])
114+
C = convert(S, hC)
115+
destroy(hC)
116+
return C
117+
end
118+
119+
# C := opA(A) * opB(B), where C is sparse, in-place version
120+
# C should have the correct size and sparsity pattern
121+
function sp2m!(transA::Char, A::S, descrA::matrix_descr,
122+
transB::Char, B::S, descrB::matrix_descr,
123+
C::S;
124+
check_nzpattern::Bool = true
125+
) where {S <: AbstractSparseMatrix}
126+
check_trans(transA)
127+
check_trans(transB)
128+
check_mat_op_sizes(C, A, transA, B, transB)
129+
hA = MKLSparseMatrix(A)
130+
hB = MKLSparseMatrix(B)
131+
if check_nzpattern
132+
# pre-multiply A * B to get the number of nonzeros per column in the result
133+
CptnOut = Ref{sparse_matrix_t}()
134+
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
135+
transA, descrA, hA, transB, descrB, hB,
136+
SPARSE_STAGE_NNZ_COUNT, CptnOut)
137+
check_status(res)
138+
hCptn = MKLSparseMatrix{S}(CptnOut[])
139+
try
140+
# check if C has the same per-column nonzeros as the result
141+
MKLSparse.check_nzpattern(C, hCptn)
142+
catch e
143+
# destroy handles to A and B if the pattern check fails,
144+
# otherwise reuse them at the actual multiplication
145+
destroy(hA)
146+
destroy(hB)
147+
rethrow(e)
148+
finally
149+
destroy(hCptn)
150+
end
151+
# FIXME rowval not checked
152+
end
153+
# FIXME the optimal way would be to create the MKLSparse handle to C reusing its arrays
154+
# and do SPARSE_STAGE_FINALIZE_MULT to directly write to the C.nzval
155+
# but that causes segfaults when the handle is destroyed
156+
# (also the partial mkl_sparse_copy(C) workaround to reuse the nz structure segfaults)
157+
# see the note stating that external memory is not currently supported:
158+
# https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-2/two-stage-algorithm-in-inspect-execute-sp-blas.html
159+
#hC = MKLSparseMatrix(C)
160+
#mkl_call(Val{:mkl_sparse_set_memory_hintI}(), typeof(C), SPARSE_MEMORY_NONE)
161+
#hC_ref = Ref(hC)
162+
#res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
163+
# transA, descrA, hA, transB, descrB, hB,
164+
# SPARSE_STAGE_FINALIZE_MULT, hC_ref)
165+
#@assert hC_ref[] == hC
166+
# so instead we do the full multiplication and copy the result into C nzvals
167+
hCopy_ref = Ref{sparse_matrix_t}()
168+
# don't log if it's the 2nd mkl_call
169+
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
170+
transA, descrA, hA, transB, descrB, hB,
171+
SPARSE_STAGE_FULL_MULT, hCopy_ref, log = Val(!check_nzpattern))
172+
destroy(hA)
173+
destroy(hB)
174+
#destroy(hC)
175+
check_status(res)
176+
if hCopy_ref[] != C_NULL
177+
hCopy = MKLSparseMatrix{S}(hCopy_ref[])
178+
copy!(C, hCopy; check_nzpattern)
179+
destroy(hCopy)
180+
end
181+
return C
182+
end
183+
184+
# C := alpha * opA(A) * opB(B) + beta * C, where C is dense
185+
function sp2md!(transA::Char, alpha::T, A::S, descrA::matrix_descr,
186+
transB::Char, B::S, descrB::matrix_descr,
187+
beta::T, C::StridedMatrix{T};
188+
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
189+
) where {S <: AbstractSparseMatrix{T}} where T
190+
check_trans(transA)
191+
check_trans(transB)
192+
check_mat_op_sizes(C, A, transA, B, transB)
193+
ldC = stride(C, 2)
194+
hA = MKLSparseMatrix(A)
195+
hB = MKLSparseMatrix(B)
196+
res = mkl_call(Val{:mkl_sparse_T_sp2mdI}(), S,
197+
transA, descrA, hA, transB, descrB, hB,
198+
alpha, beta,
199+
C, dense_layout, ldC)
200+
destroy(hA)
201+
destroy(hB)
202+
check_status(res)
203+
return C
204+
end
205+
206+
# C := A * op(A), or
207+
# C := op(A) * A, where C is sparse
208+
# note: only the upper triangular part of C is computed
209+
function syrk(transA::Char, A::SparseMatrixCSR; copytri::Bool = false)
210+
copytri && error("syrk() wrapper does not implement copytri=true")
211+
check_trans(transA)
212+
Cout = Ref{sparse_matrix_t}()
213+
hA = MKLSparseMatrix(A)
214+
res = mkl_call(Val{:mkl_sparse_syrkI}(), typeof(A),
215+
transA, hA, Cout)
216+
destroy(hA)
217+
check_status(res)
218+
# NOTE: we are guessing what is the storage format of C
219+
hC = MKLSparseMatrix{typeof(A)}(Cout[])
220+
C = convert(typeof(A), hC)
221+
destroy(hC)
222+
return C
223+
end
224+
225+
# CSC is not supported by SparseMKL directly, so treat A as Aᵀ in CSR format
226+
# note: only the lower triangular part of C is computed (lower CSC = upper CSR)
227+
function syrk(transA::Char, A::SparseMatrixCSC{T}; kwargs...) where T
228+
C = syrk(dual_opcode(T, transA),
229+
convert(SparseMatrixCSR, transpose(A)); kwargs...)
230+
return convert(typeof(A), transpose(C))
231+
end
232+
233+
# C := A * op(A), or
234+
# C := op(A) * A, where C is dense
235+
# note: only the upper triangular part of C is computed
236+
function syrkd!(transA::Char, alpha::T, A::SparseMatrixCSR{T}, beta::T,
237+
C::StridedMatrix{T};
238+
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
239+
copytri::Bool = true
240+
) where T
241+
check_trans(transA)
242+
check_mat_op_sizes(C, A, transA, A, transA == 'N' ? 'T' : 'N'; dense_layout)
243+
ldC = stride(C, 2)
244+
hA = MKLSparseMatrix(A)
245+
res = mkl_call(Val{:mkl_sparse_T_syrkdI}(), typeof(A),
246+
transA, hA, alpha, beta, C, dense_layout, ldC)
247+
destroy(hA)
248+
check_status(res)
249+
copytri && fastcopytri!(C, dense_layout == SPARSE_LAYOUT_COLUMN_MAJOR ? 'U' : 'L',
250+
T <: Complex)
251+
return C
252+
end
253+
254+
# CSC is not supported by SparseMKL directly, so treat A as Aᵀ in CSR format
255+
function syrkd!(transA::Char, alpha::T, A::SparseMatrixCSC{T}, beta::T,
256+
C::StridedMatrix{T}; kwarg...
257+
) where T
258+
# since CSC support is implemented by transposing A, the A*A' has to be conjugated
259+
# to be correct in the complex case, that produces incorrect results when beta != 0
260+
(T <: Complex) && error("syrkd!() wrapper does not support SparseMatrixCSC with complex values")
261+
syrkd!(dual_opcode(T, transA), alpha,
262+
convert(SparseMatrixCSR, transpose(A)), beta, C; kwarg...)
263+
end
264+
265+
# C := alpha * op(A) * B * A + beta * C, or
266+
# C := alpha * A * B * op(A) + beta * C, where C is dense
267+
# note: only the upper triangular part of C is computed
268+
function syprd!(transA::Char, alpha::T, A::SparseMatrixCSR{T},
269+
B::StridedMatrix{T}, beta::T, C::StridedMatrix{T};
270+
dense_layout_B::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
271+
dense_layout_C::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
272+
copytri::Bool = true
273+
) where T
274+
check_trans(transA)
275+
# FIXME dense_layout_B not used
276+
check_mat_op_sizes(C, A, transA, B, 'N';
277+
check_result_columns = false, dense_layout = dense_layout_C)
278+
check_mat_op_sizes(C, B, 'N', A, transA == 'N' ? 'T' : 'N';
279+
check_result_rows = false, dense_layout = dense_layout_C)
280+
ldB = stride(B, 2)
281+
ldC = stride(C, 2)
282+
hA = MKLSparseMatrix(A)
283+
res = mkl_call(Val{:mkl_sparse_T_syprdI}(), typeof(A),
284+
transA, hA, B, dense_layout_B, ldB,
285+
alpha, beta, C, dense_layout_C, ldC)
286+
destroy(hA)
287+
check_status(res)
288+
copytri && fastcopytri!(C, dense_layout_C == SPARSE_LAYOUT_COLUMN_MAJOR ? 'U' : 'L',
289+
T <: Complex)
290+
return C
291+
end
292+
293+
function syprd!(transA::Char, alpha::T, A::SparseMatrixCSC{T},
294+
B::StridedMatrix{T}, beta::T, C::StridedMatrix{T};
295+
kwargs...
296+
) where T
297+
# since CSC support is implemented by transposing A, the A*A' has to be conjugated
298+
# to be correct in the complex case, that produces incorrect results when beta != 0
299+
(T <: Complex) && error("syprd!() wrapper does not support SparseMatrixCSC with complex values")
300+
301+
syprd!(
302+
dual_opcode(T, transA), alpha,
303+
convert(SparseMatrixCSR, transpose(A)),
304+
B, beta, C; kwargs...
305+
)
306+
end
307+
59308
# find y: op(A) * y = alpha * x
60309
function trsv!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_descr,
61310
x::StridedVector{T}, y::StridedVector{T}

0 commit comments

Comments
 (0)