Skip to content

Commit 88eec7a

Browse files
ThreadedSparseMatrixCSC performance fix
Changed the inner loop of `mul!(C::StridedVecOrMat, X::AdjOrTransStridedOrTriangularMatrix, A::ThreadedSparseMatrixCSC, α::Number, β::Number)` to mimic the SparseVector version, which had better performance.
1 parent 1568352 commit 88eec7a

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

src/ThreadedSparseArrays.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,21 @@ function mul!(C::StridedVecOrMat, X::AdjOrTransStridedOrTriangularMatrix, A::Thr
8787
if β != 1
8888
β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
8989
end
90+
# Threads.@threads for col = 1:size(A, 2)
91+
# @inbounds for multivec_row=1:mX, k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
92+
# C[multivec_row, col] += α * X[multivec_row, rv[k]] * nzv[k] # perhaps suboptimal position of α?
93+
# end
94+
# end
9095
Threads.@threads for col = 1:size(A, 2)
91-
@inbounds for multivec_row=1:mX, k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
92-
C[multivec_row, col] += α * X[multivec_row, rv[k]] * nzv[k] # perhaps suboptimal position of α?
96+
@inbounds for k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
97+
j = rv[k]
98+
αv = nzv[k]*α
99+
for multivec_row=1:mX
100+
C[multivec_row, col] += X[multivec_row, j] * αv
101+
end
93102
end
94103
end
104+
95105
C
96106
end
97107

0 commit comments

Comments
 (0)