Skip to content

Commit 38b932b

Browse files
committed
Inline generic_matmatmul! branch in strided triangular matmul
1 parent 0a253be commit 38b932b

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

src/triangular.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ const LowerOrUnitLowerTriangular{T,S<:AbstractMatrix{T}} = Union{LowerTriangular
144144
# Union of the upper (UpperOrUnitUpperTriangular) and lower (LowerOrUnitLowerTriangular)
# triangular aliases, parameterised by element type T and matrix type S.
const UpperOrLowerTriangular{T,S<:AbstractMatrix{T}} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}}
145145
# Union of the two unit-triangular wrapper types, UnitUpperTriangular and UnitLowerTriangular,
# parameterised by element type T and matrix type S.
const UnitUpperOrUnitLowerTriangular{T,S<:AbstractMatrix{T}} = Union{UnitUpperTriangular{T,S}, UnitLowerTriangular{T,S}}
146146

147+
# UpperOrLowerTriangular restricted to the case where the matrix type parameter S is a
# StridedMatrix{T}; serves as a dispatch target for the strided generic_matmatmulNN! methods.
const UpperOrLowerTriangularStrided{T,S<:StridedMatrix{T}} = UpperOrLowerTriangular{T,S}
148+
147149
# Construct `UpperTriangular(M)` from `M`.
function uppertriangular(M)
    return UpperTriangular(M)
end
148150
# Construct `LowerTriangular(M)` from `M`.
function lowertriangular(M)
    return LowerTriangular(M)
end
149151

@@ -1116,11 +1118,20 @@ for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
11161118
if isone(alpha) && iszero(beta)
11171119
return _trimul!(C, A, B)
11181120
else
1119-
return generic_matmatmul!(C, 'N', 'N', A, B, alpha, beta)
1121+
return generic_matmatmulNN!(C, A, B, alpha, beta)
11201122
end
11211123
end
11221124
end
11231125

1126+
# Generic fallback: forward to `generic_matmatmul!` with both character flags fixed to 'N'.
# More specific methods of `generic_matmatmulNN!` may bypass this path.
function generic_matmatmulNN!(C, A, B, alpha, beta)
    return generic_matmatmul!(C, 'N', 'N', A, B, alpha, beta)
end
1127+
# Optimization for strided matrices: for these types generic_matmatmul! is known to end up
# in _generic_matmatmul!, so call it directly and skip the intermediate branch
1128+
# Generate one generic_matmatmulNN! method per (TA, TB) pair listed below; each generated
# method forwards directly to _generic_matmatmul!(C, A, B, alpha, beta).
for (TA, TB) in ((:UpperOrLowerTriangularStrided, :StridedMatrix),
1129+
(:StridedMatrix, :UpperOrLowerTriangularStrided),
1130+
(:UpperOrLowerTriangularStrided, :UpperOrLowerTriangularStrided)
1131+
)
1132+
@eval generic_matmatmulNN!(C, A::$TA, B::$TB, alpha, beta) = _generic_matmatmul!(C, A, B, alpha, beta)
1133+
end
1134+
11241135
# Three-argument `ldiv!` with a triangular `A`: delegates to `_ldiv!(C, A, B)`.
function ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat)
    return _ldiv!(C, A, B)
end
11251136
# generic fallback for AbstractTriangular, directs to 2-arg [l/r]div!
11261137
_ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) =

0 commit comments

Comments
 (0)