Skip to content

Commit d21ad8c

Browse files
authored
Matrix constructor for triangular (#1282)
This reduces TTFX as well as improves performance. This is because we add specialized functions to avoid the branches in `getindex` for a triangular matrix. ```julia julia> using LinearAlgebra julia> U4 = UpperTriangular(ones(4,4)); julia> @time Array(U4); # TTFX 0.087534 seconds (193.27 k allocations: 9.706 MiB, 99.88% compilation time) # nightly 0.049869 seconds (91.45 k allocations: 4.531 MiB, 99.78% compilation time) # this PR julia> U = UpperTriangular(ones(2000,2000)); julia> @Btime Array($U); 3.205 ms (3 allocations: 30.52 MiB) # master 2.764 ms (3 allocations: 30.52 MiB) # this PR ```
1 parent dcf579c commit d21ad8c

File tree

2 files changed

+42
-16
lines changed

2 files changed

+42
-16
lines changed

src/generic.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2092,16 +2092,16 @@ function copytrito!(B::AbstractMatrix, A::AbstractMatrix, uplo::AbstractChar)
20922092
A = Base.unalias(B, A)
20932093
if uplo == 'U'
20942094
LAPACK.lacpy_size_check(size(B), (n < m ? n : m, n))
2095+
# extract the parents for UpperTriangular matrices
2096+
Bv, Av = uppertridata(B), uppertridata(A)
20952097
for j in axes(A,2), i in axes(A,1)[begin : min(j,end)]
2096-
# extract the parents for UpperTriangular matrices
2097-
Bv, Av = uppertridata(B), uppertridata(A)
20982098
@inbounds Bv[i,j] = Av[i,j]
20992099
end
21002100
else # uplo == 'L'
21012101
LAPACK.lacpy_size_check(size(B), (m, m < n ? m : n))
2102+
# extract the parents for LowerTriangular matrices
2103+
Bv, Av = lowertridata(B), lowertridata(A)
21022104
for j in axes(A,2), i in axes(A,1)[j:end]
2103-
# extract the parents for LowerTriangular matrices
2104-
Bv, Av = lowertridata(B), lowertridata(A)
21052105
@inbounds Bv[i,j] = Av[i,j]
21062106
end
21072107
end

src/triangular.jl

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,12 @@ lowertriangular(U::LowerOrUnitLowerTriangular) = U
152152

153153
Base.dataids(A::UpperOrLowerTriangular) = Base.dataids(A.data)
154154

155+
function Matrix{T}(U::UpperOrLowerTriangular) where {T}
156+
M = Matrix{T}(undef, size(U))
157+
copy!(M, U)
158+
return M
159+
end
160+
155161
imag(A::UpperTriangular) = UpperTriangular(imag(A.data))
156162
imag(A::LowerTriangular) = LowerTriangular(imag(A.data))
157163
imag(A::UpperTriangular{<:Any,<:StridedMaybeAdjOrTransMat}) = imag.(A)
@@ -622,29 +628,49 @@ end
622628
end
623629
# for strided matrices, we explicitly loop over the arrays to improve cache locality
624630
# This fuses the copytrito! for the two halves
631+
@propagate_inbounds function copy_unaliased_stored!(dest, U::UpperTriangular, col)
632+
for row in firstindex(dest,1):col
633+
dest[row,col] = U.data[row,col]
634+
end
635+
dest
636+
end
637+
@propagate_inbounds function copy_unaliased_stored!(dest, U::UnitUpperTriangular, col)
638+
for row in firstindex(dest,1):col-1
639+
dest[row,col] = U.data[row,col]
640+
end
641+
dest[col, col] = U[BandIndex(0,col)]
642+
dest
643+
end
644+
@propagate_inbounds function copy_unaliased_stored!(dest, L::LowerTriangular, col)
645+
for row in col:lastindex(dest,1)
646+
dest[row,col] = L.data[row,col]
647+
end
648+
dest
649+
end
650+
@propagate_inbounds function copy_unaliased_stored!(dest, L::UnitLowerTriangular, col)
651+
dest[col, col] = L[BandIndex(0,col)]
652+
for row in col+1:lastindex(dest,1)
653+
dest[row,col] = L.data[row,col]
654+
end
655+
dest
656+
end
625657
@inline function copy_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix})
626658
@boundscheck checkbounds(dest, axes(U)...)
627-
isunit = U isa UnitUpperTriangular
628659
for col in axes(dest,2)
629-
for row in firstindex(dest,1):col-isunit
630-
@inbounds dest[row,col] = U.data[row,col]
631-
end
632-
for row in col+!isunit:lastindex(dest,1)
633-
@inbounds dest[row,col] = U[row,col]
660+
@inbounds copy_unaliased_stored!(dest, U, col)
661+
for row in col+1:lastindex(dest,1)
662+
@inbounds dest[row,col] = diagzero(U,row,col)
634663
end
635664
end
636665
return dest
637666
end
638667
@inline function copy_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix})
639668
@boundscheck checkbounds(dest, axes(L)...)
640-
isunit = L isa UnitLowerTriangular
641669
for col in axes(dest,2)
642-
for row in firstindex(dest,1):col-!isunit
643-
@inbounds dest[row,col] = L[row,col]
644-
end
645-
for row in col+isunit:lastindex(dest,1)
646-
@inbounds dest[row,col] = L.data[row,col]
670+
for row in firstindex(dest,1):col-1
671+
@inbounds dest[row,col] = diagzero(L,row,col)
647672
end
673+
@inbounds copy_unaliased_stored!(dest, L, col)
648674
end
649675
return dest
650676
end

0 commit comments

Comments
 (0)