From 84198f02601fff085056cc57bbf97a072ff4d81e Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sat, 5 Apr 2025 15:49:53 +0530 Subject: [PATCH 1/5] Speicalize `copy!` for triangular, and use `copy!` in `ldiv` --- src/LinearAlgebra.jl | 6 +++--- src/triangular.jl | 37 ++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/LinearAlgebra.jl b/src/LinearAlgebra.jl index e9a3677d..366195a5 100644 --- a/src/LinearAlgebra.jl +++ b/src/LinearAlgebra.jl @@ -10,7 +10,7 @@ module LinearAlgebra import Base: \, /, //, *, ^, +, -, == import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech, asin, asinh, atan, atanh, axes, big, broadcast, cbrt, ceil, cis, collect, conj, convert, - copy, copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, + copy, copy!, copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat, getproperty, imag, inv, invpermuterows!, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!, length, log, map, ndims, one, oneunit, parent, permutecols!, permutedims, permuterows!, power_by_squaring, promote_rule, real, sec, sech, setindex!, @@ -706,9 +706,9 @@ function ldiv(F::Factorization, B::AbstractVecOrMat) if n > size(B, 1) # Underdetermined - copyto!(view(BB, 1:m, :), B) + copy!(view(BB, axes(B,1), ntuple(_->:, ndims(B)-1)...), B) else - copyto!(BB, B) + copy!(BB, B) end ldiv!(FF, BB) diff --git a/src/triangular.jl b/src/triangular.jl index 199ff119..3b098a5f 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -527,30 +527,34 @@ for T in (:UpperOrUnitUpperTriangular, :LowerOrUnitLowerTriangular) if axes(dest) != axes(U) @invoke copyto!(dest::AbstractArray, U::AbstractArray) else - _copyto!(dest, U) + copy!(dest, U) end return dest end + @eval function copy!(dest::$T, U::$T) + axes(dest) == axes(U) || throw(ArgumentError( + "arrays must have the same axes for copy! (consider using `copyto!`)")) + _copy!(dest, U) + end end # copy and scale for (T, UT) in ((:UpperTriangular, :UnitUpperTriangular), (:LowerTriangular, :UnitLowerTriangular)) - @eval @inline function _copyto!(A::$T, B::$T) - @boundscheck checkbounds(A, axes(B)...) + @eval @inline function _copy!(A::$T, B::$T) copytrito!(parent(A), parent(B), uplo_char(A)) return A end - @eval @inline function _copyto!(A::$UT, B::$T) + @eval @inline function _copy!(A::$UT, B::$T) for dind in diagind(A, IndexStyle(A)) if A[dind] != B[dind] throw_nononeerror(typeof(A), B[dind], Tuple(dind)...) end end - _copyto!($T(parent(A)), B) + _copy!($T(parent(A)), B) return A end end -@inline function _copyto!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular) +@inline function _copy!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular) @boundscheck checkbounds(A, axes(B)...) B2 = Base.unalias(A, B) Ap = parent(A) @@ -565,7 +569,7 @@ end end return A end -@inline function _copyto!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular) +@inline function _copy!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular) @boundscheck checkbounds(A, axes(B)...) B2 = Base.unalias(A, B) Ap = parent(A) @@ -590,23 +594,30 @@ _triangularize!(::LowerOrUnitLowerTriangular) = tril! if axes(dest) != axes(U) @invoke copyto!(dest::StridedMatrix, U::AbstractArray) else - _copyto!(dest, U) + copy!(dest, U) end return dest end -@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular) + +function copy!(dest::StridedMatrix, U::UpperOrLowerTriangular) + axes(dest) == axes(U) || throw(ArgumentError( + "arrays must have the same axes for copy! (consider using `copyto!`)")) + _copy!(dest, U) +end + +@propagate_inbounds function _copy!(dest::StridedMatrix, U::UpperOrLowerTriangular) copytrito!(dest, parent(U), U isa UpperOrUnitUpperTriangular ? 'U' : 'L') copytrito!(dest, U, U isa UpperOrUnitUpperTriangular ? 'L' : 'U') return dest end -@propagate_inbounds function _copyto!(dest::StridedMatrix, U::UpperOrLowerTriangular{<:Any, <:StridedMatrix}) +@propagate_inbounds function _copy!(dest::StridedMatrix, U::UpperOrLowerTriangular{<:Any, <:StridedMatrix}) U2 = Base.unalias(dest, U) - copyto_unaliased!(dest, U2) + copy_unaliased!(dest, U2) return dest end # for strided matrices, we explicitly loop over the arrays to improve cache locality # This fuses the copytrito! for the two halves -@inline function copyto_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix}) +@inline function copy_unaliased!(dest::StridedMatrix, U::UpperOrUnitUpperTriangular{<:Any, <:StridedMatrix}) @boundscheck checkbounds(dest, axes(U)...) isunit = U isa UnitUpperTriangular for col in axes(dest,2) @@ -619,7 +630,7 @@ end end return dest end -@inline function copyto_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix}) +@inline function copy_unaliased!(dest::StridedMatrix, L::LowerOrUnitLowerTriangular{<:Any, <:StridedMatrix}) @boundscheck checkbounds(dest, axes(L)...) isunit = L isa UnitLowerTriangular for col in axes(dest,2) From 4f1a274ab1b32c46cde166cb858614e092d02e38 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sat, 5 Apr 2025 17:49:42 +0530 Subject: [PATCH 2/5] Use `copy!` in `generic_mattridiv!` and friends --- src/triangular.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/triangular.jl b/src/triangular.jl index 3b098a5f..3f9f2a9b 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -1218,17 +1218,17 @@ end # multiplication generic_trimatmul!(c::StridedVector{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, b::AbstractVector{T}) where {T<:BlasFloat} = - BLAS.trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b)) + BLAS.trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copy!(c, b)) function generic_trimatmul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} if stride(C,1) == stride(A,1) == 1 - BLAS.trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B)) + BLAS.trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copy!(C, B)) else # incompatible with BLAS @invoke generic_trimatmul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) end end function generic_mattrimul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} if stride(C,1) == stride(B,1) == 1 - BLAS.trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A)) + BLAS.trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copy!(C, A)) else # incompatible with BLAS @invoke generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) end @@ -1236,14 +1236,14 @@ end # division function generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} if stride(C,1) == stride(A,1) == 1 - LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B)) + LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copy!(C, B)) else # incompatible with LAPACK @invoke generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat) end end function generic_mattridiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} if stride(C,1) == stride(B,1) == 1 - BLAS.trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A)) + BLAS.trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copy!(C, A)) else # incompatible with BLAS @invoke generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) end From 62784d8d67d1e2b14922d5376af228a9b8e5ac57 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sat, 5 Apr 2025 18:02:22 +0530 Subject: [PATCH 3/5] Use `copy!` at more places --- src/triangular.jl | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/triangular.jl b/src/triangular.jl index 3f9f2a9b..50f41a43 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -178,7 +178,7 @@ end parent(A::UpperOrLowerTriangular) = A.data # For strided matrices, we may only loop over the filled triangle -copy(A::UpperOrLowerTriangular{<:Any, <:StridedMaybeAdjOrTransMat}) = copyto!(similar(A), A) +copy(A::UpperOrLowerTriangular{<:Any, <:StridedMaybeAdjOrTransMat}) = copy!(similar(A), A) # then handle all methods that requires specific handling of upper/lower and unit diagonal @@ -651,7 +651,7 @@ Base.@constprop :aggressive function copytrito_triangular!(Bdata, Adata, uplo, u BLAS.chkuplo(uplo) LAPACK.lacpy_size_check(size(Bdata), sz) # only the diagonal is copied in this case - copyto!(diagview(Bdata), diagview(Adata)) + copy!(diagview(Bdata), diagview(Adata)) end return Bdata end @@ -1061,15 +1061,17 @@ isunit_char(::UnitUpperTriangular) = 'U' isunit_char(::LowerTriangular) = 'N' isunit_char(::UnitLowerTriangular) = 'U' +_copy_or_copyto!(dest, src) = ndims(dest) == ndims(src) ? copy!(dest, src) : copyto!(dest, src) + # generic fallback for AbstractTriangular matrices outside of the four subtypes provided here _trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = - lmul!(A, copyto!(C, B)) + lmul!(A, _copy_or_copyto!(C, B)) _trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = - lmul!(A, copyto!(C, B)) + lmul!(A, copy!(C, B)) _trimul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = - rmul!(copyto!(C, A), B) + rmul!(copy!(C, A), B) _trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = - lmul!(A, copyto!(C, B)) + lmul!(A, copy!(C, B)) # redirect for UpperOrLowerTriangular _trimul!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVector) = generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) @@ -1130,9 +1132,9 @@ end ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = _ldiv!(C, A, B) # generic fallback for AbstractTriangular, directs to 2-arg [l/r]div! _ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = - ldiv!(A, copyto!(C, B)) + ldiv!(A, _copy_or_copyto!(C, B)) _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = - rdiv!(copyto!(C, A), B) + rdiv!(copy!(C, A), B) # redirect for UpperOrLowerTriangular to generic_*div! _ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) = generic_trimatdiv!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) @@ -1210,7 +1212,7 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'), elseif p == Inf return inv(LAPACK.trcon!('I', $uploc, $isunitc, A.data)) else # use fallback - return cond(copyto!(similar(parent(A)), A), p) + return cond(copy!(similar(parent(A)), A), p) end end end @@ -1236,7 +1238,7 @@ end # division function generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} if stride(C,1) == stride(A,1) == 1 - LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copy!(C, B)) + LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : _copy_or_copyto!(C, B)) else # incompatible with LAPACK @invoke generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat) end @@ -1968,7 +1970,7 @@ function powm!(A0::UpperTriangular, p::Real) for i in axes(S,1) @inbounds S[i, i] = S[i, i] + 1 end - copyto!(Stmp, S) + copy!(Stmp, S) mul!(S, A, c) ldiv!(Stmp, S) @@ -1976,14 +1978,14 @@ function powm!(A0::UpperTriangular, p::Real) for i in axes(S,1) @inbounds S[i, i] = S[i, i] + 1 end - copyto!(Stmp, S) + copy!(Stmp, S) mul!(S, A, c) ldiv!(Stmp, S) end for i in axes(S,1) S[i, i] = S[i, i] + 1 end - copyto!(Stmp, S) + copy!(Stmp, S) mul!(S, A, -p) ldiv!(Stmp, S) for i in axes(S,1) @@ -1993,7 +1995,7 @@ function powm!(A0::UpperTriangular, p::Real) blockpower!(A0, S, p/(2^s)) for m = 1:s mul!(Stmp.data, S, S) - copyto!(S, Stmp) + copy!(S, Stmp) blockpower!(A0, S, p/(2^(s-m))) end rmul!(S, normA0^p) @@ -2180,7 +2182,7 @@ function _find_params_log_quasitriu!(A) break end _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A) - copyto!(AmI, A) + copy!(AmI, A) for i in axes(AmI,1) @inbounds AmI[i,i] -= 1 end From 8156fd34079f7b3362627b25c5a30212e0609380 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 8 Apr 2025 17:19:54 +0530 Subject: [PATCH 4/5] `_copy_or_copyto!` in `Bidiagonal` `ldiv!` --- src/bidiag.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bidiag.jl b/src/bidiag.jl index f5964fa5..ad7de549 100644 --- a/src/bidiag.jl +++ b/src/bidiag.jl @@ -1260,7 +1260,7 @@ function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat) end if N == 0 - return copyto!(c, b) + return _copy_or_copyto!(c, b) end zi = findfirst(iszero, A.dv) From 56aacffdf90d1e9bfb81ced264fa2b25b1179bf7 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 8 Apr 2025 17:41:20 +0530 Subject: [PATCH 5/5] Revert "`_copy_or_copyto!` in `Bidiagonal` `ldiv!`" This reverts commit 8156fd34079f7b3362627b25c5a30212e0609380. --- src/bidiag.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bidiag.jl b/src/bidiag.jl index ad7de549..f5964fa5 100644 --- a/src/bidiag.jl +++ b/src/bidiag.jl @@ -1260,7 +1260,7 @@ function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat) end if N == 0 - return _copy_or_copyto!(c, b) + return copyto!(c, b) end zi = findfirst(iszero, A.dv)