Skip to content

Commit 374d8fe

Browse files
authored
Merge branch 'master' into sgj/evalpoly
2 parents 437b35d + 763f19f commit 374d8fe

24 files changed

+337
-156
lines changed

src/LinearAlgebra.jl

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as
1313
copy, copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor,
1414
getindex, hcat, getproperty, imag, inv, invpermuterows!, isapprox, isequal, isone, iszero,
1515
IndexStyle, kron, kron!, length, log, map, ndims, one, oneunit, parent, permutecols!,
16-
permutedims, permuterows!, power_by_squaring, promote_rule, real, sec, sech, setindex!,
16+
permutedims, permuterows!, power_by_squaring, promote_rule, real, isreal, sec, sech, setindex!,
1717
show, similar, sin, sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc,
1818
typed_hcat, vec, view, zero
1919
import Base: AbstractArray, AbstractMatrix, Array, Matrix
@@ -728,6 +728,8 @@ end
728728
(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
729729
ldiv(F, B)
730730

731+
const default_peakflops_size = Int === Int32 ? 2048 : 4096
732+
731733
"""
732734
LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
733735
@@ -752,10 +754,10 @@ of the problem that is solved on each processor.
752754
This function requires at least Julia 1.1. In Julia 1.0 it is available from
753755
the standard library `InteractiveUtils`.
754756
"""
755-
function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
757+
function peakflops(n::Integer=default_peakflops_size; eltype::Type{ElType}=Float64, ntrials::Integer=3, parallel::Bool=false) where {ElType}
756758
t = zeros(Float64, ntrials)
757759
for i=1:ntrials
758-
a = ones(eltype,n,n)
760+
a = ones(ElType,n,n)
759761
t[i] = @elapsed a2 = a*a
760762
@assert a2[1,1] == n
761763
end
@@ -822,25 +824,30 @@ function versioninfo(io::IO=stdout)
822824
return nothing
823825
end
824826

825-
function __init__()
826-
try
827-
verbose = parse(Bool, get(ENV, "LBT_VERBOSE", "false"))
828-
BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true, verbose)
829-
BLAS.check()
830-
catch ex
831-
Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
832-
end
827+
function lbt_openblas_onload_callback()
828+
# We don't use `BLAS.lbt_forward()` here because we don't want to take a lock on the config cache.
829+
verbose = parse(Bool, get(ENV, "LBT_VERBOSE", "false"))
830+
BLAS.lbt_forward_ccall(OpenBLAS_jll.libopenblas_path; clear=true, verbose)
831+
BLAS.check()
832+
833833
# register a hook to disable BLAS threading
834834
Base.at_disable_library_threading(() -> BLAS.set_num_threads(1))
835835

836836
# https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
837837
if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
838838
@static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
839-
BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint)))
839+
nthreads = max(1, @ccall(jl_effective_threads()::Cint))
840840
else
841-
BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint) ÷ 2))
841+
nthreads = max(1, @ccall(jl_effective_threads()::Cint) ÷ 2)
842842
end
843+
BLAS.lbt_set_num_threads(nthreads)
843844
end
844845
end
845846

847+
function __init__()
848+
# If users want to lazily load a different BLAS, they'd need to either change this call, or
849+
# clear the datastructures modified by this call and call it again with their own.
850+
libblastrampoline_jll.add_dependency!(OpenBLAS_jll, libopenblas, lbt_openblas_onload_callback)
851+
end
852+
846853
end # module LinearAlgebra

src/abstractq.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ end
99
parent(adjQ::AdjointQ) = adjQ.Q
1010
eltype(::Type{<:AbstractQ{T}}) where {T} = T
1111
Base.eltypeof(Q::AbstractQ) = eltype(Q)
12+
Base.IteratorSize(::Type{<:AbstractQ}) = Base.HasShape{2}()
1213
ndims(::AbstractQ) = 2
1314

1415
# inversion/adjoint/transpose
@@ -40,7 +41,6 @@ convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ
4041
convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)'
4142

4243
# ... to matrix
43-
collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q)
4444
Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix
4545
Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ))))
4646
Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q)

src/bidiag.jl

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,20 @@ end
189189
#Converting from Bidiagonal to dense Matrix
190190
function Matrix{T}(A::Bidiagonal) where T
191191
B = Matrix{T}(undef, size(A))
192+
iszero(size(B,1)) && return B
192193
if haszero(T) # optimized path for types with zero(T) defined
193194
size(B,1) > 1 && fill!(B, zero(T))
194-
copyto!(diagview(B), A.dv)
195-
copyto!(diagview(B, _offdiagind(A.uplo)), A.ev)
195+
isupper = A.uplo == 'U'
196+
if isupper
197+
B[1,1] = A.dv[1]
198+
end
199+
for col in axes(A.ev,1)
200+
B[col+!isupper, col+isupper] = A.ev[col]
201+
B[col+isupper, col+isupper] = A.dv[col+isupper]
202+
end
203+
if !isupper
204+
B[end,end] = A.dv[end]
205+
end
196206
else
197207
copyto!(B, A)
198208
end
@@ -286,6 +296,7 @@ axes(M::Bidiagonal) = (ax = axes(M.dv, 1); (ax, ax))
286296
for func in (:conj, :copy, :real, :imag)
287297
@eval ($func)(M::Bidiagonal) = Bidiagonal(($func)(M.dv), ($func)(M.ev), M.uplo)
288298
end
299+
isreal(M::Bidiagonal) = isreal(M.dv) && isreal(M.ev)
289300

290301
adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(vec(adjoint(B.dv)), vec(adjoint(B.ev)), B.uplo == 'U' ? :L : :U)
291302
adjoint(B::Bidiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
@@ -454,8 +465,8 @@ function rmul!(B::Bidiagonal, x::Number)
454465
iszero(y) || throw(ArgumentError(LazyString(lazy"cannot set index ($row, $col) off ",
455466
lazy"the tridiagonal band to a nonzero value ($y)")))
456467
end
457-
@. B.dv *= x
458-
@. B.ev *= x
468+
rmul!(B.dv, x)
469+
rmul!(B.ev, x)
459470
return B
460471
end
461472
function lmul!(x::Number, B::Bidiagonal)
@@ -467,8 +478,8 @@ function lmul!(x::Number, B::Bidiagonal)
467478
iszero(y) || throw(ArgumentError(LazyString(lazy"cannot set index ($row, $col) off ",
468479
lazy"the tridiagonal band to a nonzero value ($y)")))
469480
end
470-
@. B.dv = x * B.dv
471-
@. B.ev = x * B.ev
481+
lmul!(x, B.dv)
482+
lmul!(x, B.ev)
472483
return B
473484
end
474485
/(A::Bidiagonal, B::Number) = Bidiagonal(A.dv/B, A.ev/B, A.uplo)
@@ -583,7 +594,7 @@ function _diag(A::Bidiagonal, k)
583594
elseif k == _offdiagind(A.uplo)
584595
return A.ev
585596
else
586-
return diag(A, k)
597+
return diagview(A, k)
587598
end
588599
end
589600

@@ -952,11 +963,10 @@ function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulA
952963
nB = size(B,2)
953964
(iszero(nA) || iszero(nB)) && return C
954965
iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
955-
if nA <= 3
956-
# naive multiplication
957-
for I in CartesianIndices(C)
958-
col = Base.tail(Tuple(I))
959-
_modify!(_add, sum(A[I[1], k] * B[k, col...] for k in axes(A,2)), C, I)
966+
if nA == 1
967+
A11 = @inbounds A[1,1]
968+
for i in axes(B, 2)
969+
@inbounds _modify!(_add, A11 * B[1,i], C, (1,i))
960970
end
961971
return C
962972
end
@@ -1189,25 +1199,25 @@ function _dibimul!(C::Bidiagonal, A::Diagonal, B::Bidiagonal, _add)
11891199
C
11901200
end
11911201

1192-
function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
1202+
function mul(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
11931203
TS = promote_op(matprod, eltype(A), eltype(B))
11941204
C = mul!(similar(A, TS, size(A)), A, B)
11951205
return B.uplo == 'U' ? UpperTriangular(C) : C
11961206
end
11971207

1198-
function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
1208+
function mul(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
11991209
TS = promote_op(matprod, eltype(A), eltype(B))
12001210
C = mul!(similar(A, TS, size(A)), A, B)
12011211
return B.uplo == 'L' ? LowerTriangular(C) : C
12021212
end
12031213

1204-
function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular)
1214+
function mul(A::Bidiagonal, B::UpperOrUnitUpperTriangular)
12051215
TS = promote_op(matprod, eltype(A), eltype(B))
12061216
C = mul!(similar(B, TS, size(B)), A, B)
12071217
return A.uplo == 'U' ? UpperTriangular(C) : C
12081218
end
12091219

1210-
function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
1220+
function mul(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
12111221
TS = promote_op(matprod, eltype(A), eltype(B))
12121222
C = mul!(similar(B, TS, size(B)), A, B)
12131223
return A.uplo == 'L' ? LowerTriangular(C) : C
@@ -1249,19 +1259,19 @@ end
12491259
ldiv!(A::Bidiagonal, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
12501260
function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat)
12511261
require_one_based_indexing(c, A, b)
1252-
N = size(A, 2)
1262+
N = size(A, 1)
12531263
mb, nb = size(b, 1), size(b, 2)
12541264
if N != mb
1255-
throw(DimensionMismatch(lazy"second dimension of A, $N, does not match first dimension of b, $mb"))
1265+
dimstr = b isa AbstractVector ? "length" : "first dimension"
1266+
throw(DimensionMismatch(LazyString(lazy"the first dimension of the Bidiagonal matrix, $N, ",
1267+
lazy"does not match the $dimstr of the right-hand-side, $mb")))
12561268
end
12571269
mc, nc = size(c, 1), size(c, 2)
12581270
if mc != mb || nc != nb
1259-
throw(DimensionMismatch(lazy"size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)"))
1271+
throw(DimensionMismatch(lazy"size of result, $(size(c)), does not match the size of b, $(size(b))"))
12601272
end
12611273

1262-
if N == 0
1263-
return copyto!(c, b)
1264-
end
1274+
N == 0 && return c # in this case c and b are also empty
12651275

12661276
zi = findfirst(iszero, A.dv)
12671277
isnothing(zi) || throw(SingularException(zi))
@@ -1333,27 +1343,27 @@ function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal)
13331343
isnothing(zi) || throw(SingularException(zi))
13341344

13351345
if B.uplo == 'L'
1336-
diagB = B.dv[n]
1337-
for i in 1:m
1338-
C[i,n] = A[i,n] / diagB
1346+
diagB = @inbounds B.dv[n]
1347+
for i in axes(A,1)
1348+
@inbounds C[i,n] = A[i,n] / diagB
13391349
end
1340-
for j in n-1:-1:1
1341-
diagB = B.dv[j]
1342-
offdiagB = B.ev[j]
1343-
for i in 1:m
1344-
C[i,j] = (A[i,j] - C[i,j+1]*offdiagB)/diagB
1350+
for j in reverse(axes(A,2)[1:end-1]) # n-1:-1:1
1351+
diagB = @inbounds B.dv[j]
1352+
offdiagB = @inbounds B.ev[j]
1353+
for i in axes(A,1)
1354+
@inbounds C[i,j] = (A[i,j] - C[i,j+1]*offdiagB)/diagB
13451355
end
13461356
end
13471357
else
1348-
diagB = B.dv[1]
1349-
for i in 1:m
1350-
C[i,1] = A[i,1] / diagB
1358+
diagB = @inbounds B.dv[1]
1359+
for i in axes(A,1)
1360+
@inbounds C[i,1] = A[i,1] / diagB
13511361
end
1352-
for j in 2:n
1353-
diagB = B.dv[j]
1354-
offdiagB = B.ev[j-1]
1355-
for i = 1:m
1356-
C[i,j] = (A[i,j] - C[i,j-1]*offdiagB)/diagB
1362+
for j in axes(A,2)[2:end]
1363+
diagB = @inbounds B.dv[j]
1364+
offdiagB = @inbounds B.ev[j-1]
1365+
for i in axes(A,1)
1366+
@inbounds C[i,j] = (A[i,j] - C[i,j-1]*offdiagB)/diagB
13571367
end
13581368
end
13591369
end

src/blas.jl

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ export
8484
trsm!,
8585
trsm
8686

87-
using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt,
88-
DimensionMismatch, checksquare, chkstride1, SingularException
87+
using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, chkstride1
8988

9089
include("lbt.jl")
9190

@@ -162,7 +161,9 @@ get_num_threads()::Int = lbt_get_num_threads()
162161
function check()
163162
# TODO: once we have bitfields of the BLAS functions that are actually forwarded,
164163
# ensure that we have a complete set here (warning on an incomplete BLAS implementation)
165-
config = get_config()
164+
# We don't use `get_config()` here because we are invoked in the onload callback and
165+
# we don't want to take any locks.
166+
config = LBTConfig(unsafe_load(ccall((:lbt_get_config, libblastrampoline), Ptr{lbt_config_t}, ())))
166167

167168
# Ensure that one of our loaded libraries satisfies our interface requirement
168169
interface = USE_BLAS64 ? :ilp64 : :lp64
@@ -1378,11 +1379,6 @@ for (fname, elty) in ((:dtrsv_,:Float64),
13781379
throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))"))
13791380
end
13801381
chkstride1(A)
1381-
if diag == 'N'
1382-
for i in 1:n
1383-
iszero(A[i,i]) && throw(SingularException(i))
1384-
end
1385-
end
13861382
px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
13871383
GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
13881384
(Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
@@ -2231,11 +2227,6 @@ for (mmname, smname, elty) in
22312227
end
22322228
chkstride1(A)
22332229
chkstride1(B)
2234-
if diag == 'N'
2235-
for i in 1:k
2236-
iszero(A[i,i]) && throw(SingularException(i))
2237-
end
2238-
end
22392230
ccall((@blasfunc($smname), libblastrampoline), Cvoid,
22402231
(Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
22412232
Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},

src/bunchkaufman.jl

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,8 @@ julia> d, u, p = S; # destructuring via iteration
182182
julia> d == S.D && u == S.U && p == S.p
183183
true
184184
185-
julia> S.U*S.D*S.U' - S.P*A*S.P'
186-
2×2 Matrix{Float64}:
187-
0.0 0.0
188-
0.0 0.0
185+
julia> S.U * S.D * S.U' ≈ S.P * A * S.P'
186+
true
189187
190188
julia> S = bunchkaufman(Symmetric(A, :L))
191189
BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
@@ -202,10 +200,8 @@ permutation:
202200
2
203201
1
204202
205-
julia> S.L*S.D*S.L' - A[S.p, S.p]
206-
2×2 Matrix{Float64}:
207-
0.0 0.0
208-
0.0 0.0
203+
julia> S.L * S.D * S.L' ≈ A[S.p, S.p]
204+
true
209205
```
210206
"""
211207
bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =

0 commit comments

Comments
 (0)