Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
[weakdeps]
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
HYPRE = "b5ffcf37-a2bd-41ab-a3da-4bd9bc8ad771"
Expand All @@ -44,6 +45,7 @@ RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"

[extensions]
LinearSolveBandedMatricesExt = "BandedMatrices"
LinearSolveBLISExt = "blis_jll"
LinearSolveBlockDiagonalsExt = "BlockDiagonals"
LinearSolveCUDAExt = "CUDA"
LinearSolveEnzymeExt = "Enzyme"
Expand All @@ -58,6 +60,7 @@ LinearSolveRecursiveArrayToolsExt = "RecursiveArrayTools"
[compat]
ArrayInterface = "7.4.11"
BandedMatrices = "1"
blis_jll = "0.9.0"
BlockDiagonals = "0.1"
ConcreteStructs = "0.2"
DocStringExtensions = "0.9"
Expand Down
248 changes: 248 additions & 0 deletions ext/LinearSolveBLISExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
module LinearSolveBLISExt

using Libdl
using blis_jll
using LinearAlgebra
using LinearSolve

using LinearAlgebra: BlasInt, LU
using LinearAlgebra.LAPACK: require_one_based_indexing, chkfinite, chkstride1,
@blasfunc, chkargsok
using LinearSolve: ArrayInterface, BLISLUFactorization, @get_cacheval, LinearCache, SciMLBase

const global libblis = dlopen(blis_jll.blis_path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unsafe; it's telling Julia to dlopen() a file at compile-time and save the pointer to that file into the precompile cache. You should just use the bljs variable from blis_jll in the first place; it will do the dlopen() for you at the appropriate time (e.g. in __init__()).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

julia> blis_jll.bljs
ERROR: UndefVarError: `bljs` not defined
Stacktrace:
 [1] getproperty(x::Module, f::Symbol)
   @ Base ./Base.jl:31
 [2] top-level scope
   @ REPL[18]:1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, typo; blis

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ERROR: could not load symbol "dgetrf_":
dlsym(0x8f1f2a90, dgetrf_): symbol not found
Stacktrace:
 [1] getrf!(A::Matrix{Float64}; ipiv::Vector{Int32}, info::Base.RefValue{Int32}, check::Bool)
   @ LinearSolveBLISExt ~/.julia/dev/LinearSolve/ext/LinearSolveBLISExt.jl:67
 [2] getrf!
   @ ~/.julia/dev/LinearSolve/ext/LinearSolveBLISExt.jl:55 [inlined]
 [3] solve!(cache::LinearSolve.LinearCache{…}, alg::LinearSolve.BLISLUFactorization; kwargs::@Kwargs{})
   @ LinearSolveBLISExt ~/.julia/dev/LinearSolve/ext/LinearSolveBLISExt.jl:224
 [4] solve!
   @ LinearSolveBLISExt ~/.julia/dev/LinearSolve/ext/LinearSolveBLISExt.jl:218 [inlined]
 [5] #solve!#6
   @ LinearSolve ~/.julia/dev/LinearSolve/src/common.jl:209 [inlined]
 [6] solve!
   @ LinearSolve ~/.julia/dev/LinearSolve/src/common.jl:208 [inlined]
 [7] #solve#5
   @ LinearSolve ~/.julia/dev/LinearSolve/src/common.jl:205 [inlined]
 [8] solve(::LinearProblem{…}, ::LinearSolve.BLISLUFactorization)
   @ LinearSolve ~/.julia/dev/LinearSolve/src/common.jl:202
 [9] top-level scope
   @ ~/Desktop/test.jl:29
Some type information was truncated. Use `show(err)` to see complete types.

Also dgetrf_32 and dgetrf_64 give the same.


function getrf!(A::AbstractMatrix{<:ComplexF64};
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
info = Ref{BlasInt}(),
check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
lda = max(1, stride(A, 2))
if isempty(ipiv)
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(zgetrf_), libblis), Cvoid,
(Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end

function getrf!(A::AbstractMatrix{<:ComplexF32};
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
info = Ref{BlasInt}(),
check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
lda = max(1, stride(A, 2))
if isempty(ipiv)
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(cgetrf_), libblis), Cvoid,
(Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end

function getrf!(A::AbstractMatrix{<:Float64};
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
info = Ref{BlasInt}(),
check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
lda = max(1, stride(A, 2))
if isempty(ipiv)
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(dgetrf_), libblis), Cvoid,
(Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end

function getrf!(A::AbstractMatrix{<:Float32};
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
info = Ref{BlasInt}(),
check = false)
require_one_based_indexing(A)
check && chkfinite(A)
chkstride1(A)
m, n = size(A)
lda = max(1, stride(A, 2))
if isempty(ipiv)
ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
end
ccall((@blasfunc(sgetrf_), libblis), Cvoid,
(Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[], info #Error code is stored in LU factorization type
end

function getrs!(trans::AbstractChar,
A::AbstractMatrix{<:ComplexF64},
ipiv::AbstractVector{BlasInt},
B::AbstractVecOrMat{<:ComplexF64};
info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
n = LinearAlgebra.checksquare(A)
if n != size(B, 1)
throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
end
if n != length(ipiv)
throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
end
nrhs = size(B, 2)
ccall(("zgetrs_", libblis), Cvoid,
(Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt},
Ptr{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end

function getrs!(trans::AbstractChar,
A::AbstractMatrix{<:ComplexF32},
ipiv::AbstractVector{BlasInt},
B::AbstractVecOrMat{<:ComplexF32};
info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
n = LinearAlgebra.checksquare(A)
if n != size(B, 1)
throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
end
if n != length(ipiv)
throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
end
nrhs = size(B, 2)
ccall(("cgetrs_", libblis), Cvoid,
(Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt},
Ptr{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end

function getrs!(trans::AbstractChar,
A::AbstractMatrix{<:Float64},
ipiv::AbstractVector{BlasInt},
B::AbstractVecOrMat{<:Float64};
info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
n = LinearAlgebra.checksquare(A)
if n != size(B, 1)
throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
end
if n != length(ipiv)
throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
end
nrhs = size(B, 2)
ccall(("dgetrs_", libblis), Cvoid,
(Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end

function getrs!(trans::AbstractChar,
A::AbstractMatrix{<:Float32},
ipiv::AbstractVector{BlasInt},
B::AbstractVecOrMat{<:Float32};
info = Ref{BlasInt}())
require_one_based_indexing(A, ipiv, B)
LinearAlgebra.LAPACK.chktrans(trans)
chkstride1(A, B, ipiv)
n = LinearAlgebra.checksquare(A)
if n != size(B, 1)
throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
end
if n != length(ipiv)
throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
end
nrhs = size(B, 2)
ccall(("sgetrs_", libblis), Cvoid,
(Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt},
Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
1)
LinearAlgebra.LAPACK.chklapackerror(BlasInt(info[]))
B
end

default_alias_A(::BLISLUFactorization, ::Any, ::Any) = false
default_alias_b(::BLISLUFactorization, ::Any, ::Any) = false

const PREALLOCATED_BLIS_LU = begin
A = rand(0, 0)
luinst = ArrayInterface.lu_instance(A), Ref{BlasInt}()
end

function LinearSolve.init_cacheval(alg::BLISLUFactorization, A, b, u, Pl, Pr,
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::OperatorAssumptions)
PREALLOCATED_BLIS_LU
end

function LinearSolve.init_cacheval(alg::BLISLUFactorization, A::AbstractMatrix{<:Union{Float32,ComplexF32,ComplexF64}}, b, u, Pl, Pr,
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::OperatorAssumptions)
A = rand(eltype(A), 0, 0)
ArrayInterface.lu_instance(A), Ref{BlasInt}()
end

function SciMLBase.solve!(cache::LinearCache, alg::BLISLUFactorization;
kwargs...)
A = cache.A
A = convert(AbstractMatrix, A)
if cache.isfresh
cacheval = @get_cacheval(cache, :BLISLUFactorization)
res = getrf!(A; ipiv = cacheval[1].ipiv, info = cacheval[2])
fact = LU(res[1:3]...), res[4]
cache.cacheval = fact
cache.isfresh = false
end

y = ldiv!(cache.u, @get_cacheval(cache, :BLISLUFactorization)[1], cache.b)
SciMLBase.build_linear_solution(alg, y, nothing, cache)

#=
A, info = @get_cacheval(cache, :BLISLUFactorization)
LinearAlgebra.require_one_based_indexing(cache.u, cache.b)
m, n = size(A, 1), size(A, 2)
if m > n
Bc = copy(cache.b)
getrs!('N', A.factors, A.ipiv, Bc; info)
return copyto!(cache.u, 1, Bc, 1, n)
else
copyto!(cache.u, cache.b)
getrs!('N', A.factors, A.ipiv, cache.u; info)
end

SciMLBase.build_linear_solution(alg, cache.u, nothing, cache)
=#
end

end
2 changes: 2 additions & 0 deletions src/extension_algs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,5 @@ A wrapper over Apple's Metal GPU library. Direct calls to Metal in a way that pr
to avoid allocations and automatically offloads to the GPU.
"""
struct MetalLUFactorization <: AbstractFactorization end

struct BLISLUFactorization <: AbstractFactorization end