Add OpenBLASLUFactorization implementation

claude · claude · commit 5a97dbb36d93 · 2025-08-20T09:04:47.000-04:00
- Implement OpenBLASLUFactorization as a direct wrapper over OpenBLAS_jll
- Add getrf! and getrs! functions for LU factorization and solving
- Support Float32, Float64, ComplexF32, and ComplexF64 types
- Include proper module structure and exports
- Add OpenBLAS_jll as a dependency
- Tests confirm functionality matches existing LUFactorization
diff --git a/Project.toml b/Project.toml
@@ -17,6 +17,7 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
@@ -59,6 +59,8 @@ else
     const usemkl = false
 end
 
+using OpenBLAS_jll
+
 
 @reexport using SciMLBase
 
@@ -345,6 +347,7 @@ include("extension_algs.jl")
 include("factorization.jl")
 include("appleaccelerate.jl")
 include("mkl.jl")
+include("openblas.jl")
 include("simplelu.jl")
 include("simplegmres.jl")
 include("iterative_wrappers.jl")
@@ -461,6 +464,7 @@ export MKLPardisoFactorize, MKLPardisoIterate
 export PanuaPardisoFactorize, PanuaPardisoIterate
 export PardisoJL
 export MKLLUFactorization
+export OpenBLASLUFactorization
 export AppleAccelerateLUFactorization
 export MetalLUFactorization
 
diff --git a/src/openblas.jl b/src/openblas.jl
@@ -0,0 +1,252 @@
+"""
+```julia
+OpenBLASLUFactorization()
+```
+
+LU factorization that calls OpenBLAS directly through `OpenBLAS_jll`, bypassing
+libblastrampoline and reusing the cached pivot vector and `info` reference across solves.
+"""
+struct OpenBLASLUFactorization <: AbstractFactorization end
+
+module OpenBLASLU
+
+using LinearAlgebra
+using LinearAlgebra: BlasInt, LU, require_one_based_indexing, checksquare
+using LinearAlgebra.LAPACK: chkfinite, chkstride1, @blasfunc, chkargsok, chktrans, chklapackerror
+using OpenBLAS_jll
+
+# LU-factorize `A` in place with OpenBLAS `zgetrf_`; `check = true` runs `chkfinite` first.
+# Returns `(A, ipiv, info[], info)` so the caller can keep the status code in an `LU`.
+function getrf!(A::AbstractMatrix{<:ComplexF64};
+        ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+        info = Ref{BlasInt}(), check = false)
+    require_one_based_indexing(A)
+    check && chkfinite(A)
+    chkstride1(A)
+    m, n = size(A)
+    lda = max(1, stride(A, 2))
+    # getrf writes min(m, n) pivots: replace any buffer of the wrong length.
+    length(ipiv) == min(m, n) || (ipiv = similar(A, BlasInt, min(m, n)))
+    ccall((@blasfunc(zgetrf_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64},
+            Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
+        m, n, A, lda, ipiv, info)
+    chkargsok(info[])
+    return A, ipiv, info[], info
+end
+
+# LU-factorize `A` in place with OpenBLAS `cgetrf_`; `check = true` runs `chkfinite` first.
+# Returns `(A, ipiv, info[], info)` so the caller can keep the status code in an `LU`.
+function getrf!(A::AbstractMatrix{<:ComplexF32};
+        ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+        info = Ref{BlasInt}(), check = false)
+    require_one_based_indexing(A)
+    check && chkfinite(A)
+    chkstride1(A)
+    m, n = size(A)
+    lda = max(1, stride(A, 2))
+    # getrf writes min(m, n) pivots: replace any buffer of the wrong length.
+    length(ipiv) == min(m, n) || (ipiv = similar(A, BlasInt, min(m, n)))
+    ccall((@blasfunc(cgetrf_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32},
+            Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
+        m, n, A, lda, ipiv, info)
+    chkargsok(info[])
+    return A, ipiv, info[], info
+end
+
+# LU-factorize `A` in place with OpenBLAS `dgetrf_`; `check = true` runs `chkfinite` first.
+# Returns `(A, ipiv, info[], info)` so the caller can keep the status code in an `LU`.
+function getrf!(A::AbstractMatrix{<:Float64};
+        ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+        info = Ref{BlasInt}(), check = false)
+    require_one_based_indexing(A)
+    check && chkfinite(A)
+    chkstride1(A)
+    m, n = size(A)
+    lda = max(1, stride(A, 2))
+    # getrf writes min(m, n) pivots: replace any buffer of the wrong length.
+    length(ipiv) == min(m, n) || (ipiv = similar(A, BlasInt, min(m, n)))
+    ccall((@blasfunc(dgetrf_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
+            Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
+        m, n, A, lda, ipiv, info)
+    chkargsok(info[])
+    return A, ipiv, info[], info
+end
+
+# LU-factorize `A` in place with OpenBLAS `sgetrf_`; `check = true` runs `chkfinite` first.
+# Returns `(A, ipiv, info[], info)` so the caller can keep the status code in an `LU`.
+function getrf!(A::AbstractMatrix{<:Float32};
+        ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))),
+        info = Ref{BlasInt}(), check = false)
+    require_one_based_indexing(A)
+    check && chkfinite(A)
+    chkstride1(A)
+    m, n = size(A)
+    lda = max(1, stride(A, 2))
+    # getrf writes min(m, n) pivots: replace any buffer of the wrong length.
+    length(ipiv) == min(m, n) || (ipiv = similar(A, BlasInt, min(m, n)))
+    ccall((@blasfunc(sgetrf_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
+            Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
+        m, n, A, lda, ipiv, info)
+    chkargsok(info[])
+    return A, ipiv, info[], info
+end
+
+# Solve `op(A) * X = B` in place with OpenBLAS `zgetrs_`, where `A`/`ipiv` hold the LU
+# factors and pivots from `getrf!` and `trans` selects `op`; overwrites and returns `B`.
+function getrs!(trans::AbstractChar,
+        A::AbstractMatrix{<:ComplexF64},
+        ipiv::AbstractVector{BlasInt},
+        B::AbstractVecOrMat{<:ComplexF64};
+        info = Ref{BlasInt}())
+    require_one_based_indexing(A, ipiv, B)
+    chktrans(trans)
+    chkstride1(A, B, ipiv)
+    n = checksquare(A)
+    n == size(B, 1) ||
+        throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+    n == length(ipiv) ||
+        throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+    nrhs = size(B, 2)
+    ccall((@blasfunc(zgetrs_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt},
+            Ptr{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+        trans, n, nrhs, A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+        1)  # 1: length of `trans`, passed as the hidden Fortran string-length argument
+    chklapackerror(BlasInt(info[]))
+    return B
+end
+
+# Solve `op(A) * X = B` in place with OpenBLAS `cgetrs_`, where `A`/`ipiv` hold the LU
+# factors and pivots from `getrf!` and `trans` selects `op`; overwrites and returns `B`.
+function getrs!(trans::AbstractChar,
+        A::AbstractMatrix{<:ComplexF32},
+        ipiv::AbstractVector{BlasInt},
+        B::AbstractVecOrMat{<:ComplexF32};
+        info = Ref{BlasInt}())
+    require_one_based_indexing(A, ipiv, B)
+    chktrans(trans)
+    chkstride1(A, B, ipiv)
+    n = checksquare(A)
+    n == size(B, 1) ||
+        throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+    n == length(ipiv) ||
+        throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+    nrhs = size(B, 2)
+    ccall((@blasfunc(cgetrs_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt},
+            Ptr{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+        trans, n, nrhs, A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+        1)  # 1: length of `trans`, passed as the hidden Fortran string-length argument
+    chklapackerror(BlasInt(info[]))
+    return B
+end
+
+# Solve `op(A) * X = B` in place with OpenBLAS `dgetrs_`, where `A`/`ipiv` hold the LU
+# factors and pivots from `getrf!` and `trans` selects `op`; overwrites and returns `B`.
+function getrs!(trans::AbstractChar,
+        A::AbstractMatrix{<:Float64},
+        ipiv::AbstractVector{BlasInt},
+        B::AbstractVecOrMat{<:Float64};
+        info = Ref{BlasInt}())
+    require_one_based_indexing(A, ipiv, B)
+    chktrans(trans)
+    chkstride1(A, B, ipiv)
+    n = checksquare(A)
+    n == size(B, 1) ||
+        throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+    n == length(ipiv) ||
+        throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+    nrhs = size(B, 2)
+    ccall((@blasfunc(dgetrs_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
+            Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+        trans, n, nrhs, A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+        1)  # 1: length of `trans`, passed as the hidden Fortran string-length argument
+    chklapackerror(BlasInt(info[]))
+    return B
+end
+
+# Solve `op(A) * X = B` in place with OpenBLAS `sgetrs_`, where `A`/`ipiv` hold the LU
+# factors and pivots from `getrf!` and `trans` selects `op`; overwrites and returns `B`.
+function getrs!(trans::AbstractChar,
+        A::AbstractMatrix{<:Float32},
+        ipiv::AbstractVector{BlasInt},
+        B::AbstractVecOrMat{<:Float32};
+        info = Ref{BlasInt}())
+    require_one_based_indexing(A, ipiv, B)
+    chktrans(trans)
+    chkstride1(A, B, ipiv)
+    n = checksquare(A)
+    n == size(B, 1) ||
+        throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n"))
+    n == length(ipiv) ||
+        throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
+    nrhs = size(B, 2)
+    ccall((@blasfunc(sgetrs_), OpenBLAS_jll.libopenblas), Cvoid,
+        (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt},
+            Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+        trans, n, nrhs, A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
+        1)  # 1: length of `trans`, passed as the hidden Fortran string-length argument
+    chklapackerror(BlasInt(info[]))
+    return B
+end
+
+end # module OpenBLASLU
+
+default_alias_A(::OpenBLASLUFactorization, _, _) = false
+default_alias_b(::OpenBLASLUFactorization, _, _) = false
+
+# Default cache placeholder: the `lu_instance` of an empty Float64 matrix plus an `info`
+# ref. Written as one expression because a top-level `begin` block introduces no scope
+# and would leak its temporaries into the module as non-const globals.
+const PREALLOCATED_OPENBLAS_LU = (ArrayInterface.lu_instance(rand(0, 0)), Ref{BlasInt}())
+
+function LinearSolve.init_cacheval(
+        alg::OpenBLASLUFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol, reltol,
+        verbose::LinearVerbosity, assumptions::OperatorAssumptions)
+    return PREALLOCATED_OPENBLAS_LU  # shared placeholder for the generic fallback method
+end
+
+function LinearSolve.init_cacheval(alg::OpenBLASLUFactorization,
+        A::AbstractMatrix{<:Union{Float32, ComplexF32, ComplexF64}}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol, verbose::LinearVerbosity,
+        assumptions::OperatorAssumptions)
+    # Build the placeholder from an empty matrix of A's own element type.
+    return ArrayInterface.lu_instance(rand(eltype(A), 0, 0)), Ref{BlasInt}()
+end
+
+# Solve with the OpenBLAS LU: refactorize `cache.A` while the cache is fresh, then
+# back-substitute `cache.b` into `cache.u`; a failed factorization returns `Failure`.
+function SciMLBase.solve!(cache::LinearCache, alg::OpenBLASLUFactorization; kwargs...)
+    mat = convert(AbstractMatrix, cache.A)
+    if cache.isfresh
+        prealloc = @get_cacheval(cache, :OpenBLASLUFactorization)
+        factors, pivots, status, inforef = OpenBLASLU.getrf!(
+            mat; ipiv = prealloc[1].ipiv, info = prealloc[2])
+        fresh = LU(factors, pivots, status), inforef
+        cache.cacheval = fresh
+        if !LinearAlgebra.issuccess(fresh[1])
+            return SciMLBase.build_linear_solution(
+                alg, cache.u, nothing, cache; retcode = ReturnCode.Failure)
+        end
+        cache.isfresh = false
+    end
+
+    lufact, info = @get_cacheval(cache, :OpenBLASLUFactorization)
+    require_one_based_indexing(cache.u, cache.b)
+    nrows, ncols = size(lufact, 1), size(lufact, 2)
+    if nrows <= ncols
+        copyto!(cache.u, cache.b)
+        OpenBLASLU.getrs!('N', lufact.factors, lufact.ipiv, cache.u; info)
+    else
+        rhs = copy(cache.b)
+        OpenBLASLU.getrs!('N', lufact.factors, lufact.ipiv, rhs; info)
+        copyto!(cache.u, 1, rhs, 1, ncols)
+    end
+    return SciMLBase.build_linear_solution(
+        alg, cache.u, nothing, cache; retcode = ReturnCode.Success)
+end
diff --git a/test_openblas.jl b/test_openblas.jl
@@ -0,0 +1,59 @@
+# Check OpenBLASLUFactorization against the default LUFactorization for every element
+# type the OpenBLAS wrapper implements. Each case seeds the RNG and shifts the diagonal
+# so the random 10x10 system is reproducible and strictly diagonally dominant.
+using LinearAlgebra
+using LinearSolve
+using Random
+using Test
+
+@testset "OpenBLASLUFactorization Tests" begin
+    @testset "Float64" begin
+        Random.seed!(1234)
+        A = rand(10, 10) + 20I
+        b = rand(10)
+        prob = LinearProblem(A, b)
+        sol_openblas = solve(prob, OpenBLASLUFactorization())
+        sol_default = solve(prob, LUFactorization())
+
+        @test norm(A * sol_openblas.u - b) < 1e-10
+        @test norm(sol_openblas.u - sol_default.u) < 1e-10
+    end
+
+    @testset "Float32" begin
+        Random.seed!(1234)
+        A = rand(Float32, 10, 10) + 20I
+        b = rand(Float32, 10)
+        prob = LinearProblem(A, b)
+        sol_openblas = solve(prob, OpenBLASLUFactorization())
+        sol_default = solve(prob, LUFactorization())
+
+        @test norm(A * sol_openblas.u - b) < 1e-5
+        @test norm(sol_openblas.u - sol_default.u) < 1e-5
+    end
+
+    @testset "ComplexF64" begin
+        Random.seed!(1234)
+        A = rand(ComplexF64, 10, 10) + 20I
+        b = rand(ComplexF64, 10)
+        prob = LinearProblem(A, b)
+        sol_openblas = solve(prob, OpenBLASLUFactorization())
+        sol_default = solve(prob, LUFactorization())
+
+        @test norm(A * sol_openblas.u - b) < 1e-10
+        @test norm(sol_openblas.u - sol_default.u) < 1e-10
+    end
+
+    @testset "ComplexF32" begin
+        Random.seed!(1234)
+        A = rand(ComplexF32, 10, 10) + 20I
+        b = rand(ComplexF32, 10)
+        prob = LinearProblem(A, b)
+        sol_openblas = solve(prob, OpenBLASLUFactorization())
+        sol_default = solve(prob, LUFactorization())
+
+        @test norm(A * sol_openblas.u - b) < 1e-5
+        @test norm(sol_openblas.u - sol_default.u) < 1e-5
+    end
+end
+
+println("All tests passed!")