Commit 237debc

Update BLIS extension to use libflame for factorization and BLIS for solve
- Changed extension to use libflame for getrf (factorization) operations
- Uses BLIS for getrs (solve) operations, maintaining the BLIS/FLAME integration goal
- Updated Project.toml to include libflame_jll as dependency
- Updated documentation to reflect libflame usage
- Extension now uses: libflame factorization + BLIS solve operations

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent ee8c3a2 commit 237debc
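
The intended end-to-end usage after this commit is sketched below; it mirrors the docstring example in src/extension_algs.jl further down and assumes that importing both JLLs is what activates the extension.

```julia
# Sketch of the intended usage after this commit (mirrors the docstring example
# in src/extension_algs.jl below); assumes loading both JLLs activates the extension.
using LinearSolve, blis_jll, libflame_jll

A = rand(100, 100)
b = rand(100)
prob = LinearProblem(A, b)

# BLISLUFactorization now routes getrf through libflame and getrs through BLIS
sol = solve(prob, BLISLUFactorization())
sol.u  # solution vector
```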

File tree: 5 files changed (+39, -33 lines)


Project.toml

Lines changed: 10 additions & 7 deletions
@@ -26,11 +26,12 @@ SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
+libflame_jll = "8e9d65e3-b2b8-5a9c-baa2-617b4576f0b9"

 [weakdeps]
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
-blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
@@ -48,7 +49,7 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac"

 [extensions]
-LinearSolveBLISExt = "blis_jll"
+LinearSolveBLISExt = ["blis_jll", "libflame_jll"]
 LinearSolveBandedMatricesExt = "BandedMatrices"
 LinearSolveBlockDiagonalsExt = "BlockDiagonals"
 LinearSolveCUDAExt = "CUDA"
@@ -72,16 +73,15 @@ AllocCheck = "0.2"
 Aqua = "0.8"
 ArrayInterface = "7.7"
 BandedMatrices = "1.5"
-blis_jll = "0.9.0"
 BlockDiagonals = "0.1.42, 0.2"
 CUDA = "5"
 CUDSS = "0.1, 0.2, 0.3, 0.4"
 ChainRulesCore = "1.22"
 ConcreteStructs = "0.2.3"
 DocStringExtensions = "0.9.3"
 EnumX = "1.0.4"
-ExplicitImports = "1"
 EnzymeCore = "0.8.1"
+ExplicitImports = "1"
 FastAlmostBandedMatrices = "0.1"
 FastLapackInterface = "2"
 FiniteDiff = "2.22"
@@ -121,15 +121,16 @@ StaticArraysCore = "1.4.2"
 Test = "1"
 UnPack = "1"
 Zygote = "0.7"
+blis_jll = "0.9.0"
 julia = "1.10"
+libflame_jll = "5.2.0"

 [extras]
 AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
-ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
-blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
+ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e"
 FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
@@ -154,6 +155,8 @@ StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
+libflame_jll = "8e9d65e3-b2b8-5a9c-baa2-617b4576f0b9"

 [targets]
-test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "blis_jll", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "FastLapackInterface", "SparseArrays", "ExplicitImports"]
+test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "blis_jll", "libflame_jll", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "FastLapackInterface", "SparseArrays", "ExplicitImports"]

docs/src/solvers/solvers.md

Lines changed: 4 additions & 3 deletions
@@ -17,7 +17,7 @@ the best choices, with SVD being the slowest but most precise.
 For efficiency, `RFLUFactorization` is the fastest for dense LU-factorizations until around
 150x150 matrices, though this can be dependent on the exact details of the hardware. After this
 point, `MKLLUFactorization` is usually faster on most hardware. `BLISLUFactorization` provides
-another high-performance option that combines optimized BLAS operations with stable LAPACK routines.
+another high-performance option that combines optimized BLAS operations from BLIS with optimized LAPACK routines from libflame.
 Note that on Mac computers that `AppleAccelerateLUFactorization` is generally always the fastest.
 `LUFactorization` will use your base system BLAS which can be fast or slow depending on the hardware
 configuration. `SimpleLUFactorization` will be fast only on very small matrices but can cut down on
@@ -191,8 +191,9 @@ MKLLUFactorization

 !!! note

-    Using this solver requires that the package blis_jll is available. The solver will
-    be automatically available when blis_jll is loaded, i.e., `using blis_jll`.
+    Using this solver requires that both blis_jll and libflame_jll packages are available.
+    The solver will be automatically available when both packages are loaded, i.e.,
+    `using blis_jll, libflame_jll`.

 ```@docs
 BLISLUFactorization
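
To make the size guidance in the docs concrete, here is a rough timing sketch comparing the algorithms named above. It is only illustrative: the crossover points are hardware-dependent, and it assumes RecursiveFactorization.jl (for `RFLUFactorization`) and both JLLs (for `BLISLUFactorization`) are loaded; MKL availability is also assumed.

```julia
# Rough comparison of the dense LU options discussed in the docs; numbers vary by hardware.
# First call to each algorithm includes compilation time; rerun (or use BenchmarkTools)
# for meaningful measurements.
using LinearSolve, RecursiveFactorization, blis_jll, libflame_jll

for n in (100, 300, 1000)
    A = rand(n, n); b = rand(n)
    prob = LinearProblem(A, b)
    for alg in (RFLUFactorization(), MKLLUFactorization(), BLISLUFactorization())
        t = @elapsed solve(prob, alg)
        println("n = $n  $(nameof(typeof(alg))): $(round(t, digits = 4)) s")
    end
end
```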

ext/LinearSolveBLISExt.jl

Lines changed: 14 additions & 14 deletions
@@ -3,30 +3,30 @@ LinearSolveBLISExt

 Extension module that provides BLIS (BLAS-like Library Instantiation Software) integration
 for LinearSolve.jl. This extension combines BLIS for optimized BLAS operations with
-reference LAPACK for LAPACK operations, providing a high-performance yet stable linear
-algebra backend.
+libflame for optimized LAPACK operations, providing a fully optimized linear algebra
+backend.

 Key features:
 - Uses BLIS for BLAS operations (matrix multiplication, etc.)
-- Uses reference LAPACK for LAPACK operations (LU factorization, solve, etc.)
+- Uses libflame for LAPACK operations (LU factorization, solve, etc.)
 - Supports all standard numeric types (Float32/64, ComplexF32/64)
 - Follows MKL-style ccall patterns for consistency
 """
 module LinearSolveBLISExt

 using Libdl
 using blis_jll
-using LAPACK_jll
+using libflame_jll
 using LinearAlgebra
 using LinearSolve

-using LinearAlgebra: BlasInt, LU
+using LinearAlgebra: BlasInt, LU, libblastrampoline
 using LinearAlgebra.LAPACK: require_one_based_indexing, chkfinite, chkstride1,
     @blasfunc, chkargsok
 using LinearSolve: ArrayInterface, BLISLUFactorization, @get_cacheval, LinearCache, SciMLBase, do_factorization

 const global libblis = blis_jll.blis
-const global liblapack = LAPACK_jll.liblapack_path
+const global libflame = libflame_jll.libflame

 """
     LinearSolve.do_factorization(alg::BLISLUFactorization, A, b, u)
@@ -54,7 +54,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(zgetrf_), liblapack), Cvoid,
+    ccall(("zgetrf_", libflame), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -74,7 +74,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(cgetrf_), liblapack), Cvoid,
+    ccall(("cgetrf_", libflame), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -94,7 +94,7 @@ function getrf!(A::AbstractMatrix{<:Float64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(dgetrf_), liblapack), Cvoid,
+    ccall(("dgetrf_", libflame), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -114,7 +114,7 @@ function getrf!(A::AbstractMatrix{<:Float32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(sgetrf_), liblapack), Cvoid,
+    ccall(("sgetrf_", libflame), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -138,7 +138,7 @@ function getrs!(trans::AbstractChar,
         throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
     end
     nrhs = size(B, 2)
-    ccall(("zgetrs_", liblapack), Cvoid,
+    ccall((@blasfunc(zgetrs_), libblis), Cvoid,
         (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt},
             Ptr{BlasInt}, Ptr{ComplexF64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
         trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
@@ -163,7 +163,7 @@ function getrs!(trans::AbstractChar,
         throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
     end
     nrhs = size(B, 2)
-    ccall(("cgetrs_", liblapack), Cvoid,
+    ccall((@blasfunc(cgetrs_), libblis), Cvoid,
         (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt},
             Ptr{BlasInt}, Ptr{ComplexF32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
         trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
@@ -188,7 +188,7 @@ function getrs!(trans::AbstractChar,
         throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
     end
     nrhs = size(B, 2)
-    ccall(("dgetrs_", liblapack), Cvoid,
+    ccall((@blasfunc(dgetrs_), libblis), Cvoid,
         (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
             Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
         trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
@@ -213,7 +213,7 @@ function getrs!(trans::AbstractChar,
         throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n"))
     end
     nrhs = size(B, 2)
-    ccall(("sgetrs_", liblapack), Cvoid,
+    ccall((@blasfunc(sgetrs_), libblis), Cvoid,
         (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32}, Ref{BlasInt},
             Ptr{BlasInt}, Ptr{Float32}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
         trans, n, size(B, 2), A, max(1, stride(A, 2)), ipiv, B, max(1, stride(B, 2)), info,
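
Putting the two halves of the diff together, the factor/solve split looks roughly like the standalone sketch below. It simply mirrors the ccall signatures shown above (`dgetrf_` resolved from `libflame_jll.libflame`, `dgetrs_` from `blis_jll.blis`); that these symbols resolve this way is an assumption carried over from the extension, not something verified here.

```julia
# Standalone sketch of the extension's split: LU factorization (dgetrf_) via libflame,
# triangular solves (dgetrs_) via BLIS. Signatures mirror the ccalls in the diff above;
# the symbol names and library handles are assumptions taken from the extension.
using blis_jll, libflame_jll
using LinearAlgebra: BlasInt

n = 4
A = rand(n, n)
b = rand(n)
ipiv = Vector{BlasInt}(undef, n)
info = Ref{BlasInt}(0)

# Factorize A in place with libflame's getrf
ccall(("dgetrf_", libflame_jll.libflame), Cvoid,
    (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
    n, n, A, n, ipiv, info)

# Solve with the factors using BLIS's getrs (trailing Clong is the hidden char-length argument)
ccall(("dgetrs_", blis_jll.blis), Cvoid,
    (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64}, Ref{BlasInt},
     Ptr{BlasInt}, Ptr{Float64}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
    'N', n, 1, A, n, ipiv, b, n, info, 1)

b  # now holds the solution x, provided info[] == 0
```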

src/extension_algs.jl

Lines changed: 10 additions & 8 deletions
@@ -446,18 +446,20 @@ BLISLUFactorization()
 ```

 A wrapper over BLIS (BLAS-like Library Instantiation Software) for high-performance
-BLAS operations combined with reference LAPACK for stability. This provides optimized
-linear algebra operations while maintaining numerical accuracy and broad compatibility.
+BLAS operations combined with libflame for optimized LAPACK operations. This provides
+a fully optimized linear algebra stack with both high-performance BLAS and LAPACK routines.

 BLIS provides highly optimized BLAS routines that can outperform reference BLAS
-implementations, especially for certain matrix sizes and operations. The integration
-uses BLIS for BLAS operations (like matrix multiplication) and falls back to reference
-LAPACK for LAPACK operations (like LU factorization and solve).
+implementations, especially for certain matrix sizes and operations. libflame provides
+optimized LAPACK operations that complement BLIS. The integration uses BLIS for BLAS
+operations (like matrix multiplication) and libflame for LAPACK operations (like LU
+factorization and solve).

 !!! note

-    Using this solver requires that the package blis_jll is available. The solver will
-    be automatically available when blis_jll is loaded, i.e., `using blis_jll`.
+    Using this solver requires that both blis_jll and libflame_jll packages are available.
+    The solver will be automatically available when both packages are loaded, i.e.,
+    `using blis_jll, libflame_jll`.

 ## Performance Characteristics

@@ -468,7 +470,7 @@ LAPACK for LAPACK operations (like LU factorization and solve).
 ## Example

 ```julia
-using LinearSolve, blis_jll
+using LinearSolve, blis_jll, libflame_jll
 A = rand(100, 100)
 b = rand(100)
 prob = LinearProblem(A, b)

test/basictests.jl

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ using IterativeSolvers, KrylovKit, MKL_jll, KrylovPreconditioners
 using Test

 # Import JLL packages for extensions
-using blis_jll
+using blis_jll, libflame_jll
 import Random

 const Dual64 = ForwardDiff.Dual{Nothing, Float64, 1}
