
Commit e155aa8

Complete
1 parent 0771e97 commit e155aa8

12 files changed: +61 -135 lines

docs/src/tutorials/autotune.md

Lines changed: 20 additions & 20 deletions

@@ -17,7 +17,7 @@ using LinearSolve
 using LinearSolveAutotune
 
 # Run autotune with default settings
-results = autotune_setup()
+results, sysinfo, plots = autotune_setup()
 ```
 
 This will:
@@ -32,11 +32,11 @@ This will:
 The autotune process returns benchmark results and creates several outputs:
 
 ```julia
-# Basic usage returns just the DataFrame of results
-results = autotune_setup(make_plot=false)
+# Basic usage returns the DataFrame of results and system information
+results, sysinfo, _ = autotune_setup(make_plot=false)
 
-# With plotting enabled, returns (DataFrame, Plots)
-results, plots = autotune_setup(make_plot=true)
+# With plotting enabled, returns (DataFrame, System Info, Plots)
+results, sysinfo, plots = autotune_setup(make_plot=true)
 
 # Examine the results
 println("Algorithms tested: ", unique(results.algorithm))
@@ -52,13 +52,13 @@ You can specify which element types to benchmark:
 
 ```julia
 # Test only Float64 and ComplexF64
-results = autotune_setup(eltypes = (Float64, ComplexF64))
+results, sysinfo, _ = autotune_setup(eltypes = (Float64, ComplexF64))
 
 # Test arbitrary precision types (excludes BLAS algorithms)
-results = autotune_setup(eltypes = (BigFloat,), telemetry = false)
+results, sysinfo, _ = autotune_setup(eltypes = (BigFloat,), telemetry = false)
 
 # Test high precision float
-results = autotune_setup(eltypes = (Float64, BigFloat))
+results, sysinfo, _ = autotune_setup(eltypes = (Float64, BigFloat))
 ```
 
 ### Matrix Sizes
@@ -67,10 +67,10 @@ Control the range of matrix sizes tested:
 
 ```julia
 # Default: small to medium matrices (4×4 to 500×500)
-results = autotune_setup(large_matrices = false)
+results, sysinfo, _ = autotune_setup(large_matrices = false)
 
 # Large matrices: includes sizes up to 10,000×10,000 (good for GPU systems)
-results = autotune_setup(large_matrices = true)
+results, sysinfo, _ = autotune_setup(large_matrices = true)
 ```
 
 ### Benchmark Quality vs Speed
@@ -79,10 +79,10 @@ Adjust the thoroughness of benchmarking:
 
 ```julia
 # Quick benchmark (fewer samples, less time per test)
-results = autotune_setup(samples = 1, seconds = 0.1)
+results, sysinfo, _ = autotune_setup(samples = 1, seconds = 0.1)
 
 # Thorough benchmark (more samples, more time per test)
-results = autotune_setup(samples = 10, seconds = 2.0)
+results, sysinfo, _ = autotune_setup(samples = 10, seconds = 2.0)
 ```
 
 ### Privacy and Telemetry
@@ -105,13 +105,13 @@ However, if your system has privacy concerns or you prefer not to share data, you
 
 ```julia
 # Disable telemetry (no data shared)
-results = autotune_setup(telemetry = false)
+results, sysinfo, _ = autotune_setup(telemetry = false)
 
 # Disable preference setting (just benchmark, don't change defaults)
-results = autotune_setup(set_preferences = false)
+results, sysinfo, _ = autotune_setup(set_preferences = false)
 
 # Disable plotting (faster, less output)
-results = autotune_setup(make_plot = false)
+results, sysinfo, _ = autotune_setup(make_plot = false)
 ```
 
 ### Missing Algorithm Handling
@@ -125,10 +125,10 @@ you can set `skip_missing_algs = true` to allow missing algorithms without failing
 
 ```julia
 # Default behavior: error if expected algorithms are missing
-results = autotune_setup() # Will error if RFLUFactorization missing
+results, sysinfo, _ = autotune_setup() # Will error if RFLUFactorization missing
 
 # Allow missing algorithms (useful for incomplete setups)
-results = autotune_setup(skip_missing_algs = true) # Will warn instead of error
+results, sysinfo, _ = autotune_setup(skip_missing_algs = true) # Will warn instead of error
 ```
 
 ## GPU Systems
@@ -137,7 +137,7 @@ On systems with CUDA or Metal GPU support, the autotuner will automatically detect
 
 ```julia
 # Enable large matrix testing for GPUs
-results = autotune_setup(large_matrices = true, samples = 3, seconds = 1.0)
+results, sysinfo, _ = autotune_setup(large_matrices = true, samples = 3, seconds = 1.0)
 ```
 
 GPU algorithms tested (when available):
@@ -152,7 +152,7 @@ GPU algorithms tested (when available):
 using DataFrames
 using Statistics
 
-results = autotune_setup(make_plot = false)
+results, sysinfo, _ = autotune_setup(make_plot = false)
 
 # Filter successful results
 successful = filter(row -> row.success, results)
@@ -170,7 +170,7 @@ println(summary)
 When `make_plot=true`, you get separate plots for each element type:
 
 ```julia
-results, plots = autotune_setup()
+results, sysinfo, plots = autotune_setup()
 
 # plots is a dictionary keyed by element type
 for (eltype, plot) in plots
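
Taken together, the tutorial hunks above document one change: `autotune_setup` now always returns three values. A minimal sketch of the new calling convention (assuming LinearSolveAutotune is installed; all names come from the diff above):

```julia
using LinearSolve
using LinearSolveAutotune

# All three values come back even when plotting is disabled;
# the third slot is then `nothing` rather than a Dict of plots.
results, sysinfo, plots = autotune_setup(make_plot = false)

if plots !== nothing
    for (eltype, plot) in plots  # plots is keyed by element type
        display(plot)
    end
end
```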

ext/LinearSolveBLISExt.jl

Lines changed: 4 additions & 4 deletions

@@ -27,7 +27,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(zgetrf_), liblapack), Cvoid,
+    ccall(("zgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -47,7 +47,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(cgetrf_), liblapack), Cvoid,
+    ccall(("cgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -67,7 +67,7 @@ function getrf!(A::AbstractMatrix{<:Float64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(dgetrf_), liblapack), Cvoid,
+    ccall(("dgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -87,7 +87,7 @@ function getrf!(A::AbstractMatrix{<:Float32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(sgetrf_), liblapack), Cvoid,
+    ccall(("sgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
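
All four hunks above make the same substitution: the `@blasfunc` lookup is replaced by the literal Fortran symbol name. `@blasfunc` appends the ILP64 suffix on Julia's default 64-bit-integer BLAS builds, so it would resolve to e.g. `dgetrf_64_`; hard-coding `"dgetrf_"` pins the plain LP64 symbol, which (presumably, the commit does not say) is what the JLL-provided library exports. A small sketch of the difference; `liblapack` is the extension's own library binding, assumed here:

```julia
using LinearAlgebra.BLAS: @blasfunc

# Prints "dgetrf_64_" on a default (ILP64) Julia build, which is the
# wrong symbol for an LP64 LAPACK that exports plain "dgetrf_".
println(@blasfunc(dgetrf_))

# The commit therefore calls the unsuffixed name directly:
# ccall(("dgetrf_", liblapack), Cvoid, (...), m, n, A, lda, ipiv, info)
```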

lib/LinearSolveAutotune/Project.toml

Lines changed: 4 additions & 7 deletions

@@ -19,12 +19,8 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
-BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
-
-[weakdeps]
-BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899"
+blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
+LAPACK_jll = "51474c39-65e3-53ba-86ba-03b1b862ec14"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
 
@@ -44,7 +40,8 @@ Printf = "1"
 Dates = "1"
 Test = "1"
 RecursiveFactorization = "0.2"
-BLIS = "0.1"
+blis_jll = "0.9.0"
+LAPACK_jll = "3"
 CUDA = "5"
 Metal = "1"
 julia = "1.10"
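
With BLIS no longer a [weakdeps] entry, the JLL binaries are ordinary hard dependencies: they install unconditionally, and platform support is checked at runtime instead of load time. A hypothetical REPL session to confirm the new dependencies resolve (the environment path is illustrative):

```julia
using Pkg

# Instantiate the subpackage environment and list the new JLL deps.
Pkg.activate("lib/LinearSolveAutotune")
Pkg.instantiate()
Pkg.status(["blis_jll", "LAPACK_jll"])
```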

lib/LinearSolveAutotune/src/LinearSolveAutotune.jl

Lines changed: 12 additions & 38 deletions

@@ -11,47 +11,19 @@ using LinearAlgebra
 using Printf
 using Dates
 using Base64
-using RecursiveFactorization # Hard dependency to ensure RFLUFactorization is available
+
+# Hard dependencies to ensure RFLUFactorization and the other solvers are available
+using RecursiveFactorization
+using blis_jll
+using LAPACK_jll
+using CUDA
+using Metal
+
 
 # Optional dependencies for telemetry and plotting
 using GitHub
 using Plots
 
-# Load JLL packages when available for better library access
-const BLIS_JLL_AVAILABLE = Ref(false)
-const LAPACK_JLL_AVAILABLE = Ref(false)
-
-function __init__()
-    # Try to load JLL packages at runtime for enhanced BLIS support
-    try
-        # Check if BLIS_jll is available in the current environment
-        if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll"))
-            BLIS_JLL_AVAILABLE[] = true
-            @info "BLIS_jll detected - enhanced BLIS library access available"
-        else
-            @eval using BLIS_jll
-            BLIS_JLL_AVAILABLE[] = true
-            @info "BLIS_jll loaded for enhanced BLIS library access"
-        end
-    catch
-        @debug "BLIS_jll not available, BLISLUFactorization may not work"
-    end
-
-    try
-        # Check if LAPACK_jll is available in the current environment
-        if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll"))
-            LAPACK_JLL_AVAILABLE[] = true
-            @info "LAPACK_jll detected - enhanced LAPACK library access available"
-        else
-            @eval using LAPACK_jll
-            LAPACK_JLL_AVAILABLE[] = true
-            @info "LAPACK_jll loaded for enhanced LAPACK library access"
-        end
-    catch
-        @debug "LAPACK_jll not available, some BLIS functionality may be limited"
-    end
-end
-
 export autotune_setup
 
 include("algorithms.jl")
@@ -224,11 +196,13 @@ function autotune_setup(;
 
     @info "Autotune setup completed!"
 
+    sysinfo = get_detailed_system_info()
+
     # Return results and plots
     if make_plot && plots_dict !== nothing && !isempty(plots_dict)
-        return results_df, plots_dict
+        return results_df, sysinfo, plots_dict
     else
-        return results_df
+        return results_df, sysinfo, nothing
     end
 end
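
Because `get_detailed_system_info()` now runs on every call and its result is returned in the second slot, callers can report system context without extra work. A sketch (assuming string keys as in gpu_detection.jl; the exact key set is whatever that function collects):

```julia
results, sysinfo, plots = autotune_setup(make_plot = false)

# Print the system snapshot alongside the benchmark table, e.g. when
# sharing results; "libdl" is one key set by get_detailed_system_info().
for key in sort!(collect(keys(sysinfo)))
    println(rpad(key, 28), sysinfo[key])
end
```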

lib/LinearSolveAutotune/src/algorithms.jl

Lines changed: 7 additions & 42 deletions

@@ -17,6 +17,13 @@ function get_available_algorithms(; skip_missing_algs::Bool = false)
     push!(algs, GenericLUFactorization())
     push!(alg_names, "GenericLUFactorization")
 
+    if blis_jll.is_available()
+        push!(algs, LinearSolve.BLISLUFactorization())
+        push!(alg_names, "BLISLUFactorization")
+    else
+        @warn "blis_jll not available for this platform. BLISLUFactorization will not be included."
+    end
+
     # MKL if available
     if LinearSolve.usemkl
         push!(algs, MKLLUFactorization())
@@ -35,48 +42,6 @@ function get_available_algorithms(; skip_missing_algs::Bool = false)
         end
     end
 
-    # BLIS if JLL packages are available and hardware supports it
-    try
-        # Check if BLIS_jll and LAPACK_jll are available, which enable BLISLUFactorization
-        blis_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll"))
-        lapack_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll"))
-
-        if (blis_jll_available || lapack_jll_available) && isdefined(LinearSolve, :BLISLUFactorization) && hasmethod(LinearSolve.BLISLUFactorization, ())
-            # Test if BLIS works on this hardware
-            try
-                test_alg = LinearSolve.BLISLUFactorization()
-                # Simple test to see if it can be created
-                push!(algs, test_alg)
-                push!(alg_names, "BLISLUFactorization")
-            catch e
-                msg = "BLISLUFactorization available but not supported on this hardware: $e"
-                if skip_missing_algs
-                    @warn msg
-                else
-                    @info msg # BLIS hardware incompatibility is not an error, just info
-                end
-            end
-        else
-            if blis_jll_available || lapack_jll_available
-                msg = "BLIS_jll/LAPACK_jll loaded but BLISLUFactorization not available in LinearSolve"
-            else
-                msg = "BLIS_jll and LAPACK_jll not loaded - BLISLUFactorization requires these JLL packages"
-            end
-            if skip_missing_algs
-                @warn msg
-            else
-                @info msg # Not having BLIS JLL packages is not an error
-            end
-        end
-    catch e
-        msg = "Error checking BLIS JLL package availability: $e"
-        if skip_missing_algs
-            @warn msg
-        else
-            @info msg
-        end
-    end
-
     # RecursiveFactorization - should always be available as it's a hard dependency
     try
         if LinearSolve.userecursivefactorization(nothing)
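
The new guard condenses roughly forty lines of `Base.loaded_modules` probing into one call. Every JLL package generated by BinaryBuilder exports `is_available()`, which is true only when a binary artifact exists for the host platform, so the pattern generalizes. A minimal sketch:

```julia
using blis_jll

if blis_jll.is_available()
    println("BLIS binary found; BLISLUFactorization can be benchmarked")
else
    # Mirrors the @warn branch added above.
    println("no BLIS binary for this platform; skipping BLISLUFactorization")
end
```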

lib/LinearSolveAutotune/src/benchmarking.jl

Lines changed: 6 additions & 11 deletions

@@ -12,17 +12,12 @@ function test_algorithm_compatibility(alg, eltype::Type, test_size::Int = 4)
     alg_name = string(typeof(alg).name.name)
 
     # Define strict compatibility rules for BLAS-dependent algorithms
-    if eltype in [BigFloat, BigInt, Rational{Int}, Complex{BigFloat}]
-        # For arbitrary precision types, only allow pure Julia algorithms
-        if alg_name in ["LUFactorization", "MKLLUFactorization", "AppleAccelerateLUFactorization"]
-            return false # These rely on BLAS and shouldn't work with arbitrary precision
-        end
-        # SimpleLUFactorization, GenericLUFactorization, and RFLUFactorization should work (pure Julia)
-    elseif eltype in [Float16, Complex{Float16}]
-        # Float16 might not be supported by all BLAS
-        if alg_name in ["MKLLUFactorization", "AppleAccelerateLUFactorization"]
-            return false # These might not support Float16
-        end
+    if !(eltype <: LinearAlgebra.BLAS.BlasFloat) && alg_name in ["BLISLUFactorization", "MKLLUFactorization", "AppleAccelerateLUFactorization"]
+        return false # BLAS algorithms not compatible with non-standard types
+    end
+
+    if alg_name == "BLISLUFactorization" && Sys.isapple()
+        return false # BLISLUFactorization has no Apple Silicon binary
     end
 
     # For standard types or algorithms that passed the strict check, test functionality
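
The rewritten check collapses the hand-maintained type lists into a single subtype test: `LinearAlgebra.BLAS.BlasFloat` is the union of the four element types BLAS/LAPACK binaries accept, so everything else is routed away from the BLAS-backed algorithms, Float16 and BigFloat included. A quick illustration of the predicate (standard LinearAlgebra behavior, not code from this repo):

```julia
using LinearAlgebra

# BlasFloat == Union{Float32, Float64, ComplexF32, ComplexF64}
for T in (Float64, ComplexF32, Float16, BigFloat, Rational{Int})
    println(rpad(string(T), 16), T <: LinearAlgebra.BLAS.BlasFloat)
end
# Float64 and ComplexF32 print true; the rest print false, exactly the
# types excluded from the BLAS-dependent algorithms after this change.
```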

lib/LinearSolveAutotune/src/gpu_detection.jl

Lines changed: 1 addition & 5 deletions

@@ -305,11 +305,7 @@ function get_detailed_system_info()
     catch
         system_data["libdl"] = "unknown"
     end
-
-    # JLL package availability for enhanced library access
-    system_data["blis_jll_available"] = LinearSolveAutotune.BLIS_JLL_AVAILABLE[]
-    system_data["lapack_jll_available"] = LinearSolveAutotune.LAPACK_JLL_AVAILABLE[]
-
+
     # Memory information (if available)
     try
         if Sys.islinux()
Binary file not shown. (217 KB)

Binary file not shown.
