
Commit e155aa8

Complete
1 parent 0771e97 commit e155aa8

12 files changed: +61 -135 lines

docs/src/tutorials/autotune.md

Lines changed: 20 additions & 20 deletions

@@ -17,7 +17,7 @@ using LinearSolve
 using LinearSolveAutotune
 
 # Run autotune with default settings
-results = autotune_setup()
+results, sysinfo, plots = autotune_setup()
 ```
 
 This will:
@@ -32,11 +32,11 @@ This will:
 The autotune process returns benchmark results and creates several outputs:
 
 ```julia
-# Basic usage returns just the DataFrame of results
-results = autotune_setup(make_plot=false)
+# Basic usage returns the DataFrame of results and system information
+results, sysinfo, _ = autotune_setup(make_plot=false)
 
-# With plotting enabled, returns (DataFrame, Plots)
-results, plots = autotune_setup(make_plot=true)
+# With plotting enabled, returns (DataFrame, System Info, Plots)
+results, sysinfo, plots = autotune_setup(make_plot=true)
 
 # Examine the results
 println("Algorithms tested: ", unique(results.algorithm))
@@ -52,13 +52,13 @@ You can specify which element types to benchmark:
 
 ```julia
 # Test only Float64 and ComplexF64
-results = autotune_setup(eltypes = (Float64, ComplexF64))
+results, sysinfo, _ = autotune_setup(eltypes = (Float64, ComplexF64))
 
 # Test arbitrary precision types (excludes BLAS algorithms)
-results = autotune_setup(eltypes = (BigFloat,), telemetry = false)
+results, sysinfo, _ = autotune_setup(eltypes = (BigFloat,), telemetry = false)
 
 # Test high precision float
-results = autotune_setup(eltypes = (Float64, BigFloat))
+results, sysinfo, _ = autotune_setup(eltypes = (Float64, BigFloat))
 ```
 
 ### Matrix Sizes
@@ -67,10 +67,10 @@ Control the range of matrix sizes tested:
 
 ```julia
 # Default: small to medium matrices (4×4 to 500×500)
-results = autotune_setup(large_matrices = false)
+results, sysinfo, _ = autotune_setup(large_matrices = false)
 
 # Large matrices: includes sizes up to 10,000×10,000 (good for GPU systems)
-results = autotune_setup(large_matrices = true)
+results, sysinfo, _ = autotune_setup(large_matrices = true)
 ```
 
 ### Benchmark Quality vs Speed
@@ -79,10 +79,10 @@ Adjust the thoroughness of benchmarking:
 
 ```julia
 # Quick benchmark (fewer samples, less time per test)
-results = autotune_setup(samples = 1, seconds = 0.1)
+results, sysinfo, _ = autotune_setup(samples = 1, seconds = 0.1)
 
 # Thorough benchmark (more samples, more time per test)
-results = autotune_setup(samples = 10, seconds = 2.0)
+results, sysinfo, _ = autotune_setup(samples = 10, seconds = 2.0)
 ```
 
 ### Privacy and Telemetry
@@ -105,13 +105,13 @@ However, if your system has privacy concerns or you prefer not to share data, you
 
 ```julia
 # Disable telemetry (no data shared)
-results = autotune_setup(telemetry = false)
+results, sysinfo, _ = autotune_setup(telemetry = false)
 
 # Disable preference setting (just benchmark, don't change defaults)
-results = autotune_setup(set_preferences = false)
+results, sysinfo, _ = autotune_setup(set_preferences = false)
 
 # Disable plotting (faster, less output)
-results = autotune_setup(make_plot = false)
+results, sysinfo, _ = autotune_setup(make_plot = false)
 ```
 
 ### Missing Algorithm Handling
@@ -125,10 +125,10 @@ you can set `skip_missing_algs = true` to allow missing algorithms without failing
 
 ```julia
 # Default behavior: error if expected algorithms are missing
-results = autotune_setup() # Will error if RFLUFactorization missing
+results, sysinfo, _ = autotune_setup() # Will error if RFLUFactorization missing
 
 # Allow missing algorithms (useful for incomplete setups)
-results = autotune_setup(skip_missing_algs = true) # Will warn instead of error
+results, sysinfo, _ = autotune_setup(skip_missing_algs = true) # Will warn instead of error
 ```
 
 ## GPU Systems
@@ -137,7 +137,7 @@ On systems with CUDA or Metal GPU support, the autotuner will automatically detect
 
 ```julia
 # Enable large matrix testing for GPUs
-results = autotune_setup(large_matrices = true, samples = 3, seconds = 1.0)
+results, sysinfo, _ = autotune_setup(large_matrices = true, samples = 3, seconds = 1.0)
 ```
 
 GPU algorithms tested (when available):
@@ -152,7 +152,7 @@ GPU algorithms tested (when available):
 using DataFrames
 using Statistics
 
-results = autotune_setup(make_plot = false)
+results, sysinfo, _ = autotune_setup(make_plot = false)
 
 # Filter successful results
 successful = filter(row -> row.success, results)
@@ -170,7 +170,7 @@ println(summary)
 When `make_plot=true`, you get separate plots for each element type:
 
 ```julia
-results, plots = autotune_setup()
+results, sysinfo, plots = autotune_setup()
 
 # plots is a dictionary keyed by element type
 for (eltype, plot) in plots
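
Taken together, the tutorial hunks above document one change: `autotune_setup` now always returns three values. A minimal sketch of the new calling convention (assuming LinearSolveAutotune is installed; all names come from the diff above):

```julia
using LinearSolve
using LinearSolveAutotune

# All three values come back even when plotting is disabled;
# the third slot is then `nothing` rather than a Dict of plots.
results, sysinfo, plots = autotune_setup(make_plot = false)

if plots !== nothing
    for (eltype, plot) in plots  # plots is keyed by element type
        display(plot)
    end
end
```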

ext/LinearSolveBLISExt.jl

Lines changed: 4 additions & 4 deletions

@@ -27,7 +27,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(zgetrf_), liblapack), Cvoid,
+    ccall(("zgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -47,7 +47,7 @@ function getrf!(A::AbstractMatrix{<:ComplexF32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(cgetrf_), liblapack), Cvoid,
+    ccall(("cgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{ComplexF32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -67,7 +67,7 @@ function getrf!(A::AbstractMatrix{<:Float64};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(dgetrf_), liblapack), Cvoid,
+    ccall(("dgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float64},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
@@ -87,7 +87,7 @@ function getrf!(A::AbstractMatrix{<:Float32};
     if isempty(ipiv)
         ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2)))
     end
-    ccall((@blasfunc(sgetrf_), liblapack), Cvoid,
+    ccall(("sgetrf_", liblapack), Cvoid,
         (Ref{BlasInt}, Ref{BlasInt}, Ptr{Float32},
             Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
         m, n, A, lda, ipiv, info)
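
All four hunks above make the same substitution: the `@blasfunc` lookup is replaced by the literal Fortran symbol name. `@blasfunc` appends the ILP64 suffix on Julia's default 64-bit-integer BLAS builds, so it would resolve to e.g. `dgetrf_64_`; hard-coding `"dgetrf_"` pins the plain LP64 symbol, which (presumably, the commit does not say) is what the JLL-provided library exports. A small sketch of the difference; `liblapack` is the extension's own library binding, assumed here:

```julia
using LinearAlgebra.BLAS: @blasfunc

# Prints "dgetrf_64_" on a default (ILP64) Julia build, which is the
# wrong symbol for an LP64 LAPACK that exports plain "dgetrf_".
println(@blasfunc(dgetrf_))

# The commit therefore calls the unsuffixed name directly:
# ccall(("dgetrf_", liblapack), Cvoid, (...), m, n, A, lda, ipiv, info)
```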

lib/LinearSolveAutotune/Project.toml

Lines changed: 4 additions & 7 deletions

@@ -19,12 +19,8 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
-BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
-
-[weakdeps]
-BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899"
+blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
+LAPACK_jll = "51474c39-65e3-53ba-86ba-03b1b862ec14"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
 
@@ -44,7 +40,8 @@ Printf = "1"
 Dates = "1"
 Test = "1"
 RecursiveFactorization = "0.2"
-BLIS = "0.1"
+blis_jll = "0.9.0"
+LAPACK_jll = "3"
 CUDA = "5"
 Metal = "1"
 julia = "1.10"
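
With BLIS no longer a [weakdeps] entry, the JLL binaries are ordinary hard dependencies: they install unconditionally, and platform support is checked at runtime instead of load time. A hypothetical REPL session to confirm the new dependencies resolve (the environment path is illustrative):

```julia
using Pkg

# Instantiate the subpackage environment and list the new JLL deps.
Pkg.activate("lib/LinearSolveAutotune")
Pkg.instantiate()
Pkg.status(["blis_jll", "LAPACK_jll"])
```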

lib/LinearSolveAutotune/src/LinearSolveAutotune.jl

Lines changed: 12 additions & 38 deletions

@@ -11,47 +11,19 @@ using LinearAlgebra
 using Printf
 using Dates
 using Base64
-using RecursiveFactorization # Hard dependency to ensure RFLUFactorization is available
+
+# Hard dependencies to ensure RFLUFactorization and the other solvers are available
+using RecursiveFactorization
+using blis_jll
+using LAPACK_jll
+using CUDA
+using Metal
+
 
 # Optional dependencies for telemetry and plotting
 using GitHub
 using Plots
 
-# Load JLL packages when available for better library access
-const BLIS_JLL_AVAILABLE = Ref(false)
-const LAPACK_JLL_AVAILABLE = Ref(false)
-
-function __init__()
-    # Try to load JLL packages at runtime for enhanced BLIS support
-    try
-        # Check if BLIS_jll is available in the current environment
-        if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll"))
-            BLIS_JLL_AVAILABLE[] = true
-            @info "BLIS_jll detected - enhanced BLIS library access available"
-        else
-            @eval using BLIS_jll
-            BLIS_JLL_AVAILABLE[] = true
-            @info "BLIS_jll loaded for enhanced BLIS library access"
-        end
-    catch
-        @debug "BLIS_jll not available, BLISLUFactorization may not work"
-    end
-
-    try
-        # Check if LAPACK_jll is available in the current environment
-        if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll"))
-            LAPACK_JLL_AVAILABLE[] = true
-            @info "LAPACK_jll detected - enhanced LAPACK library access available"
-        else
-            @eval using LAPACK_jll
-            LAPACK_JLL_AVAILABLE[] = true
-            @info "LAPACK_jll loaded for enhanced LAPACK library access"
-        end
-    catch
-        @debug "LAPACK_jll not available, some BLIS functionality may be limited"
-    end
-end
-
 export autotune_setup
 
 include("algorithms.jl")
@@ -224,11 +196,13 @@ function autotune_setup(;
 
     @info "Autotune setup completed!"
 
+    sysinfo = get_detailed_system_info()
+
     # Return results and plots
     if make_plot && plots_dict !== nothing && !isempty(plots_dict)
-        return results_df, plots_dict
+        return results_df, sysinfo, plots_dict
     else
-        return results_df
+        return results_df, sysinfo, nothing
     end
 end
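
Because `get_detailed_system_info()` now runs on every call and its result is returned in the second slot, callers can report system context without extra work. A sketch (assuming string keys as in gpu_detection.jl; the exact key set is whatever that function collects):

```julia
results, sysinfo, plots = autotune_setup(make_plot = false)

# Print the system snapshot alongside the benchmark table, e.g. when
# sharing results; "libdl" is one key set by get_detailed_system_info().
for key in sort!(collect(keys(sysinfo)))
    println(rpad(key, 28), sysinfo[key])
end
```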

lib/LinearSolveAutotune/src/algorithms.jl

Lines changed: 7 additions & 42 deletions

@@ -17,6 +17,13 @@ function get_available_algorithms(; skip_missing_algs::Bool = false)
     push!(algs, GenericLUFactorization())
     push!(alg_names, "GenericLUFactorization")
 
+    if blis_jll.is_available()
+        push!(algs, LinearSolve.BLISLUFactorization())
+        push!(alg_names, "BLISLUFactorization")
+    else
+        @warn "blis_jll not available for this platform. BLISLUFactorization will not be included."
+    end
+
     # MKL if available
     if LinearSolve.usemkl
         push!(algs, MKLLUFactorization())
@@ -35,48 +42,6 @@ function get_available_algorithms(; skip_missing_algs::Bool = false)
         end
     end
 
-    # BLIS if JLL packages are available and hardware supports it
-    try
-        # Check if BLIS_jll and LAPACK_jll are available, which enable BLISLUFactorization
-        blis_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll"))
-        lapack_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll"))
-
-        if (blis_jll_available || lapack_jll_available) && isdefined(LinearSolve, :BLISLUFactorization) && hasmethod(LinearSolve.BLISLUFactorization, ())
-            # Test if BLIS works on this hardware
-            try
-                test_alg = LinearSolve.BLISLUFactorization()
-                # Simple test to see if it can be created
-                push!(algs, test_alg)
-                push!(alg_names, "BLISLUFactorization")
-            catch e
-                msg = "BLISLUFactorization available but not supported on this hardware: $e"
-                if skip_missing_algs
-                    @warn msg
-                else
-                    @info msg # BLIS hardware incompatibility is not an error, just info
-                end
-            end
-        else
-            if blis_jll_available || lapack_jll_available
-                msg = "BLIS_jll/LAPACK_jll loaded but BLISLUFactorization not available in LinearSolve"
-            else
-                msg = "BLIS_jll and LAPACK_jll not loaded - BLISLUFactorization requires these JLL packages"
-            end
-            if skip_missing_algs
-                @warn msg
-            else
-                @info msg # Not having BLIS JLL packages is not an error
-            end
-        end
-    catch e
-        msg = "Error checking BLIS JLL package availability: $e"
-        if skip_missing_algs
-            @warn msg
-        else
-            @info msg
-        end
-    end
-
     # RecursiveFactorization - should always be available as it's a hard dependency
     try
         if LinearSolve.userecursivefactorization(nothing)
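
The new guard condenses roughly forty lines of `Base.loaded_modules` probing into one call. Every JLL package generated by BinaryBuilder exports `is_available()`, which is true only when a binary artifact exists for the host platform, so the pattern generalizes. A minimal sketch:

```julia
using blis_jll

if blis_jll.is_available()
    println("BLIS binary found; BLISLUFactorization can be benchmarked")
else
    # Mirrors the @warn branch added above.
    println("no BLIS binary for this platform; skipping BLISLUFactorization")
end
```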

lib/LinearSolveAutotune/src/benchmarking.jl

Lines changed: 6 additions & 11 deletions

@@ -12,17 +12,12 @@ function test_algorithm_compatibility(alg, eltype::Type, test_size::Int = 4)
     alg_name = string(typeof(alg).name.name)
 
     # Define strict compatibility rules for BLAS-dependent algorithms
-    if eltype in [BigFloat, BigInt, Rational{Int}, Complex{BigFloat}]
-        # For arbitrary precision types, only allow pure Julia algorithms
-        if alg_name in ["LUFactorization", "MKLLUFactorization", "AppleAccelerateLUFactorization"]
-            return false # These rely on BLAS and shouldn't work with arbitrary precision
-        end
-        # SimpleLUFactorization, GenericLUFactorization, and RFLUFactorization should work (pure Julia)
-    elseif eltype in [Float16, Complex{Float16}]
-        # Float16 might not be supported by all BLAS
-        if alg_name in ["MKLLUFactorization", "AppleAccelerateLUFactorization"]
-            return false # These might not support Float16
-        end
+    if !(eltype <: LinearAlgebra.BLAS.BlasFloat) && alg_name in ["BLISLUFactorization", "MKLLUFactorization", "AppleAccelerateLUFactorization"]
+        return false # BLAS algorithms not compatible with non-standard types
+    end
+
+    if alg_name == "BLISLUFactorization" && Sys.isapple()
+        return false # BLISLUFactorization has no Apple Silicon binary
     end
 
     # For standard types or algorithms that passed the strict check, test functionality
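
The rewritten check collapses the hand-maintained type lists into a single subtype test: `LinearAlgebra.BLAS.BlasFloat` is the union of the four element types BLAS/LAPACK binaries accept, so everything else is routed away from the BLAS-backed algorithms, Float16 and BigFloat included. A quick illustration of the predicate (standard LinearAlgebra behavior, not code from this repo):

```julia
using LinearAlgebra

# BlasFloat == Union{Float32, Float64, ComplexF32, ComplexF64}
for T in (Float64, ComplexF32, Float16, BigFloat, Rational{Int})
    println(rpad(string(T), 16), T <: LinearAlgebra.BLAS.BlasFloat)
end
# Float64 and ComplexF32 print true; the rest print false, exactly the
# types excluded from the BLAS-dependent algorithms after this change.
```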

lib/LinearSolveAutotune/src/gpu_detection.jl

Lines changed: 1 addition & 5 deletions

@@ -305,11 +305,7 @@ function get_detailed_system_info()
     catch
         system_data["libdl"] = "unknown"
     end
-
-    # JLL package availability for enhanced library access
-    system_data["blis_jll_available"] = LinearSolveAutotune.BLIS_JLL_AVAILABLE[]
-    system_data["lapack_jll_available"] = LinearSolveAutotune.LAPACK_JLL_AVAILABLE[]
-
+
     # Memory information (if available)
     try
         if Sys.islinux()
Binary file not shown. (217 KB)

Binary file not shown.
