Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8475266
Add autotune preference integration to default solver selection
ChrisRackauckas Aug 14, 2025
a28f52a
Optimize autotune preference integration with compile-time constants
ChrisRackauckas Aug 14, 2025
fea6b0c
Complete optimization with all requested improvements
ChrisRackauckas Aug 14, 2025
56a417d
Add algorithm availability checking and fallback system
ChrisRackauckas Aug 14, 2025
59ce71f
Add comprehensive tests for dual preference system integration in def…
ChrisRackauckas Aug 15, 2025
7f9bd67
Add explicit algorithm choice verification tests for dual preference …
ChrisRackauckas Aug 15, 2025
9484b72
Clean up algorithm choice tests and ensure proper preference reset
ChrisRackauckas Aug 15, 2025
5eda050
Add separate Preferences test group with FastLapack algorithm verific…
ChrisRackauckas Aug 15, 2025
5a3f480
Fix preference tests: only print on failure, correct extension-depend…
ChrisRackauckas Aug 15, 2025
913cded
Fix size category boundaries to match LinearSolveAutotune and add com…
ChrisRackauckas Aug 15, 2025
374aba5
Remove unnecessary success prints from FastLapack and RecursiveFactor…
ChrisRackauckas Aug 15, 2025
822ff6a
Add explicit algorithm choice verification for FastLapack and RFLU
ChrisRackauckas Aug 15, 2025
ee4f0b0
Add explicit algorithm choice tests: verify FastLU and RFLU selection…
ChrisRackauckas Aug 15, 2025
6af69d8
Apply suggestions from code review
ChrisRackauckas Aug 15, 2025
89bcb9e
Add comprehensive size category algorithm verification with different…
ChrisRackauckas Aug 15, 2025
6847dc5
Fix algorithm choice test to use AppleAccelerateLUFactorization from …
ChrisRackauckas Aug 15, 2025
19beb8d
Add comprehensive algorithm choice analysis function for testing and …
ChrisRackauckas Aug 15, 2025
a372fdb
Make preference tests strict: require exact algorithm match
ChrisRackauckas Aug 15, 2025
3240462
Remove boundary testing section as requested
ChrisRackauckas Aug 15, 2025
66faf95
Revert "Remove boundary testing section as requested"
ChrisRackauckas Aug 15, 2025
4958c38
Remove non-LU algorithms from _string_to_algorithm_choice
ChrisRackauckas Aug 15, 2025
beeec34
Move show_algorithm_choices to main package and simplify
ChrisRackauckas Aug 15, 2025
c55e420
Update documentation for dual preference system and show_algorithm_ch…
ChrisRackauckas Aug 15, 2025
3dc46f1
Update test/preferences.jl
ChrisRackauckas Aug 15, 2025
7ee156c
Fix FastLapack test to use GenericLUFactorization as always_loaded
ChrisRackauckas Aug 15, 2025
5161904
Add reset_defaults! function for testing preference system integration
ChrisRackauckas Aug 16, 2025
6150d55
Clean up preference system and enhance show_algorithm_choices display
ChrisRackauckas Aug 16, 2025
a86bd4c
Streamline preference tests with single reset_defaults! call
ChrisRackauckas Aug 16, 2025
a52b267
Move preference handling to dedicated src/preferences.jl file
ChrisRackauckas Aug 16, 2025
fbd7155
Fix preference tests: correct FastLU mapping and add preference isola…
ChrisRackauckas Aug 16, 2025
da8f72d
Replace algorithm test with robust RFLU vs GenericLU verification
ChrisRackauckas Aug 16, 2025
0e69356
Update test/preferences.jl
ChrisRackauckas Aug 16, 2025
9881aeb
Clean up preference system: remove analysis.jl, use eval-based testin…
ChrisRackauckas Aug 16, 2025
bb9c717
Rename reset_defaults! to make_preferences_dynamic!
ChrisRackauckas Aug 16, 2025
1be6f5b
Update documentation for final preference system implementation
ChrisRackauckas Aug 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
- "LinearSolvePardiso"
- "NoPre"
- "LinearSolveAutotune"
- "Preferences"
os:
- ubuntu-latest
- macos-latest
Expand Down
181 changes: 181 additions & 0 deletions src/LinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ else
const usemkl = false
end


@reexport using SciMLBase

"""
Expand Down Expand Up @@ -276,6 +277,186 @@ EnumX.@enumx DefaultAlgorithmChoice begin
KrylovJL_LSMR
end

# Autotune preference constants - loaded once at package import time
# Translate a stored autotune preference string into a `DefaultAlgorithmChoice`
# enum value. Returns `nothing` when no preference exists (input is `nothing`)
# or when the stored name is not recognized (after emitting a warning).
function _string_to_algorithm_choice(algorithm_name::Union{String, Nothing})
    algorithm_name === nothing && return nothing

    # Extension-provided or simplified LU variants that have no dedicated
    # entry in the default-selection enum all collapse onto the standard LU.
    standard_lu_aliases = ("SimpleLUFactorization", "FastLUFactorization",
        "BLISLUFactorization", "CudaOffloadLUFactorization",
        "MetalLUFactorization", "AMDGPUOffloadLUFactorization")

    if algorithm_name == "LUFactorization" || algorithm_name in standard_lu_aliases
        return DefaultAlgorithmChoice.LUFactorization
    elseif algorithm_name == "GenericLUFactorization"
        return DefaultAlgorithmChoice.GenericLUFactorization
    elseif algorithm_name == "RFLUFactorization" ||
           algorithm_name == "RecursiveFactorization"
        return DefaultAlgorithmChoice.RFLUFactorization
    elseif algorithm_name == "MKLLUFactorization"
        return DefaultAlgorithmChoice.MKLLUFactorization
    elseif algorithm_name == "AppleAccelerateLUFactorization"
        return DefaultAlgorithmChoice.AppleAccelerateLUFactorization
    # Non-LU algorithms: not typically tuned in default selection, but
    # supported for completeness.
    elseif algorithm_name == "QRFactorization"
        return DefaultAlgorithmChoice.QRFactorization
    elseif algorithm_name == "CholeskyFactorization"
        return DefaultAlgorithmChoice.CholeskyFactorization
    elseif algorithm_name == "SVDFactorization"
        return DefaultAlgorithmChoice.SVDFactorization
    elseif algorithm_name == "BunchKaufmanFactorization"
        return DefaultAlgorithmChoice.BunchKaufmanFactorization
    elseif algorithm_name == "LDLtFactorization"
        return DefaultAlgorithmChoice.LDLtFactorization
    else
        @warn "Unknown algorithm preference: $algorithm_name, falling back to heuristics"
        return nothing
    end
end

# Load autotune preferences as constants for each element type and size category
# Support both best overall algorithm and best always-loaded algorithm as fallback
#
# Layout: AUTOTUNE_PREFS.<eltype>.<size>.best / .fallback, where
#   - <eltype> is one of Float32, Float64, ComplexF32, ComplexF64
#   - <size> is one of tiny/small/medium/large/big (size categories written by
#     the autotune tooling as preference keys)
#   - `best`     : tuned best algorithm overall (may require an extension)
#   - `fallback` : tuned best among always-loaded algorithms
# Each leaf is either a `DefaultAlgorithmChoice` enum value or `nothing` when
# no preference was recorded. Because this is a top-level `const` built from
# `@load_preference`, the values are fixed once at package import time;
# changed preferences take effect only after the package is reloaded.
const AUTOTUNE_PREFS = (
    # Float32 preferences, one (best, fallback) pair per size category.
    Float32 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_big", nothing))
        )
    ),
    # Float64 preferences.
    Float64 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_big", nothing))
        )
    ),
    # ComplexF32 preferences.
    ComplexF32 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_big", nothing))
        )
    ),
    # ComplexF64 preferences.
    ComplexF64 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_big", nothing))
        )
    )
)

# Fast path: check if any autotune preferences are actually set.
# True as soon as a single (best or fallback) entry in AUTOTUNE_PREFS is
# non-`nothing`; lets the default-solver lookup skip all preference work
# in the common untuned case.
const AUTOTUNE_PREFS_SET = any(
    pref.best !== nothing || pref.fallback !== nothing
    for type_prefs in values(AUTOTUNE_PREFS) for pref in values(type_prefs)
)

# Algorithm availability checking functions
"""
    is_algorithm_available(alg::DefaultAlgorithmChoice.T)

Check if the given algorithm is currently available (extensions loaded, etc.).
"""
function is_algorithm_available(alg::DefaultAlgorithmChoice.T)
    # LU variants shipped with the package itself are always usable.
    alg === DefaultAlgorithmChoice.LUFactorization && return true
    alg === DefaultAlgorithmChoice.GenericLUFactorization && return true
    # Conditionally-available backends: probe the corresponding runtime check.
    alg === DefaultAlgorithmChoice.MKLLUFactorization && return usemkl
    alg === DefaultAlgorithmChoice.AppleAccelerateLUFactorization &&
        return appleaccelerate_isavailable()
    alg === DefaultAlgorithmChoice.RFLUFactorization &&
        return userecursivefactorization(nothing)
    # Remaining extension-dependent choices (FastLU, BLIS, CUDA, Metal, ...)
    # have no cheap availability probe here, so they are conservatively
    # reported as unavailable for default selection.
    return false
end

"""
DefaultLinearSolver(;safetyfallback=true)

Expand Down
115 changes: 100 additions & 15 deletions src/default.jl
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,80 @@ end

userecursivefactorization(A) = false

"""
    get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b}, matrix_size) where {eltype_A, eltype_b}

Get the tuned algorithm preference for the given element type and matrix size.
Returns `nothing` if no preference exists. Uses preloaded constants for efficiency.
Fast path when no preferences are set.
"""
@inline function get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b},
        matrix_size::Integer) where {eltype_A, eltype_b}
    # Bail out immediately when autotuning never stored any preference.
    AUTOTUNE_PREFS_SET || return nothing

    # Prefer the matrix eltype; fall back to the rhs eltype when A is absent.
    elt = eltype_A === Nothing ? eltype_b : eltype_A

    # Size buckets match the LinearSolveAutotune category boundaries.
    category = matrix_size <= 20 ? :tiny :
               matrix_size <= 100 ? :small :
               matrix_size <= 300 ? :medium :
               matrix_size <= 1000 ? :large : :big

    # Dispatch to the type-specialized table lookup.
    return _get_tuned_algorithm_impl(elt, category)
end

# Type-specialized implementation with availability checking and fallback logic.
# One method per tuned eltype keeps the table lookup type-stable; any other
# eltype falls through to the `nothing` method at the bottom.
@inline _get_tuned_algorithm_impl(::Type{Float32}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.Float32[category])

@inline _get_tuned_algorithm_impl(::Type{Float64}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.Float64[category])

@inline _get_tuned_algorithm_impl(::Type{ComplexF32}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.ComplexF32[category])

@inline _get_tuned_algorithm_impl(::Type{ComplexF64}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.ComplexF64[category])

# Fallback for element types that were never autotuned.
@inline _get_tuned_algorithm_impl(::Type, ::Symbol) = nothing

# Helper function to choose available algorithm with fallback logic:
# try the tuned `best` first, then the always-loaded `fallback`; return
# `nothing` when neither preference is set or currently loadable.
@inline function _choose_available_algorithm(p)
    for candidate in (p.best, p.fallback)
        if candidate !== nothing && is_algorithm_available(candidate)
            return candidate
        end
    end
    return nothing
end

# Convenience method for when A is nothing - delegate to main implementation,
# keying the preference lookup off the rhs eltype for both slots.
@inline function get_tuned_algorithm(::Type{Nothing}, ::Type{eltype_b},
        matrix_size::Integer) where {eltype_b}
    return get_tuned_algorithm(eltype_b, eltype_b, matrix_size)
end

# Allows A === nothing as a stand-in for dense matrix
function defaultalg(A, b, assump::OperatorAssumptions{Bool})
alg = if assump.issq
Expand All @@ -245,24 +319,35 @@ function defaultalg(A, b, assump::OperatorAssumptions{Bool})
ArrayInterface.can_setindex(b) &&
(__conditioning(assump) === OperatorCondition.IllConditioned ||
__conditioning(assump) === OperatorCondition.WellConditioned)

# Small matrix override - always use GenericLUFactorization for tiny problems
if length(b) <= 10
DefaultAlgorithmChoice.GenericLUFactorization
elseif appleaccelerate_isavailable() && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.AppleAccelerateLUFactorization
elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500) ||
(usemkl && length(b) <= 200)) &&
(A === nothing ? eltype(b) <: Union{Float32, Float64} :
eltype(A) <: Union{Float32, Float64}) &&
userecursivefactorization(A)
DefaultAlgorithmChoice.RFLUFactorization
#elseif A === nothing || A isa Matrix
# alg = FastLUFactorization()
elseif usemkl && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.MKLLUFactorization
else
DefaultAlgorithmChoice.LUFactorization
# Check if autotune preferences exist for larger matrices
matrix_size = length(b)
eltype_A = A === nothing ? Nothing : eltype(A)
tuned_alg = get_tuned_algorithm(eltype_A, eltype(b), matrix_size)

if tuned_alg !== nothing
tuned_alg
elseif appleaccelerate_isavailable() && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.AppleAccelerateLUFactorization
elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500) ||
(usemkl && length(b) <= 200)) &&
(A === nothing ? eltype(b) <: Union{Float32, Float64} :
eltype(A) <: Union{Float32, Float64}) &&
userecursivefactorization(A)
DefaultAlgorithmChoice.RFLUFactorization
#elseif A === nothing || A isa Matrix
# alg = FastLUFactorization()
elseif usemkl && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.MKLLUFactorization
else
DefaultAlgorithmChoice.LUFactorization
end
end
elseif __conditioning(assump) === OperatorCondition.VeryIllConditioned
DefaultAlgorithmChoice.QRFactorization
Expand Down
1 change: 1 addition & 0 deletions test/default_algs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,4 @@ sol = solve(prob,

sol = solve(prob)
@test sol.u β‰ˆ svd(A)\b

Loading
Loading