Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8475266
Add autotune preference integration to default solver selection
ChrisRackauckas Aug 14, 2025
a28f52a
Optimize autotune preference integration with compile-time constants
ChrisRackauckas Aug 14, 2025
fea6b0c
Complete optimization with all requested improvements
ChrisRackauckas Aug 14, 2025
56a417d
Add algorithm availability checking and fallback system
ChrisRackauckas Aug 14, 2025
59ce71f
Add comprehensive tests for dual preference system integration in def…
ChrisRackauckas Aug 15, 2025
7f9bd67
Add explicit algorithm choice verification tests for dual preference …
ChrisRackauckas Aug 15, 2025
9484b72
Clean up algorithm choice tests and ensure proper preference reset
ChrisRackauckas Aug 15, 2025
5eda050
Add separate Preferences test group with FastLapack algorithm verific…
ChrisRackauckas Aug 15, 2025
5a3f480
Fix preference tests: only print on failure, correct extension-depend…
ChrisRackauckas Aug 15, 2025
913cded
Fix size category boundaries to match LinearSolveAutotune and add com…
ChrisRackauckas Aug 15, 2025
374aba5
Remove unnecessary success prints from FastLapack and RecursiveFactor…
ChrisRackauckas Aug 15, 2025
822ff6a
Add explicit algorithm choice verification for FastLapack and RFLU
ChrisRackauckas Aug 15, 2025
ee4f0b0
Add explicit algorithm choice tests: verify FastLU and RFLU selection…
ChrisRackauckas Aug 15, 2025
6af69d8
Apply suggestions from code review
ChrisRackauckas Aug 15, 2025
89bcb9e
Add comprehensive size category algorithm verification with different…
ChrisRackauckas Aug 15, 2025
6847dc5
Fix algorithm choice test to use AppleAccelerateLUFactorization from …
ChrisRackauckas Aug 15, 2025
19beb8d
Add comprehensive algorithm choice analysis function for testing and …
ChrisRackauckas Aug 15, 2025
a372fdb
Make preference tests strict: require exact algorithm match
ChrisRackauckas Aug 15, 2025
3240462
Remove boundary testing section as requested
ChrisRackauckas Aug 15, 2025
66faf95
Revert "Remove boundary testing section as requested"
ChrisRackauckas Aug 15, 2025
4958c38
Remove non-LU algorithms from _string_to_algorithm_choice
ChrisRackauckas Aug 15, 2025
beeec34
Move show_algorithm_choices to main package and simplify
ChrisRackauckas Aug 15, 2025
c55e420
Update documentation for dual preference system and show_algorithm_ch…
ChrisRackauckas Aug 15, 2025
3dc46f1
Update test/preferences.jl
ChrisRackauckas Aug 15, 2025
7ee156c
Fix FastLapack test to use GenericLUFactorization as always_loaded
ChrisRackauckas Aug 15, 2025
5161904
Add reset_defaults! function for testing preference system integration
ChrisRackauckas Aug 16, 2025
6150d55
Clean up preference system and enhance show_algorithm_choices display
ChrisRackauckas Aug 16, 2025
a86bd4c
Streamline preference tests with single reset_defaults! call
ChrisRackauckas Aug 16, 2025
a52b267
Move preference handling to dedicated src/preferences.jl file
ChrisRackauckas Aug 16, 2025
fbd7155
Fix preference tests: correct FastLU mapping and add preference isola…
ChrisRackauckas Aug 16, 2025
da8f72d
Replace algorithm test with robust RFLU vs GenericLU verification
ChrisRackauckas Aug 16, 2025
0e69356
Update test/preferences.jl
ChrisRackauckas Aug 16, 2025
9881aeb
Clean up preference system: remove analysis.jl, use eval-based testin…
ChrisRackauckas Aug 16, 2025
bb9c717
Rename reset_defaults! to make_preferences_dynamic!
ChrisRackauckas Aug 16, 2025
1be6f5b
Update documentation for final preference system implementation
ChrisRackauckas Aug 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
- "LinearSolvePardiso"
- "NoPre"
- "LinearSolveAutotune"
- "Preferences"
os:
- ubuntu-latest
- macos-latest
Expand Down
181 changes: 181 additions & 0 deletions src/LinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ else
const usemkl = false
end


@reexport using SciMLBase

"""
Expand Down Expand Up @@ -276,6 +277,186 @@ EnumX.@enumx DefaultAlgorithmChoice begin
KrylovJL_LSMR
end

# Autotune preference constants - loaded once at package import time
# Translate a stored autotune preference string into a `DefaultAlgorithmChoice`
# enum value. Returns `nothing` when no preference exists (input is `nothing`)
# or when the stored name is not recognized (after emitting a warning).
function _string_to_algorithm_choice(algorithm_name::Union{String, Nothing})
    algorithm_name === nothing && return nothing

    # Extension-provided or simplified LU variants that have no dedicated
    # entry in the default-selection enum all collapse onto the standard LU.
    standard_lu_aliases = ("SimpleLUFactorization", "FastLUFactorization",
        "BLISLUFactorization", "CudaOffloadLUFactorization",
        "MetalLUFactorization", "AMDGPUOffloadLUFactorization")

    if algorithm_name == "LUFactorization" || algorithm_name in standard_lu_aliases
        return DefaultAlgorithmChoice.LUFactorization
    elseif algorithm_name == "GenericLUFactorization"
        return DefaultAlgorithmChoice.GenericLUFactorization
    elseif algorithm_name == "RFLUFactorization" ||
           algorithm_name == "RecursiveFactorization"
        return DefaultAlgorithmChoice.RFLUFactorization
    elseif algorithm_name == "MKLLUFactorization"
        return DefaultAlgorithmChoice.MKLLUFactorization
    elseif algorithm_name == "AppleAccelerateLUFactorization"
        return DefaultAlgorithmChoice.AppleAccelerateLUFactorization
    # Non-LU algorithms: not typically tuned in default selection, but
    # supported for completeness.
    elseif algorithm_name == "QRFactorization"
        return DefaultAlgorithmChoice.QRFactorization
    elseif algorithm_name == "CholeskyFactorization"
        return DefaultAlgorithmChoice.CholeskyFactorization
    elseif algorithm_name == "SVDFactorization"
        return DefaultAlgorithmChoice.SVDFactorization
    elseif algorithm_name == "BunchKaufmanFactorization"
        return DefaultAlgorithmChoice.BunchKaufmanFactorization
    elseif algorithm_name == "LDLtFactorization"
        return DefaultAlgorithmChoice.LDLtFactorization
    else
        @warn "Unknown algorithm preference: $algorithm_name, falling back to heuristics"
        return nothing
    end
end

# Load autotune preferences as constants for each element type and size category
# Support both best overall algorithm and best always-loaded algorithm as fallback
#
# Layout: AUTOTUNE_PREFS.<eltype>.<size>.best / .fallback, where
#   - <eltype> is one of Float32, Float64, ComplexF32, ComplexF64
#   - <size> is one of tiny/small/medium/large/big (size categories written by
#     the autotune tooling as preference keys)
#   - `best`     : tuned best algorithm overall (may require an extension)
#   - `fallback` : tuned best among always-loaded algorithms
# Each leaf is either a `DefaultAlgorithmChoice` enum value or `nothing` when
# no preference was recorded. Because this is a top-level `const` built from
# `@load_preference`, the values are fixed once at package import time;
# changed preferences take effect only after the package is reloaded.
const AUTOTUNE_PREFS = (
    # Float32 preferences, one (best, fallback) pair per size category.
    Float32 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float32_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float32_big", nothing))
        )
    ),
    # Float64 preferences.
    Float64 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_Float64_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_Float64_big", nothing))
        )
    ),
    # ComplexF32 preferences.
    ComplexF32 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF32_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF32_big", nothing))
        )
    ),
    # ComplexF64 preferences.
    ComplexF64 = (
        tiny = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_tiny", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_tiny", nothing))
        ),
        small = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_small", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_small", nothing))
        ),
        medium = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_medium", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_medium", nothing))
        ),
        large = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_large", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_large", nothing))
        ),
        big = (
            best = _string_to_algorithm_choice(Preferences.@load_preference("best_algorithm_ComplexF64_big", nothing)),
            fallback = _string_to_algorithm_choice(Preferences.@load_preference("best_always_loaded_ComplexF64_big", nothing))
        )
    )
)

# Fast path: check if any autotune preferences are actually set.
# True as soon as a single (best or fallback) entry in AUTOTUNE_PREFS is
# non-`nothing`; lets the default-solver lookup skip all preference work
# in the common untuned case.
const AUTOTUNE_PREFS_SET = any(
    pref.best !== nothing || pref.fallback !== nothing
    for type_prefs in values(AUTOTUNE_PREFS) for pref in values(type_prefs)
)

# Algorithm availability checking functions
"""
    is_algorithm_available(alg::DefaultAlgorithmChoice.T)

Check if the given algorithm is currently available (extensions loaded, etc.).
"""
function is_algorithm_available(alg::DefaultAlgorithmChoice.T)
    # LU variants shipped with the package itself are always usable.
    alg === DefaultAlgorithmChoice.LUFactorization && return true
    alg === DefaultAlgorithmChoice.GenericLUFactorization && return true
    # Conditionally-available backends: probe the corresponding runtime check.
    alg === DefaultAlgorithmChoice.MKLLUFactorization && return usemkl
    alg === DefaultAlgorithmChoice.AppleAccelerateLUFactorization &&
        return appleaccelerate_isavailable()
    alg === DefaultAlgorithmChoice.RFLUFactorization &&
        return userecursivefactorization(nothing)
    # Remaining extension-dependent choices (FastLU, BLIS, CUDA, Metal, ...)
    # have no cheap availability probe here, so they are conservatively
    # reported as unavailable for default selection.
    return false
end

"""
DefaultLinearSolver(;safetyfallback=true)

Expand Down
115 changes: 100 additions & 15 deletions src/default.jl
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,80 @@ end

userecursivefactorization(A) = false

"""
    get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b}, matrix_size) where {eltype_A, eltype_b}

Get the tuned algorithm preference for the given element type and matrix size.
Returns `nothing` if no preference exists. Uses preloaded constants for efficiency.
Fast path when no preferences are set.
"""
@inline function get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b},
        matrix_size::Integer) where {eltype_A, eltype_b}
    # Bail out immediately when autotuning never stored any preference.
    AUTOTUNE_PREFS_SET || return nothing

    # Prefer the matrix eltype; fall back to the rhs eltype when A is absent.
    elt = eltype_A === Nothing ? eltype_b : eltype_A

    # Size buckets match the LinearSolveAutotune category boundaries.
    category = matrix_size <= 20 ? :tiny :
               matrix_size <= 100 ? :small :
               matrix_size <= 300 ? :medium :
               matrix_size <= 1000 ? :large : :big

    # Dispatch to the type-specialized table lookup.
    return _get_tuned_algorithm_impl(elt, category)
end

# Type-specialized implementation with availability checking and fallback logic.
# One method per tuned eltype keeps the table lookup type-stable; any other
# eltype falls through to the `nothing` method at the bottom.
@inline _get_tuned_algorithm_impl(::Type{Float32}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.Float32[category])

@inline _get_tuned_algorithm_impl(::Type{Float64}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.Float64[category])

@inline _get_tuned_algorithm_impl(::Type{ComplexF32}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.ComplexF32[category])

@inline _get_tuned_algorithm_impl(::Type{ComplexF64}, category::Symbol) =
    _choose_available_algorithm(AUTOTUNE_PREFS.ComplexF64[category])

# Fallback for element types that were never autotuned.
@inline _get_tuned_algorithm_impl(::Type, ::Symbol) = nothing

# Helper function to choose available algorithm with fallback logic:
# try the tuned `best` first, then the always-loaded `fallback`; return
# `nothing` when neither preference is set or currently loadable.
@inline function _choose_available_algorithm(p)
    for candidate in (p.best, p.fallback)
        if candidate !== nothing && is_algorithm_available(candidate)
            return candidate
        end
    end
    return nothing
end

# Convenience method for when A is nothing - delegate to main implementation,
# keying the preference lookup off the rhs eltype for both slots.
@inline function get_tuned_algorithm(::Type{Nothing}, ::Type{eltype_b},
        matrix_size::Integer) where {eltype_b}
    return get_tuned_algorithm(eltype_b, eltype_b, matrix_size)
end

# Allows A === nothing as a stand-in for dense matrix
function defaultalg(A, b, assump::OperatorAssumptions{Bool})
alg = if assump.issq
Expand All @@ -245,24 +319,35 @@ function defaultalg(A, b, assump::OperatorAssumptions{Bool})
ArrayInterface.can_setindex(b) &&
(__conditioning(assump) === OperatorCondition.IllConditioned ||
__conditioning(assump) === OperatorCondition.WellConditioned)

# Small matrix override - always use GenericLUFactorization for tiny problems
if length(b) <= 10
DefaultAlgorithmChoice.GenericLUFactorization
elseif appleaccelerate_isavailable() && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.AppleAccelerateLUFactorization
elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500) ||
(usemkl && length(b) <= 200)) &&
(A === nothing ? eltype(b) <: Union{Float32, Float64} :
eltype(A) <: Union{Float32, Float64}) &&
userecursivefactorization(A)
DefaultAlgorithmChoice.RFLUFactorization
#elseif A === nothing || A isa Matrix
# alg = FastLUFactorization()
elseif usemkl && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.MKLLUFactorization
else
DefaultAlgorithmChoice.LUFactorization
# Check if autotune preferences exist for larger matrices
matrix_size = length(b)
eltype_A = A === nothing ? Nothing : eltype(A)
tuned_alg = get_tuned_algorithm(eltype_A, eltype(b), matrix_size)

if tuned_alg !== nothing
tuned_alg
elseif appleaccelerate_isavailable() && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.AppleAccelerateLUFactorization
elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500) ||
(usemkl && length(b) <= 200)) &&
(A === nothing ? eltype(b) <: Union{Float32, Float64} :
eltype(A) <: Union{Float32, Float64}) &&
userecursivefactorization(A)
DefaultAlgorithmChoice.RFLUFactorization
#elseif A === nothing || A isa Matrix
# alg = FastLUFactorization()
elseif usemkl && b isa Array &&
eltype(b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
DefaultAlgorithmChoice.MKLLUFactorization
else
DefaultAlgorithmChoice.LUFactorization
end
end
elseif __conditioning(assump) === OperatorCondition.VeryIllConditioned
DefaultAlgorithmChoice.QRFactorization
Expand Down
1 change: 1 addition & 0 deletions test/default_algs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,4 @@ sol = solve(prob,

sol = solve(prob)
@test sol.u β‰ˆ svd(A)\b

Loading
Loading