@@ -174,14 +174,18 @@ end
# Catch-all method: answers `false` for any argument `A`.
# NOTE(review): presumably more specific methods elsewhere opt in — confirm.
function userecursivefactorization(A)
    return false
end
175175
176176"""
177- get_tuned_algorithm(eltype_A, eltype_b, matrix_size)
177+ get_tuned_algorithm(::Type{ eltype_A}, ::Type{ eltype_b} , matrix_size) where {eltype_A, eltype_b}
178178
179179Get the tuned algorithm preference for the given element type and matrix size.
180180Returns `nothing` if no preference exists. Uses preloaded constants for efficiency.
181+ Fast path when no preferences are set.
181182"""
182- @inline function get_tuned_algorithm (eltype_A, eltype_b, matrix_size)
183+ @inline function get_tuned_algorithm (:: Type{eltype_A} , :: Type{eltype_b} , matrix_size:: Integer ) where {eltype_A, eltype_b}
184+ # Fast path: if no preferences are set, return nothing immediately
185+ AUTOTUNE_PREFS_SET || return nothing
186+
183187 # Determine the element type to use for preference lookup
184- target_eltype = eltype_A != = nothing ? eltype_A : eltype_b
188+ target_eltype = eltype_A != = Nothing ? eltype_A : eltype_b
185189
186190 # Determine size category based on matrix size
187191 size_category = if matrix_size <= 128
@@ -194,20 +198,21 @@ Returns `nothing` if no preference exists. Uses preloaded constants for efficien
194198 :big
195199 end
196200
197- # Look up the tuned algorithm from preloaded constants
198- if target_eltype === Float32
199- return getproperty (AUTOTUNE_PREFS. Float32, size_category)
200- elseif target_eltype === Float64
201- return getproperty (AUTOTUNE_PREFS. Float64, size_category)
202- elseif target_eltype === ComplexF32
203- return getproperty (AUTOTUNE_PREFS. ComplexF32, size_category)
204- elseif target_eltype === ComplexF64
205- return getproperty (AUTOTUNE_PREFS. ComplexF64, size_category)
206- else
207- return nothing
208- end
201+ # Look up the tuned algorithm from preloaded constants with type specialization
202+ return _get_tuned_algorithm_impl (target_eltype, size_category)
209203end
210204
# Element-type dispatch for the tuned-algorithm lookup. Each supported element
# type reads its own entry of the preloaded AUTOTUNE_PREFS constant, and
# `size_category` names the property to fetch from that entry.
@inline function _get_tuned_algorithm_impl(::Type{Float32}, size_category::Symbol)
    prefs = AUTOTUNE_PREFS.Float32
    return getproperty(prefs, size_category)
end

@inline function _get_tuned_algorithm_impl(::Type{Float64}, size_category::Symbol)
    prefs = AUTOTUNE_PREFS.Float64
    return getproperty(prefs, size_category)
end

@inline function _get_tuned_algorithm_impl(::Type{ComplexF32}, size_category::Symbol)
    prefs = AUTOTUNE_PREFS.ComplexF32
    return getproperty(prefs, size_category)
end

@inline function _get_tuned_algorithm_impl(::Type{ComplexF64}, size_category::Symbol)
    prefs = AUTOTUNE_PREFS.ComplexF64
    return getproperty(prefs, size_category)
end

# Every other element type has no tuned preference.
@inline function _get_tuned_algorithm_impl(::Type, ::Symbol)
    return nothing
end
211+
# Convenience methods for callers that pass `Nothing` as the element type of A
# (the `A === nothing` case): the lookup is then driven by b's element type.

# Both element types are `Nothing`, so there is no element type to look up and
# no preference can exist. This method also terminates dispatch: without it,
# the delegating method below would call itself with identical arguments and
# recurse until a StackOverflowError.
@inline get_tuned_algorithm(::Type{Nothing}, ::Type{Nothing}, matrix_size::Integer) = nothing

# A has no element type: use b's element type in both positions and delegate
# to the general method.
@inline function get_tuned_algorithm(
        ::Type{Nothing}, ::Type{eltype_b}, matrix_size::Integer) where {eltype_b}
    return get_tuned_algorithm(eltype_b, eltype_b, matrix_size)
end
215+
211216# Allows A === nothing as a stand-in for dense matrix
212217function defaultalg (A, b, assump:: OperatorAssumptions{Bool} )
213218 alg = if assump. issq
@@ -221,30 +226,34 @@ function defaultalg(A, b, assump::OperatorAssumptions{Bool})
221226 (__conditioning (assump) === OperatorCondition. IllConditioned ||
222227 __conditioning (assump) === OperatorCondition. WellConditioned)
223228
224- # First check if autotune preferences exist
225- matrix_size = length (b)
226- tuned_alg = get_tuned_algorithm (A === nothing ? nothing : eltype (A), eltype (b), matrix_size)
227-
228- if tuned_alg != = nothing
229- tuned_alg
230- elseif length (b) <= 10
229+ # Small matrix override - always use GenericLUFactorization for tiny problems
230+ if length (b) <= 10
231231 DefaultAlgorithmChoice. GenericLUFactorization
232- elseif appleaccelerate_isavailable () && b isa Array &&
233- eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
234- DefaultAlgorithmChoice. AppleAccelerateLUFactorization
235- elseif (length (b) <= 100 || (isopenblas () && length (b) <= 500 ) ||
236- (usemkl && length (b) <= 200 )) &&
237- (A === nothing ? eltype (b) <: Union{Float32, Float64} :
238- eltype (A) <: Union{Float32, Float64} ) &&
239- userecursivefactorization (A)
240- DefaultAlgorithmChoice. RFLUFactorization
241- # elseif A === nothing || A isa Matrix
242- # alg = FastLUFactorization()
243- elseif usemkl && b isa Array &&
244- eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
245- DefaultAlgorithmChoice. MKLLUFactorization
246232 else
247- DefaultAlgorithmChoice. LUFactorization
233+ # Check if autotune preferences exist for larger matrices
234+ matrix_size = length (b)
235+ eltype_A = A === nothing ? Nothing : eltype (A)
236+ tuned_alg = get_tuned_algorithm (eltype_A, eltype (b), matrix_size)
237+
238+ if tuned_alg != = nothing
239+ tuned_alg
240+ elseif appleaccelerate_isavailable () && b isa Array &&
241+ eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
242+ DefaultAlgorithmChoice. AppleAccelerateLUFactorization
243+ elseif (length (b) <= 100 || (isopenblas () && length (b) <= 500 ) ||
244+ (usemkl && length (b) <= 200 )) &&
245+ (A === nothing ? eltype (b) <: Union{Float32, Float64} :
246+ eltype (A) <: Union{Float32, Float64} ) &&
247+ userecursivefactorization (A)
248+ DefaultAlgorithmChoice. RFLUFactorization
249+ # elseif A === nothing || A isa Matrix
250+ # alg = FastLUFactorization()
251+ elseif usemkl && b isa Array &&
252+ eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
253+ DefaultAlgorithmChoice. MKLLUFactorization
254+ else
255+ DefaultAlgorithmChoice. LUFactorization
256+ end
248257 end
249258 elseif __conditioning (assump) === OperatorCondition. VeryIllConditioned
250259 DefaultAlgorithmChoice. QRFactorization
0 commit comments