@@ -234,14 +234,18 @@ end
234
234
userecursivefactorization (A) = false
235
235
236
236
"""
237
- get_tuned_algorithm(eltype_A, eltype_b, matrix_size)
237
+ get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b}, matrix_size) where {eltype_A, eltype_b}
238
238
239
239
Get the tuned algorithm preference for the given element type and matrix size.
240
240
Returns `nothing` if no preference exists. Uses preloaded constants for efficiency.
241
+ Fast path when no preferences are set.
241
242
"""
242
- @inline function get_tuned_algorithm(eltype_A, eltype_b, matrix_size)
243
+ @inline function get_tuned_algorithm(::Type{eltype_A}, ::Type{eltype_b}, matrix_size::Integer) where {eltype_A, eltype_b}
244
+ # Fast path: if no preferences are set, return nothing immediately
245
+ AUTOTUNE_PREFS_SET || return nothing
246
+
243
247
# Determine the element type to use for preference lookup
244
- target_eltype = eltype_A !== nothing ? eltype_A : eltype_b
248
+ target_eltype = eltype_A !== Nothing ? eltype_A : eltype_b
245
249
246
250
# Determine size category based on matrix size
247
251
size_category = if matrix_size <= 128
@@ -254,20 +258,21 @@ Returns `nothing` if no preference exists. Uses preloaded constants for efficien
254
258
:big
255
259
end
256
260
257
- # Look up the tuned algorithm from preloaded constants
258
- if target_eltype === Float32
259
- return getproperty(AUTOTUNE_PREFS.Float32, size_category)
260
- elseif target_eltype === Float64
261
- return getproperty(AUTOTUNE_PREFS.Float64, size_category)
262
- elseif target_eltype === ComplexF32
263
- return getproperty(AUTOTUNE_PREFS.ComplexF32, size_category)
264
- elseif target_eltype === ComplexF64
265
- return getproperty(AUTOTUNE_PREFS.ComplexF64, size_category)
266
- else
267
- return nothing
268
- end
261
+ # Look up the tuned algorithm from preloaded constants with type specialization
262
+ return _get_tuned_algorithm_impl(target_eltype, size_category)
269
263
end
270
264
265
+ # Type-specialized implementation for optimal performance
266
+ @inline _get_tuned_algorithm_impl(::Type{Float32}, size_category::Symbol) = getproperty(AUTOTUNE_PREFS.Float32, size_category)
267
+ @inline _get_tuned_algorithm_impl(::Type{Float64}, size_category::Symbol) = getproperty(AUTOTUNE_PREFS.Float64, size_category)
268
+ @inline _get_tuned_algorithm_impl(::Type{ComplexF32}, size_category::Symbol) = getproperty(AUTOTUNE_PREFS.ComplexF32, size_category)
269
+ @inline _get_tuned_algorithm_impl(::Type{ComplexF64}, size_category::Symbol) = getproperty(AUTOTUNE_PREFS.ComplexF64, size_category)
270
+ @inline _get_tuned_algorithm_impl(::Type, ::Symbol) = nothing # Fallback for other types
271
+
272
+ # Convenience method for when A is nothing - delegate to main implementation
273
+ @inline get_tuned_algorithm(::Type{Nothing}, ::Type{eltype_b}, matrix_size::Integer) where {eltype_b} =
274
+ get_tuned_algorithm(eltype_b, eltype_b, matrix_size)
275
+
271
276
# Allows A === nothing as a stand-in for dense matrix
272
277
function defaultalg (A, b, assump:: OperatorAssumptions{Bool} )
273
278
alg = if assump. issq
@@ -281,30 +286,34 @@ function defaultalg(A, b, assump::OperatorAssumptions{Bool})
281
286
(__conditioning (assump) === OperatorCondition. IllConditioned ||
282
287
__conditioning (assump) === OperatorCondition. WellConditioned)
283
288
284
- # First check if autotune preferences exist
285
- matrix_size = length (b)
286
- tuned_alg = get_tuned_algorithm (A === nothing ? nothing : eltype (A), eltype (b), matrix_size)
287
-
288
- if tuned_alg !== nothing
289
- tuned_alg
290
- elseif length (b) <= 10
289
+ # Small matrix override - always use GenericLUFactorization for tiny problems
290
+ if length (b) <= 10
291
291
DefaultAlgorithmChoice. GenericLUFactorization
292
- elseif appleaccelerate_isavailable () && b isa Array &&
293
- eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
294
- DefaultAlgorithmChoice. AppleAccelerateLUFactorization
295
- elseif (length (b) <= 100 || (isopenblas () && length (b) <= 500 ) ||
296
- (usemkl && length (b) <= 200 )) &&
297
- (A === nothing ? eltype (b) <: Union{Float32, Float64} :
298
- eltype (A) <: Union{Float32, Float64} ) &&
299
- userecursivefactorization (A)
300
- DefaultAlgorithmChoice. RFLUFactorization
301
- # elseif A === nothing || A isa Matrix
302
- # alg = FastLUFactorization()
303
- elseif usemkl && b isa Array &&
304
- eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
305
- DefaultAlgorithmChoice. MKLLUFactorization
306
292
else
307
- DefaultAlgorithmChoice. LUFactorization
293
+ # Check if autotune preferences exist for larger matrices
294
+ matrix_size = length (b)
295
+ eltype_A = A === nothing ? Nothing : eltype (A)
296
+ tuned_alg = get_tuned_algorithm (eltype_A, eltype (b), matrix_size)
297
+
298
+ if tuned_alg !== nothing
299
+ tuned_alg
300
+ elseif appleaccelerate_isavailable () && b isa Array &&
301
+ eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
302
+ DefaultAlgorithmChoice. AppleAccelerateLUFactorization
303
+ elseif (length (b) <= 100 || (isopenblas () && length (b) <= 500 ) ||
304
+ (usemkl && length (b) <= 200 )) &&
305
+ (A === nothing ? eltype (b) <: Union{Float32, Float64} :
306
+ eltype (A) <: Union{Float32, Float64} ) &&
307
+ userecursivefactorization (A)
308
+ DefaultAlgorithmChoice. RFLUFactorization
309
+ # elseif A === nothing || A isa Matrix
310
+ # alg = FastLUFactorization()
311
+ elseif usemkl && b isa Array &&
312
+ eltype (b) <: Union{Float32, Float64, ComplexF32, ComplexF64}
313
+ DefaultAlgorithmChoice. MKLLUFactorization
314
+ else
315
+ DefaultAlgorithmChoice. LUFactorization
316
+ end
308
317
end
309
318
elseif __conditioning (assump) === OperatorCondition. VeryIllConditioned
310
319
DefaultAlgorithmChoice. QRFactorization
0 commit comments