SciML
diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml
Lines changed: 1 addition & 0 deletions
diff --git a/Project.toml b/Project.toml
Lines changed: 40 additions & 33 deletions
diff --git a/docs/pages.jl b/docs/pages.jl
Lines changed: 4 additions & 2 deletions
diff --git a/docs/src/advanced/internal_api.md b/docs/src/advanced/internal_api.md
Lines changed: 175 additions & 0 deletions
@@ -37,6 +37,7 @@ jobs:
           - "LinearSolvePardiso"
           - "NoPre"
           - "LinearSolveAutotune"
+          - "Preferences"
         os:
           - ubuntu-latest
           - macos-latest
 
@@ -1,7 +1,7 @@
 name = "LinearSolve"
 uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 authors = ["SciML"]
-version = "3.28.0"
+version = "3.40.2"
 
 [deps]
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -17,6 +17,7 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
@@ -34,6 +35,7 @@ BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
 CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
+CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8"
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
 FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e"
 FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
@@ -59,6 +61,7 @@ LinearSolveBlockDiagonalsExt = "BlockDiagonals"
 LinearSolveCUDAExt = "CUDA"
 LinearSolveCUDSSExt = "CUDSS"
 LinearSolveCUSOLVERRFExt = ["CUSOLVERRF", "SparseArrays"]
+LinearSolveCliqueTreesExt = ["CliqueTrees", "SparseArrays"]
 LinearSolveEnzymeExt = "EnzymeCore"
 LinearSolveFastAlmostBandedMatricesExt = "FastAlmostBandedMatrices"
 LinearSolveFastLapackInterfaceExt = "FastLapackInterface"
@@ -75,60 +78,63 @@ LinearSolveSparseArraysExt = "SparseArrays"
 LinearSolveSparspakExt = ["SparseArrays", "Sparspak"]
 
 [compat]
-AMDGPU = "1"
+AMDGPU = "1.2, 2"
 AllocCheck = "0.2"
 Aqua = "0.8"
-ArrayInterface = "7.7"
-BandedMatrices = "1.5"
+ArrayInterface = "7.17"
+BandedMatrices = "1.8"
 BlockDiagonals = "0.2"
-CUDA = "5"
+CUDA = "5.5"
 CUDSS = "0.4"
 CUSOLVERRF = "0.2.6"
-ChainRulesCore = "1.22"
+ChainRulesCore = "1.25"
+CliqueTrees = "1.11.0"
 ConcreteStructs = "0.2.3"
 DocStringExtensions = "0.9.3"
 EnumX = "1.0.4"
-EnzymeCore = "0.8.1"
-ExplicitImports = "1"
-FastAlmostBandedMatrices = "0.1"
-FastLapackInterface = "2"
-FiniteDiff = "2.22"
-ForwardDiff = "0.10.36, 1"
+EnzymeCore = "0.8.5"
+ExplicitImports = "1.10"
+FastAlmostBandedMatrices = "0.1.4"
+FastLapackInterface = "2.0.4"
+FiniteDiff = "2.26"
+ForwardDiff = "0.10.38, 1"
 GPUArraysCore = "0.2"
-HYPRE = "1.4.0"
+HYPRE = "1.7"
 InteractiveUtils = "1.10"
-IterativeSolvers = "0.9.3"
-KernelAbstractions = "0.9.27"
+IterativeSolvers = "0.9.4"
+KernelAbstractions = "0.9.30"
 Krylov = "0.10"
 KrylovKit = "0.10"
 KrylovPreconditioners = "0.3"
 LAPACK_jll = "3"
-LazyArrays = "1.8, 2"
+LazyArrays = "2.3"
 Libdl = "1.10"
 LinearAlgebra = "1.10"
+MKL_jll = "2019, 2020, 2021, 2022, 2023, 2024, 2025"
 MPI = "0.20"
 Markdown = "1.10"
-Metal = "1"
-MultiFloats = "1"
-Pardiso = "0.5.7, 1"
-Pkg = "1"
+Metal = "1.4"
+MultiFloats = "2.3"
+OpenBLAS_jll = "0.3"
+Pardiso = "1"
+Pkg = "1.10"
 PrecompileTools = "1.2"
 Preferences = "1.4"
-Random = "1"
-RecursiveArrayTools = "3.27.2"
-RecursiveFactorization = "0.2.14"
-Reexport = "1"
+Random = "1.10"
+RecursiveArrayTools = "3.37"
+RecursiveFactorization = "0.2.23"
+Reexport = "1.2.2"
 SafeTestsets = "0.1"
 SciMLBase = "2.70"
-SciMLOperators = "1"
-Setfield = "1"
+SciMLOperators = "1.7.1"
+Setfield = "1.1.1"
 SparseArrays = "1.10"
-Sparspak = "0.3.6"
-StableRNGs = "1"
-StaticArrays = "1.5"
-StaticArraysCore = "1.4.2"
-Test = "1"
-UnPack = "1"
+Sparspak = "0.3.9"
+StableRNGs = "1.0"
+StaticArrays = "1.9"
+StaticArraysCore = "1.4.3"
+Test = "1.10"
+UnPack = "1.0.2"
 Zygote = "0.7"
 blis_jll = "0.9.0"
 julia = "1.10"
@@ -138,6 +144,7 @@ AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
+CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
 FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e"
 FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
@@ -166,4 +173,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff","Mooncake", "HYPRE", "MPI", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "FastLapackInterface", "SparseArrays", "ExplicitImports"]
+test = ["Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "CliqueTrees", "FastLapackInterface", "SparseArrays", "ExplicitImports"]
@@ -8,12 +8,14 @@ pages = ["index.md",
         "tutorials/gpu.md",
         "tutorials/autotune.md"],
     "Basics" => Any["basics/LinearProblem.md",
+        "basics/algorithm_selection.md",
         "basics/common_solver_opts.md",
         "basics/OperatorAssumptions.md",
         "basics/Preconditioners.md",
         "basics/FAQ.md"],
     "Solvers" => Any["solvers/solvers.md"],
-    "Advanced" => Any["advanced/developing.md"
-                      "advanced/custom.md"],
+    "Advanced" => Any["advanced/developing.md",
+                      "advanced/custom.md",
+                      "advanced/internal_api.md"],
     "Release Notes" => "release_notes.md"
 ]
@@ -0,0 +1,175 @@
+# Internal API Documentation
+
+This page documents LinearSolve.jl's internal API, which is useful for developers who want to understand the package's architecture, contribute to the codebase, or develop custom linear solver algorithms.
+
+## Abstract Type Hierarchy
+
+LinearSolve.jl uses a well-structured type hierarchy to organize different classes of linear solver algorithms:
+
+```@docs
+LinearSolve.SciMLLinearSolveAlgorithm
+LinearSolve.AbstractFactorization
+LinearSolve.AbstractDenseFactorization
+LinearSolve.AbstractSparseFactorization
+LinearSolve.AbstractKrylovSubspaceMethod
+LinearSolve.AbstractSolveFunction
+```
+
+## Core Cache System
+
+The caching system is central to LinearSolve.jl's performance and functionality:
+
+```@docs
+LinearSolve.LinearCache
+LinearSolve.init_cacheval
+```
+
+## Algorithm Selection
+
+The automatic algorithm selection is one of LinearSolve.jl's key features:
+
+```@docs
+LinearSolve.defaultalg
+LinearSolve.get_tuned_algorithm
+LinearSolve.is_algorithm_available
+LinearSolve.show_algorithm_choices
+LinearSolve.make_preferences_dynamic!
+```
+
+### Preference System Architecture
+
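+The preference system is "dual": for each size category and element type it stores both the overall fastest algorithm and the fastest always-loaded algorithm, which serves as the fallback when the former is unavailable: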
+
+#### **Core Functions**
+- **`get_tuned_algorithm`**: Retrieves tuned algorithm preferences based on matrix size and element type
+- **`is_algorithm_available`**: Checks if a specific algorithm is currently available (extensions loaded)  
+- **`show_algorithm_choices`**: Analysis function displaying algorithm choices for all element types
+- **`make_preferences_dynamic!`**: Testing function that enables runtime preference checking
+
+#### **Size Categorization**
+The system categorizes matrix sizes to match LinearSolveAutotune benchmarking:
+- **tiny**: ≤20 elements (matrices of size ≤10 always use `GenericLUFactorization`, overriding any preference)
+- **small**: 21-100 elements  
+- **medium**: 101-300 elements
+- **large**: 301-1000 elements
+- **big**: >1000 elements
+
+#### **Dual Preference Structure**
+For each size category and element type (`Float32`, `Float64`, `ComplexF32`, `ComplexF64`), two preferences are stored:
+- `best_algorithm_{type}_{size}`: Overall fastest algorithm from autotune
+- `best_always_loaded_{type}_{size}`: Fastest always-available algorithm (fallback)
+
+#### **Preference File Organization**
+All preference-related functionality is consolidated in `src/preferences.jl`:
+
+**Compile-Time Constants**:
+- `AUTOTUNE_PREFS`: Preference structure loaded at package import
+- `AUTOTUNE_PREFS_SET`: Fast path check for whether any preferences are set
+- `_string_to_algorithm_choice`: Mapping from preference strings to algorithm enums
+
+**Runtime Functions**:
+- `_get_tuned_algorithm_runtime`: Dynamic preference checking for testing
+- `_choose_available_algorithm`: Algorithm availability and fallback logic
+- `show_algorithm_choices`: Comprehensive analysis and display function
+
+**Testing Infrastructure**:
+- `make_preferences_dynamic!`: Eval-based function redefinition for testing
+- Enables runtime preference verification without affecting production performance
+
+#### **Testing Mode Operation**
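+For testing, `make_preferences_dynamic!` uses `eval` to redefine `get_tuned_algorithm` so that preferences are read at runtime instead of from compile-time constants: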
+```julia
+# Production: Uses compile-time constants (maximum performance)
+get_tuned_algorithm(Float64, Float64, 200)  # → Uses AUTOTUNE_PREFS constants
+
+# Testing: Redefines function to use runtime checking
+make_preferences_dynamic!()
+get_tuned_algorithm(Float64, Float64, 200)  # → Uses runtime preference loading
+```
+
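+Because the redefinition only happens when `make_preferences_dynamic!` is called, production code keeps the type-stable, inferable compile-time path, while tests can still verify preference handling at runtime.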
+
+#### **Algorithm Support Scope**
+The preference system focuses exclusively on LU algorithms for dense matrices:
+
+**Supported LU Algorithms**:
+- `LUFactorization`, `GenericLUFactorization`, `RFLUFactorization`
+- `MKLLUFactorization`, `AppleAccelerateLUFactorization`
+- `SimpleLUFactorization`, `FastLUFactorization` (both map to LU)
+- GPU LU variants (CUDA, Metal, AMDGPU - all map to LU)
+
+**Non-LU algorithms** (QR, Cholesky, SVD, etc.) are not included in the preference system
+as they serve different use cases and are not typically the focus of dense matrix autotune optimization.
+
+## Trait Functions
+
+These trait functions help determine algorithm capabilities and requirements:
+
+```@docs
+LinearSolve.needs_concrete_A
+```
+
+## Utility Functions
+
+Various utility functions support the core functionality:
+
+```@docs
+LinearSolve.default_tol
+LinearSolve.default_alias_A
+LinearSolve.default_alias_b
+LinearSolve.__init_u0_from_Ab
+```
+
+## Solve Functions
+
+For custom solving strategies:
+
+```@docs
+LinearSolve.LinearSolveFunction
+LinearSolve.DirectLdiv!
+```
+
+## Preconditioner Infrastructure
+
+The preconditioner system allows for flexible preconditioning strategies:
+
+```@docs
+LinearSolve.ComposePreconditioner
+LinearSolve.InvPreconditioner
+```
+
+## Internal Algorithm Types
+
+These are internal algorithm implementations:
+
+```@docs
+LinearSolve.SimpleLUFactorization
+LinearSolve.LUSolver
+```
+
+## Developer Notes
+
+### Adding New Algorithms
+
+When adding a new linear solver algorithm to LinearSolve.jl:
+
+1. **Choose the appropriate abstract type**: Inherit from the most specific abstract type that fits your algorithm
+2. **Implement required methods**: At minimum, implement `solve!`; also implement `init_cacheval` if your algorithm needs setup or cached state
+3. **Consider trait functions**: Override trait functions like `needs_concrete_A` if needed
+4. **Document thoroughly**: Add comprehensive docstrings following the patterns shown here
+
+### Performance Considerations
+
+- The `LinearCache` system is designed for efficient repeated solves
+- Use `cache.isfresh` to avoid redundant computations when the matrix hasn't changed
+- Consider implementing specialized `init_cacheval` for algorithms that need setup
+- Leverage trait functions to optimize dispatch and memory usage
+
+### Testing Guidelines
+
+When adding new functionality:
+
+- Test with various matrix types (dense, sparse, GPU arrays)
+- Verify caching behavior works correctly
+- Ensure trait functions return appropriate values
+- Test integration with the automatic algorithm selection system