From 4c8d145954afcf3a48b1429ee6b6cd5103ec53ce Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 14 Aug 2025 13:23:39 -0400 Subject: [PATCH 1/3] Add comprehensive docstrings for core LinearSolve types and functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document LinearCache struct with detailed field descriptions - Add docstrings for utility functions: init_cacheval, default_tol, default_alias_* - Document preconditioner types: ComposePreconditioner, InvPreconditioner - Add comprehensive docs for LinearSolveFunction and DirectLdiv! solve functions - Improve RFLUFactorization docs with parameter and performance details - Enhance FastLUFactorization and FastQRFactorization documentation - Document SimpleLUFactorization and LUSolver with usage examples 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/common.jl | 120 +++++++++++++++++++++++++++++++++++++++++ src/extension_algs.jl | 112 ++++++++++++++++++++++++++++++++++---- src/preconditioners.jl | 60 +++++++++++++++++++++ src/simplelu.jl | 89 +++++++++++++++++++++++++++--- src/solve_function.jl | 66 ++++++++++++++++++++++- 5 files changed, 428 insertions(+), 19 deletions(-) diff --git a/src/common.jl b/src/common.jl index a40064e51..a3d8d8971 100644 --- a/src/common.jl +++ b/src/common.jl @@ -65,6 +65,46 @@ end __issquare(assump::OperatorAssumptions) = assump.issq __conditioning(assump::OperatorAssumptions) = assump.condition +""" + LinearCache{TA, Tb, Tu, Tp, Talg, Tc, Tl, Tr, Ttol, issq, S} + +The core cache structure used by LinearSolve for storing and managing the state of linear +solver computations. This mutable struct acts as the primary interface for iterative +solving and caching of factorizations and intermediate results. + +## Fields + +- `A::TA`: The matrix operator of the linear system. +- `b::Tb`: The right-hand side vector of the linear system. +- `u::Tu`: The solution vector (preallocated storage for the result). +- `p::Tp`: Parameters passed to the linear solver algorithm. +- `alg::Talg`: The linear solver algorithm instance. +- `cacheval::Tc`: Algorithm-specific cache storage for factorizations and intermediate computations. +- `isfresh::Bool`: Cache validity flag for the matrix `A`. `false` means `cacheval` is up-to-date + with respect to `A`, `true` means `cacheval` needs to be updated. +- `precsisfresh::Bool`: Cache validity flag for preconditioners. `false` means `Pl` and `Pr` + are up-to-date with respect to `A`, `true` means they need to be updated. +- `Pl::Tl`: Left preconditioner operator. +- `Pr::Tr`: Right preconditioner operator. +- `abstol::Ttol`: Absolute tolerance for iterative solvers. +- `reltol::Ttol`: Relative tolerance for iterative solvers. +- `maxiters::Int`: Maximum number of iterations for iterative solvers. +- `verbose::Bool`: Whether to print verbose output during solving. +- `assumptions::OperatorAssumptions{issq}`: Assumptions about the operator properties. +- `sensealg::S`: Sensitivity analysis algorithm for automatic differentiation. + +## Usage + +The `LinearCache` is typically created via `init(::LinearProblem, ::SciMLLinearSolveAlgorithm)` +and then used with `solve!(cache)` for efficient repeated solves with the same matrix structure +but potentially different right-hand sides or parameter values. + +## Cache Management + +The cache automatically tracks when matrix `A` or parameters `p` change by setting the +appropriate freshness flags. 
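+For illustration, a sketch of the intended reuse pattern (the random 4×4
+problem here is only a hypothetical example; `init` and `solve!` are the
+actual entry points):
+
+```julia
+using LinearSolve
+A = rand(4, 4); b = rand(4)
+cache = init(LinearProblem(A, b), LUFactorization())
+sol1 = solve!(cache)   # factorizes A, then solves
+cache.b = rand(4)      # new RHS: the existing factorization is reused
+sol2 = solve!(cache)
+cache.A = rand(4, 4)   # new matrix: the cache is marked stale
+sol3 = solve!(cache)   # refactorizes before solving
+```
+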
When `solve!` is called, stale cache entries are automatically +recomputed as needed. +""" mutable struct LinearCache{TA, Tb, Tu, Tp, Talg, Tc, Tl, Tr, Ttol, issq, S} A::TA b::Tb @@ -106,19 +146,81 @@ function update_cacheval!(cache::LinearCache, name::Symbol, x) end update_cacheval!(cache, cacheval, name::Symbol, x) = cacheval +""" + init_cacheval(alg::SciMLLinearSolveAlgorithm, args...) + +Initialize algorithm-specific cache values for the given linear solver algorithm. +This function returns `nothing` by default and is intended to be overloaded by +specific algorithm implementations that need to store intermediate computations +or factorizations. + +## Arguments +- `alg`: The linear solver algorithm instance +- `args...`: Additional arguments passed to the cache initialization + +## Returns +Algorithm-specific cache value or `nothing` for algorithms that don't require caching. +""" init_cacheval(alg::SciMLLinearSolveAlgorithm, args...) = nothing function SciMLBase.init(prob::LinearProblem, args...; kwargs...) SciMLBase.init(prob, nothing, args...; kwargs...) end +""" + default_tol(T) + +Compute the default tolerance for iterative linear solvers based on the element type. +The tolerance is typically set as the square root of the machine epsilon for the +given floating point type, ensuring numerical accuracy appropriate for that precision. + +## Arguments +- `T`: The element type of the linear system + +## Returns +- For floating point types: `√(eps(T))` +- For exact types (Rational, Integer): `0` (exact arithmetic) +- For Any type: `0` (conservative default) +""" default_tol(::Type{T}) where {T} = √(eps(T)) default_tol(::Type{Complex{T}}) where {T} = √(eps(T)) default_tol(::Type{<:Rational}) = 0 default_tol(::Type{<:Integer}) = 0 default_tol(::Type{Any}) = 0 +""" + default_alias_A(alg, A, b) -> Bool + +Determine the default aliasing behavior for the matrix `A` given the algorithm type. +Aliasing allows the algorithm to modify the original matrix in-place for efficiency, +but this may not be desirable or safe for all algorithm types. + +## Arguments +- `alg`: The linear solver algorithm +- `A`: The matrix operator +- `b`: The right-hand side vector + +## Returns +- `false`: Safe default, algorithm will not modify the original matrix `A` +- `true`: Algorithm may modify `A` in-place for efficiency + +## Algorithm-Specific Behavior +- Dense factorizations: `false` (destructive, need to preserve original) +- Krylov methods: `true` (non-destructive, safe to alias) +- Sparse factorizations: `true` (typically preserve sparsity structure) +""" default_alias_A(::Any, ::Any, ::Any) = false + +""" + default_alias_b(alg, A, b) -> Bool + +Determine the default aliasing behavior for the right-hand side vector `b` given the +algorithm type. Similar to `default_alias_A` but for the RHS vector. + +## Returns +- `false`: Safe default, algorithm will not modify the original vector `b` +- `true`: Algorithm may modify `b` in-place for efficiency +""" default_alias_b(::Any, ::Any, ::Any) = false # Non-destructive algorithms default to true @@ -130,6 +232,24 @@ default_alias_b(::AbstractSparseFactorization, ::Any, ::Any) = true DEFAULT_PRECS(A, p) = IdentityOperator(size(A)[1]), IdentityOperator(size(A)[2]) +""" + __init_u0_from_Ab(A, b) + +Initialize the solution vector `u0` with appropriate size and type based on the +matrix `A` and right-hand side `b`. The solution vector is allocated with the +same element type as `b` and sized to match the number of columns in `A`. 
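+
+For example, a quick sketch of the expected behavior (this is an internal
+helper, shown only for illustration):
+
+```julia
+A = rand(4, 3); b = rand(4)
+u0 = LinearSolve.__init_u0_from_Ab(A, b)  # zero vector of length 3 with eltype(b)
+```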
+ +## Arguments +- `A`: The matrix operator (determines solution vector size) +- `b`: The right-hand side vector (determines element type) + +## Returns +A zero-initialized vector of size `(size(A, 2),)` with element type matching `b`. + +## Specializations +- For static matrices (`SMatrix`): Returns a static vector (`SVector`) +- For regular matrices: Returns a similar vector to `b` with appropriate size +""" function __init_u0_from_Ab(A, b) u0 = similar(b, size(A, 2)) fill!(u0, false) diff --git a/src/extension_algs.jl b/src/extension_algs.jl index 3224ddfa0..f019a5f6a 100644 --- a/src/extension_algs.jl +++ b/src/extension_algs.jl @@ -174,13 +174,42 @@ end ## RFLUFactorization """ -`RFLUFactorization()` + RFLUFactorization{P, T}(; pivot = Val(true), thread = Val(true)) + +A fast pure Julia LU-factorization implementation using RecursiveFactorization.jl. +This is by far the fastest LU-factorization implementation, usually outperforming +OpenBLAS and MKL for smaller matrices (<500x500), but currently optimized only for +Base `Array` with `Float32` or `Float64`. Additional optimization for complex matrices +is in the works. + +## Type Parameters +- `P`: Pivoting strategy as `Val{Bool}`. `Val{true}` enables partial pivoting for stability. +- `T`: Threading strategy as `Val{Bool}`. `Val{true}` enables multi-threading for performance. + +## Constructor Arguments +- `pivot = Val(true)`: Enable partial pivoting. Set to `Val{false}` to disable for speed + at the cost of numerical stability. +- `thread = Val(true)`: Enable multi-threading. Set to `Val{false}` for single-threaded + execution. +- `throwerror = true`: Whether to throw an error if RecursiveFactorization.jl is not loaded. + +## Performance Notes +- Fastest for dense matrices with dimensions roughly < 500×500 +- Optimized specifically for Float32 and Float64 element types +- Recursive blocking strategy provides excellent cache performance +- Multi-threading can provide significant speedups on multi-core systems + +## Requirements +Using this solver requires that RecursiveFactorization.jl is loaded: `using RecursiveFactorization` -A fast pure Julia LU-factorization implementation -using RecursiveFactorization.jl. This is by far the fastest LU-factorization -implementation, usually outperforming OpenBLAS and MKL for smaller matrices -(<500x500), but currently optimized only for Base `Array` with `Float32` or `Float64`. -Additional optimization for complex matrices is in the works. +## Example +```julia +using RecursiveFactorization +# Fast, stable (with pivoting) +alg1 = RFLUFactorization() +# Fastest (no pivoting), less stable +alg2 = RFLUFactorization(pivot=Val(false)) +``` """ struct RFLUFactorization{P, T} <: AbstractDenseFactorization function RFLUFactorization(::Val{P}, ::Val{T}; throwerror = true) where {P, T} @@ -200,17 +229,78 @@ end # But I'm not sure it makes sense as a GenericFactorization # since it just uses `LAPACK.getrf!`. """ -`FastLUFactorization()` + FastLUFactorization() + +A high-performance LU factorization using the FastLapackInterface.jl package. +This provides an optimized interface to LAPACK routines with reduced overhead +compared to the standard LinearAlgebra LAPACK wrappers. + +## Features +- Reduced function call overhead compared to standard LAPACK wrappers +- Optimized for performance-critical applications +- Uses partial pivoting (no choice of pivoting method available) +- Suitable for dense matrices where maximum performance is required -The FastLapackInterface.jl version of the LU factorization. 
Notably, -this version does not allow for choice of pivoting method. +## Limitations +- Does not allow customization of pivoting strategy (always uses partial pivoting) +- Requires FastLapackInterface.jl to be loaded +- Limited to dense matrix types supported by LAPACK + +## Requirements +Using this solver requires that FastLapackInterface.jl is loaded: `using FastLapackInterface` + +## Performance Notes +This factorization is optimized for cases where the overhead of standard LAPACK +function calls becomes significant, typically for moderate-sized dense matrices +or when performing many factorizations. + +## Example +```julia +using FastLapackInterface +alg = FastLUFactorization() +sol = solve(prob, alg) +``` """ struct FastLUFactorization <: AbstractDenseFactorization end """ -`FastQRFactorization()` + FastQRFactorization{P}(; pivot = ColumnNorm(), blocksize = 36) + +A high-performance QR factorization using the FastLapackInterface.jl package. +This provides an optimized interface to LAPACK QR routines with reduced overhead +compared to the standard LinearAlgebra LAPACK wrappers. + +## Type Parameters +- `P`: The type of pivoting strategy used + +## Fields +- `pivot::P`: Pivoting strategy (e.g., `ColumnNorm()` for column pivoting, `nothing` for no pivoting) +- `blocksize::Int`: Block size for the blocked QR algorithm (default: 36) + +## Features +- Reduced function call overhead compared to standard LAPACK wrappers +- Supports various pivoting strategies for numerical stability +- Configurable block size for optimal performance +- Suitable for dense matrices, especially overdetermined systems -The FastLapackInterface.jl version of the QR factorization. +## Performance Notes +The block size can be tuned for optimal performance depending on matrix size and architecture. +The default value of 36 is generally good for most cases, but experimentation may be beneficial +for specific applications. + +## Requirements +Using this solver requires that FastLapackInterface.jl is loaded: `using FastLapackInterface` + +## Example +```julia +using FastLapackInterface +# QR with column pivoting +alg1 = FastQRFactorization() +# QR without pivoting for speed +alg2 = FastQRFactorization(pivot=nothing) +# Custom block size +alg3 = FastQRFactorization(blocksize=64) +``` """ struct FastQRFactorization{P} <: AbstractDenseFactorization pivot::P diff --git a/src/preconditioners.jl b/src/preconditioners.jl index 423f44b14..088c2fd81 100644 --- a/src/preconditioners.jl +++ b/src/preconditioners.jl @@ -1,5 +1,32 @@ # Tooling Preconditioners +""" + ComposePreconditioner{Ti, To} + +A preconditioner that composes two preconditioners by applying them sequentially. +The inner preconditioner is applied first, followed by the outer preconditioner. +This allows for building complex preconditioning strategies by combining simpler ones. + +## Fields +- `inner::Ti`: The inner (first) preconditioner to apply +- `outer::To`: The outer (second) preconditioner to apply + +## Usage + +```julia +# Compose a diagonal preconditioner with an ILU preconditioner +inner_prec = DiagonalPreconditioner(diag(A)) +outer_prec = ILUFactorization() +composed = ComposePreconditioner(inner_prec, outer_prec) +``` + +The composed preconditioner applies: `outer(inner(x))` for any vector `x`. + +## Mathematical Interpretation + +For a linear system `Ax = b`, if `P₁` is the inner and `P₂` is the outer preconditioner, +then the composed preconditioner effectively applies `P₂P₁` as the combined preconditioner. 
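+
+## Example
+
+A minimal sketch of the application order (the diagonal operators here are
+placeholder preconditioners; any operators supporting `ldiv!` would work):
+
+```julia
+using LinearAlgebra
+inner = Diagonal(rand(4) .+ 1)
+outer = Diagonal(rand(4) .+ 1)
+P = LinearSolve.ComposePreconditioner(inner, outer)
+y = zeros(4); x = rand(4)
+ldiv!(y, P, x)  # y = outer \\ (inner \\ x)
+```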
+""" struct ComposePreconditioner{Ti, To} inner::Ti outer::To @@ -21,6 +48,39 @@ function LinearAlgebra.ldiv!(y, A::ComposePreconditioner, x) ldiv!(outer, y) end +""" + InvPreconditioner{T} + +A preconditioner wrapper that treats a matrix or operator as if it represents +the inverse of the actual preconditioner. Instead of solving `Px = y`, it +computes `P*y` where `P` is stored as the "inverse" preconditioner matrix. + +## Fields +- `P::T`: The stored preconditioner matrix/operator (representing `P⁻¹`) + +## Usage + +This is useful when you have a matrix that approximates the inverse of your +desired preconditioner. For example, if you have computed an approximate +inverse matrix `Ainv ≈ A⁻¹`, you can use: + +```julia +prec = InvPreconditioner(Ainv) +``` + +## Mathematical Interpretation + +For a linear system `Ax = b` with preconditioner `M`, normally we solve `M⁻¹Ax = M⁻¹b`. +With `InvPreconditioner`, the stored matrix `P` represents `M⁻¹` directly, so +applying the preconditioner becomes a matrix-vector multiplication rather than +a linear solve. + +## Methods + +- `ldiv!(A::InvPreconditioner, x)`: Computes `x ← P*x` (in-place) +- `ldiv!(y, A::InvPreconditioner, x)`: Computes `y ← P*x` +- `mul!(y, A::InvPreconditioner, x)`: Computes `y ← P⁻¹*x` (inverse operation) +""" struct InvPreconditioner{T} P::T end diff --git a/src/simplelu.jl b/src/simplelu.jl index 9c1ad0bf7..9917f5869 100644 --- a/src/simplelu.jl +++ b/src/simplelu.jl @@ -1,5 +1,44 @@ ## From https://github.com/JuliaGNI/SimpleSolvers.jl/blob/master/src/linear/lu_solver.jl +""" + LUSolver{T} + +A mutable workspace for performing LU factorization and solving linear systems. +This struct maintains all necessary arrays and state information for the +factorization and solve phases, allowing for efficient reuse when solving +multiple systems with the same matrix structure. + +## Fields +- `n::Int`: Dimension of the square matrix +- `A::Matrix{T}`: Working copy of the matrix to be factorized (modified in-place) +- `b::Vector{T}`: Right-hand side vector storage +- `x::Vector{T}`: Solution vector storage +- `pivots::Vector{Int}`: Pivot indices from the factorization +- `perms::Vector{Int}`: Permutation vector tracking row exchanges +- `info::Int`: Status information (0 = success, >0 indicates singularity) + +## Constructor +```julia +LUSolver{T}(n) # Create solver for n×n matrix with element type T +``` + +## Usage +The solver is typically created from a matrix using the convenience constructors: +```julia +solver = LUSolver(A) # From matrix A +solver = LUSolver(A, b) # From matrix A and RHS b +``` + +Then factorized and solved: +```julia +simplelu_factorize!(solver) # Perform LU factorization +simplelu_solve!(solver) # Solve for the stored RHS +``` + +## Notes +This is a pure Julia implementation primarily for educational purposes and +small matrices. For production use, prefer optimized LAPACK-based factorizations. +""" mutable struct LUSolver{T} n::Int A::Matrix{T} @@ -114,13 +153,49 @@ end ### Wrapper """ -`SimpleLUFactorization(pivot::Bool = true)` - -A simple LU-factorization implementation without BLAS. Fast for small matrices. - -## Positional Arguments - - - pivot::Bool: whether to perform pivoting. Defaults to `true` + SimpleLUFactorization(pivot::Bool = true) + +A pure Julia LU factorization implementation without BLAS dependencies. +This solver is optimized for small matrices and situations where BLAS +is not available or desirable. 
+
+## Constructor Arguments
+- `pivot::Bool = true`: Whether to perform partial pivoting for numerical stability.
+  Set to `false` for slightly better performance at the cost of stability.
+
+## Features
+- Pure Julia implementation (no BLAS dependencies)
+- Partial pivoting support for numerical stability
+- In-place matrix modification for memory efficiency
+- Fast for small matrices (typically < 100×100)
+- Educational value for understanding LU factorization
+
+## Performance Characteristics
+- Optimal for small dense matrices
+- No overhead from BLAS calls
+- Standard O(n³) operation count (cubic growth with problem size)
+- Memory efficient due to in-place operations
+
+## Use Cases
+- Small matrices where BLAS overhead is significant
+- Systems without optimized BLAS libraries
+- Educational and prototyping purposes
+- Embedded systems with memory constraints
+
+## Example
+```julia
+# Stable version with pivoting (default)
+alg1 = SimpleLUFactorization()
+# Faster version without pivoting
+alg2 = SimpleLUFactorization(false)
+
+prob = LinearProblem(A, b)
+sol = solve(prob, alg1)
+```
+
+## Notes
+For larger matrices (> 100×100), consider using BLAS-based factorizations
+like `LUFactorization()` for better performance.
 """
 struct SimpleLUFactorization <: AbstractFactorization
     pivot::Bool
diff --git a/src/solve_function.jl b/src/solve_function.jl
index 5c74199cb..a9680d71b 100644
--- a/src/solve_function.jl
+++ b/src/solve_function.jl
@@ -1,4 +1,46 @@
-#
+"""
+    LinearSolveFunction{F} <: AbstractSolveFunction
+
+A flexible wrapper that allows using custom functions as linear solver algorithms.
+This provides a way to integrate user-defined solving strategies into the LinearSolve.jl
+framework while maintaining compatibility with the caching and interface systems.
+
+## Fields
+- `solve_func::F`: A callable that implements the custom linear solving logic
+
+## Function Signature
+
+The wrapped function should have the signature:
+```julia
+solve_func(A, b, u, p, isfresh, Pl, Pr, cacheval; kwargs...)
+```
+
+## Arguments to wrapped function
+- `A`: The matrix operator of the linear system
+- `b`: The right-hand side vector
+- `u`: Pre-allocated solution vector (can be used as working space)
+- `p`: Parameters passed to the solver
+- `isfresh`: Boolean indicating if the matrix `A` has changed since last solve
+- `Pl`: Left preconditioner operator
+- `Pr`: Right preconditioner operator
+- `cacheval`: Algorithm-specific cache storage
+- `kwargs...`: Additional keyword arguments
+
+## Returns
+The wrapped function should return a solution vector.
+
+## Example
+
+```julia
+function my_custom_solver(A, b, u, p, isfresh, Pl, Pr, cacheval; kwargs...)
+    # Custom solving logic here
+    return A \\ b # Simple example
+end
+
+alg = LinearSolveFunction(my_custom_solver)
+sol = solve(prob, alg)
+```
+"""
 struct LinearSolveFunction{F} <: AbstractSolveFunction
     solve_func::F
 end
@@ -12,6 +54,28 @@ function SciMLBase.solve!(cache::LinearCache, alg::LinearSolveFunction,
     return SciMLBase.build_linear_solution(alg, u, nothing, cache)
 end
 
+"""
+    DirectLdiv! <: AbstractSolveFunction
+
+A simple linear solver that directly applies the left-division operator (`\\`)
+to solve the linear system. This algorithm calls `ldiv!(u, A, b)` which computes
+`u = A \\ b` in-place.
+ +## Usage + +```julia +alg = DirectLdiv!() +sol = solve(prob, alg) +``` + +## Notes + +- This is essentially a direct wrapper around Julia's built-in `ldiv!` function +- Suitable for cases where the matrix `A` has a natural inverse or factorization +- Performance depends on the specific matrix type and its `ldiv!` implementation +- No preconditioners or advanced numerical techniques are applied +- Best used for small to medium problems or when `A` has special structure +""" struct DirectLdiv! <: AbstractSolveFunction end function SciMLBase.solve!(cache::LinearCache, alg::DirectLdiv!, args...; kwargs...) From c303ed2c5d2cbe93e8d4e3613e35cb8cd3f60363 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 14 Aug 2025 13:35:52 -0400 Subject: [PATCH 2/3] Add documentation for abstract types and algorithm selection logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document complete abstract type hierarchy with detailed explanations - Add comprehensive docstring for defaultalg function with algorithm selection logic - Document needs_concrete_A trait function with usage examples - Provide detailed characteristics and use cases for each abstract type - Include cross-references between related algorithm types 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/LinearSolve.jl | 147 +++++++++++++++++++++++++++++++++++++++++++++ src/default.jl | 60 ++++++++++++++++++ 2 files changed, 207 insertions(+) diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index 5afa07ad9..6041680c6 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -60,15 +60,162 @@ end @reexport using SciMLBase +""" + SciMLLinearSolveAlgorithm <: SciMLBase.AbstractLinearAlgorithm + +The root abstract type for all linear solver algorithms in LinearSolve.jl. +All concrete linear solver implementations should inherit from one of the +specialized subtypes rather than directly from this type. + +This type integrates with the SciMLBase ecosystem, providing a consistent +interface for linear algebra operations across the Julia scientific computing +ecosystem. +""" abstract type SciMLLinearSolveAlgorithm <: SciMLBase.AbstractLinearAlgorithm end + +""" + AbstractFactorization <: SciMLLinearSolveAlgorithm + +Abstract type for linear solvers that work by computing a matrix factorization. +These algorithms typically decompose the matrix `A` into a product of simpler +matrices (e.g., `A = LU`, `A = QR`, `A = LDL'`) and then solve the system +using forward/backward substitution. + +## Characteristics +- Requires concrete matrix representation (`needs_concrete_A() = true`) +- Typically efficient for multiple solves with the same matrix +- Generally provides high accuracy for well-conditioned problems +- Memory requirements depend on the specific factorization type + +## Subtypes +- `AbstractDenseFactorization`: For dense matrix factorizations +- `AbstractSparseFactorization`: For sparse matrix factorizations + +## Examples of concrete subtypes +- `LUFactorization`, `QRFactorization`, `CholeskyFactorization` +- `UMFPACKFactorization`, `KLUFactorization` +""" abstract type AbstractFactorization <: SciMLLinearSolveAlgorithm end + +""" + AbstractSparseFactorization <: AbstractFactorization + +Abstract type for factorization-based linear solvers optimized for sparse matrices. +These algorithms take advantage of sparsity patterns to reduce memory usage and +computational cost compared to dense factorizations. 
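+
+For illustration, a sketch of a typical sparse solve (the random matrix is a
+placeholder, and `KLUFactorization` may require its backing package to be loaded):
+
+```julia
+using LinearSolve, SparseArrays, LinearAlgebra
+A = sprand(100, 100, 0.05) + 10I
+b = rand(100)
+sol = solve(LinearProblem(A, b), KLUFactorization())
+```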
+
+## Characteristics
+- Optimized for matrices with many zero entries
+- Often use specialized pivoting strategies to preserve sparsity
+- May reorder rows/columns to minimize fill-in during factorization
+- Typically more memory-efficient than dense methods for sparse problems
+
+## Examples of concrete subtypes
+- `UMFPACKFactorization`: General sparse LU with partial pivoting
+- `KLUFactorization`: Sparse LU optimized for circuit simulation
+- `CHOLMODFactorization`: Sparse Cholesky for positive definite systems
+- `SparspakFactorization`: Envelope/profile method for sparse systems
+"""
 abstract type AbstractSparseFactorization <: AbstractFactorization end
 
+"""
+    AbstractDenseFactorization <: AbstractFactorization
+
+Abstract type for factorization-based linear solvers optimized for dense matrices.
+These algorithms assume the matrix has no particular sparsity structure and use
+dense linear algebra routines (typically from BLAS/LAPACK) for optimal performance.
+
+## Characteristics
+- Optimized for matrices with few zeros or no sparsity structure
+- Leverage highly optimized BLAS/LAPACK routines when available
+- Generally provide excellent performance for moderately-sized dense problems
+- Memory usage scales as O(n²) with matrix size
+
+## Examples of concrete subtypes
+- `LUFactorization`: Dense LU with partial pivoting (via LAPACK)
+- `QRFactorization`: Dense QR factorization for overdetermined systems
+- `CholeskyFactorization`: Dense Cholesky for symmetric positive definite matrices
+- `BunchKaufmanFactorization`: For symmetric indefinite matrices
+"""
 abstract type AbstractDenseFactorization <: AbstractFactorization end
 
+"""
+    AbstractKrylovSubspaceMethod <: SciMLLinearSolveAlgorithm
+
+Abstract type for iterative linear solvers based on Krylov subspace methods.
+These algorithms solve linear systems by iteratively building an approximation
+from a sequence of Krylov subspaces, without requiring explicit matrix factorization.
+
+## Characteristics
+- Does not require concrete matrix representation (`needs_concrete_A() = false`)
+- Only needs matrix-vector products `A*v` (can work with operators/functions)
+- Memory usage typically O(n) or O(kn) where k << n
+- Convergence depends on matrix properties (condition number, eigenvalue distribution)
+- Often benefits significantly from preconditioning
+
+## Advantages
+- Low memory requirements for large sparse systems
+- Can handle matrix-free operators (functions that compute `A*v`)
+- Often the only feasible approach for very large systems
+- Can exploit matrix structure through specialized operators
+
+## Examples of concrete subtypes
+- `KrylovJL_GMRES`: Generalized Minimal Residual method (via Krylov.jl)
+- `KrylovJL_CG`: Conjugate Gradient (for symmetric positive definite systems)
+- `KrylovJL_BICGSTAB`: Bi-Conjugate Gradient Stabilized (via Krylov.jl)
+- Wrapped external iterative solvers (KrylovKit.jl, IterativeSolvers.jl)
+"""
 abstract type AbstractKrylovSubspaceMethod <: SciMLLinearSolveAlgorithm end
 
+"""
+    AbstractSolveFunction <: SciMLLinearSolveAlgorithm
+
+Abstract type for linear solvers that wrap custom solving functions or
+provide direct interfaces to specific solve methods. These provide flexibility
+for integrating custom algorithms or simple solve strategies.
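+
+For illustration, the smallest possible custom wrapper is sketched below (the
+solver body is a placeholder; see `LinearSolveFunction` for the full argument list):
+
+```julia
+using LinearSolve
+myf(A, b, u, p, isfresh, Pl, Pr, cacheval; kwargs...) = (u .= A \\ b)
+prob = LinearProblem(rand(4, 4), rand(4))
+sol = solve(prob, LinearSolveFunction(myf))
+```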
+
+## Characteristics
+- Does not require concrete matrix representation (`needs_concrete_A() = false`)
+- Provides maximum flexibility for custom solving strategies
+- Can wrap external solver libraries or implement specialized algorithms
+- Performance and stability depend entirely on the wrapped implementation
+
+## Examples of concrete subtypes
+- `LinearSolveFunction`: Wraps arbitrary user-defined solve functions
+- `DirectLdiv!`: Direct application of the `\\` operator
+"""
 abstract type AbstractSolveFunction <: SciMLLinearSolveAlgorithm end
 
 # Traits
+"""
+    needs_concrete_A(alg) -> Bool
+
+Trait function that determines whether a linear solver algorithm requires
+a concrete matrix representation or can work with abstract operators.
+
+## Arguments
+- `alg`: A linear solver algorithm instance
+
+## Returns
+- `true`: Algorithm requires a concrete matrix (e.g., for factorization)
+- `false`: Algorithm can work with abstract operators (e.g., matrix-free methods)
+
+## Usage
+This trait is used internally by LinearSolve.jl to optimize algorithm dispatch
+and determine when matrix operators need to be converted to concrete arrays.
+
+## Algorithm-Specific Behavior
+- `AbstractFactorization`: `true` (needs explicit matrix entries for factorization)
+- `AbstractKrylovSubspaceMethod`: `false` (only needs matrix-vector products)
+- `AbstractSolveFunction`: `false` (the wrapped function handles its own requirements)
+
+## Example
+```julia
+needs_concrete_A(LUFactorization()) # true
+needs_concrete_A(KrylovJL_GMRES()) # false
+```
+"""
 needs_concrete_A(alg::AbstractFactorization) = true
 needs_concrete_A(alg::AbstractSparseFactorization) = true
 needs_concrete_A(alg::AbstractKrylovSubspaceMethod) = false
diff --git a/src/default.jl b/src/default.jl
index 6604d3ade..27ff8a5b6 100644
--- a/src/default.jl
+++ b/src/default.jl
@@ -47,6 +47,66 @@ function __setfield!(cache::DefaultLinearSolverInit,
     setfield!(cache, :QRFactorizationPivoted, v)
 end
 
+"""
+    defaultalg(A, b, assumptions::OperatorAssumptions)
+
+Select the most appropriate linear solver algorithm based on the matrix `A`,
+right-hand side `b`, and operator assumptions. This is the core algorithm
+selection logic used by LinearSolve.jl's automatic algorithm choice.
+
+## Arguments
+- `A`: The matrix operator (can be a matrix, factorization, or abstract operator)
+- `b`: The right-hand side vector
+- `assumptions`: Operator assumptions including square matrix flag and conditioning
+
+## Returns
+A `DefaultLinearSolver` instance configured with the most appropriate algorithm choice,
+or a specific algorithm instance for certain special cases.
+
+## Algorithm Selection Logic
+
+The function uses a hierarchy of dispatch rules based on:
+
+1. **Matrix Type**: Special handling for structured matrices (Diagonal, Tridiagonal, etc.)
+2. **Matrix Properties**: Square vs. rectangular, sparse vs. dense
+3. **Hardware**: GPU vs. CPU arrays
+4. **Conditioning**: Well-conditioned vs. ill-conditioned systems
+5. **Size**: Small vs. large matrices for performance optimization
+
+## Common Algorithm Choices
+
+- **Diagonal matrices**: `DiagonalFactorization` for optimal O(n) performance
+- **Tridiagonal/Bidiagonal**: Direct methods or specialized factorizations
+- **Dense matrices**: LU, QR, or Cholesky based on structure and conditioning
+- **Sparse matrices**: Specialized sparse factorizations (UMFPACK, KLU, etc.)
+- **GPU arrays**: QR or LU factorizations optimized for GPU computation +- **Abstract operators**: Krylov methods (GMRES, CRAIGMR, LSMR) +- **Symmetric positive definite**: Cholesky factorization +- **Symmetric indefinite**: Bunch-Kaufman factorization + +## Examples + +```julia +# Dense square matrix - typically chooses LU +A = rand(100, 100) +b = rand(100) +alg = defaultalg(A, b, OperatorAssumptions(true)) + +# Overdetermined system - typically chooses QR +A = rand(100, 50) +b = rand(100) +alg = defaultalg(A, b, OperatorAssumptions(false)) + +# Diagonal matrix - chooses diagonal factorization +A = Diagonal(rand(100)) +alg = defaultalg(A, b, OperatorAssumptions(true)) +``` + +## Notes +This function is primarily used internally by `solve(::LinearProblem)` when no +explicit algorithm is provided. For manual algorithm selection, users can +directly instantiate specific algorithm types. +""" # Legacy fallback # For SciML algorithms already using `defaultalg`, all assume square matrix. defaultalg(A, b) = defaultalg(A, b, OperatorAssumptions(true)) From 013d93223ebf95620b1f33ebb13bb80e6845e2cf Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Thu, 14 Aug 2025 17:10:37 -0400 Subject: [PATCH 3/3] Add comprehensive documentation for internal APIs and algorithm selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create new Internal API documentation page with complete docstring coverage * Document abstract type hierarchy with detailed explanations * Add LinearCache and caching system documentation * Include trait functions and utility function docs * Cover solve functions and preconditioner infrastructure - Add Algorithm Selection Guide for users * Explain automatic algorithm selection logic with examples * Provide performance guidance for different matrix types * Include decision flowchart and manual override examples * Cover dense, sparse, GPU, and iterative method selection - Enhance existing solver documentation * Add DirectLdiv! and LinearSolveFunction to solver list * Update documentation navigation structure - Update documentation navigation * Add internal_api.md to Advanced section * Add algorithm_selection.md to Basics section This significantly improves both user and developer documentation by making the package's architecture and algorithm selection transparent and accessible. 
🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- docs/pages.jl | 6 +- docs/src/advanced/internal_api.md | 106 ++++++++++++++++ docs/src/basics/algorithm_selection.md | 163 +++++++++++++++++++++++++ docs/src/solvers/solvers.md | 2 + 4 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 docs/src/advanced/internal_api.md create mode 100644 docs/src/basics/algorithm_selection.md diff --git a/docs/pages.jl b/docs/pages.jl index e16ee1657..032a2235c 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -8,12 +8,14 @@ pages = ["index.md", "tutorials/gpu.md", "tutorials/autotune.md"], "Basics" => Any["basics/LinearProblem.md", + "basics/algorithm_selection.md", "basics/common_solver_opts.md", "basics/OperatorAssumptions.md", "basics/Preconditioners.md", "basics/FAQ.md"], "Solvers" => Any["solvers/solvers.md"], - "Advanced" => Any["advanced/developing.md" - "advanced/custom.md"], + "Advanced" => Any["advanced/developing.md", + "advanced/custom.md", + "advanced/internal_api.md"], "Release Notes" => "release_notes.md" ] diff --git a/docs/src/advanced/internal_api.md b/docs/src/advanced/internal_api.md new file mode 100644 index 000000000..98d79a2f9 --- /dev/null +++ b/docs/src/advanced/internal_api.md @@ -0,0 +1,106 @@ +# Internal API Documentation + +This page documents LinearSolve.jl's internal API, which is useful for developers who want to understand the package's architecture, contribute to the codebase, or develop custom linear solver algorithms. + +## Abstract Type Hierarchy + +LinearSolve.jl uses a well-structured type hierarchy to organize different classes of linear solver algorithms: + +```@docs +LinearSolve.SciMLLinearSolveAlgorithm +LinearSolve.AbstractFactorization +LinearSolve.AbstractDenseFactorization +LinearSolve.AbstractSparseFactorization +LinearSolve.AbstractKrylovSubspaceMethod +LinearSolve.AbstractSolveFunction +``` + +## Core Cache System + +The caching system is central to LinearSolve.jl's performance and functionality: + +```@docs +LinearSolve.LinearCache +LinearSolve.init_cacheval +``` + +## Algorithm Selection + +The automatic algorithm selection is one of LinearSolve.jl's key features: + +```@docs +LinearSolve.defaultalg +``` + +## Trait Functions + +These trait functions help determine algorithm capabilities and requirements: + +```@docs +LinearSolve.needs_concrete_A +``` + +## Utility Functions + +Various utility functions support the core functionality: + +```@docs +LinearSolve.default_tol +LinearSolve.default_alias_A +LinearSolve.default_alias_b +LinearSolve.__init_u0_from_Ab +``` + +## Solve Functions + +For custom solving strategies: + +```@docs +LinearSolve.LinearSolveFunction +LinearSolve.DirectLdiv! +``` + +## Preconditioner Infrastructure + +The preconditioner system allows for flexible preconditioning strategies: + +```@docs +LinearSolve.ComposePreconditioner +LinearSolve.InvPreconditioner +``` + +## Internal Algorithm Types + +These are internal algorithm implementations: + +```@docs +LinearSolve.SimpleLUFactorization +LinearSolve.LUSolver +``` + +## Developer Notes + +### Adding New Algorithms + +When adding a new linear solver algorithm to LinearSolve.jl: + +1. **Choose the appropriate abstract type**: Inherit from the most specific abstract type that fits your algorithm +2. **Implement required methods**: At minimum, implement `solve!` and possibly `init_cacheval` +3. **Consider trait functions**: Override trait functions like `needs_concrete_A` if needed +4. 
**Document thoroughly**: Add comprehensive docstrings following the patterns shown here + +### Performance Considerations + +- The `LinearCache` system is designed for efficient repeated solves +- Use `cache.isfresh` to avoid redundant computations when the matrix hasn't changed +- Consider implementing specialized `init_cacheval` for algorithms that need setup +- Leverage trait functions to optimize dispatch and memory usage + +### Testing Guidelines + +When adding new functionality: + +- Test with various matrix types (dense, sparse, GPU arrays) +- Verify caching behavior works correctly +- Ensure trait functions return appropriate values +- Test integration with the automatic algorithm selection system \ No newline at end of file diff --git a/docs/src/basics/algorithm_selection.md b/docs/src/basics/algorithm_selection.md new file mode 100644 index 000000000..5fa1348b8 --- /dev/null +++ b/docs/src/basics/algorithm_selection.md @@ -0,0 +1,163 @@ +# Algorithm Selection Guide + +LinearSolve.jl automatically selects appropriate algorithms based on your problem characteristics, but understanding how this works can help you make better choices for your specific use case. + +## Automatic Algorithm Selection + +When you call `solve(prob)` without specifying an algorithm, LinearSolve.jl uses intelligent heuristics to choose the best solver: + +```julia +using LinearSolve + +# LinearSolve.jl automatically chooses the best algorithm +A = rand(100, 100) +b = rand(100) +prob = LinearProblem(A, b) +sol = solve(prob) # Automatic algorithm selection +``` + +The selection process considers: + +- **Matrix type**: Dense vs. sparse vs. structured matrices +- **Matrix properties**: Square vs. rectangular, symmetric, positive definite +- **Size**: Small vs. large matrices for performance optimization +- **Hardware**: CPU vs. GPU arrays +- **Conditioning**: Well-conditioned vs. 
ill-conditioned systems
+
+## Algorithm Categories
+
+LinearSolve.jl organizes algorithms into several categories:
+
+### Factorization Methods
+
+These algorithms decompose your matrix into simpler components:
+
+- **Dense factorizations**: Best for matrices without special sparsity structure
+  - `LUFactorization()`: General-purpose, good balance of speed and stability
+  - `QRFactorization()`: More stable for ill-conditioned problems
+  - `CholeskyFactorization()`: Fastest for symmetric positive definite matrices
+
+- **Sparse factorizations**: Optimized for matrices with many zeros
+  - `UMFPACKFactorization()`: General sparse LU with good fill-in control
+  - `KLUFactorization()`: Optimized for circuit simulation problems
+
+### Iterative Methods
+
+These solve the system iteratively without explicit factorization:
+
+- **Krylov methods**: Memory-efficient for large sparse systems
+  - `KrylovJL_GMRES()`: General-purpose iterative solver
+  - `KrylovJL_CG()`: For symmetric positive definite systems
+
+### Direct Methods
+
+Simple direct approaches:
+
+- `DirectLdiv!()`: Uses Julia's built-in `\` operator
+- `DiagonalFactorization()`: Optimized for diagonal matrices
+
+## Performance Characteristics
+
+### Dense Matrices
+
+For dense matrices, algorithm choice depends on size and conditioning:
+
+```julia
+# Small matrices (< 100×100): SimpleLUFactorization often fastest
+A_small = rand(50, 50)
+sol = solve(LinearProblem(A_small, rand(50)), SimpleLUFactorization())
+
+# Medium matrices (roughly 100–500 per side): RFLUFactorization often optimal
+A_medium = rand(200, 200)
+sol = solve(LinearProblem(A_medium, rand(200)), RFLUFactorization())
+
+# Large matrices (> 500×500): MKLLUFactorization or AppleAccelerate
+A_large = rand(1000, 1000)
+sol = solve(LinearProblem(A_large, rand(1000)), MKLLUFactorization())
+```
+
+### Sparse Matrices
+
+For sparse matrices, structure matters:
+
+```julia
+using SparseArrays
+
+# General sparse matrices
+A_sparse = sprand(1000, 1000, 0.01)
+sol = solve(LinearProblem(A_sparse, rand(1000)), UMFPACKFactorization())
+
+# Structured sparse (e.g., from discretized PDEs)
+# KLUFactorization often better for circuit-like problems
+```
+
+### GPU Acceleration
+
+For very large problems, GPU offloading can be beneficial:
+
+```julia
+# Requires CUDA.jl
+# A_gpu = CuArray(rand(Float32, 2000, 2000))
+# sol = solve(LinearProblem(A_gpu, CuArray(rand(Float32, 2000))),
+#             CudaOffloadLUFactorization())
+```
+
+## When to Override Automatic Selection
+
+You might want to manually specify an algorithm when:
+
+1. **You know your problem structure**: E.g., you know your matrix is positive definite
+   ```julia
+   sol = solve(prob, CholeskyFactorization())  # Faster for SPD matrices
+   ```
+
+2. **You need maximum stability**: For ill-conditioned problems
+   ```julia
+   sol = solve(prob, QRFactorization())  # More numerically stable
+   ```
+
+3. **You're doing many solves**: Factorization methods amortize cost over multiple solves
+   ```julia
+   cache = init(prob, LUFactorization())
+   for i in 1:1000
+       cache.b = new_rhs[i]
+       sol = solve!(cache)
+   end
+   ```
+
+4. **Memory constraints**: Iterative methods use less memory
+   ```julia
+   sol = solve(prob, KrylovJL_GMRES())  # Lower memory usage
+   ```
+
+## Algorithm Selection Flowchart
+
+The automatic selection roughly follows this logic:
+
+```
+Is A diagonal? → DiagonalFactorization
+Is A tridiagonal/bidiagonal? → DirectLdiv! (Julia 1.11+) or LUFactorization
+Is A symmetric positive definite? → CholeskyFactorization
+Is A symmetric indefinite?
→ BunchKaufmanFactorization +Is A sparse? → UMFPACKFactorization or KLUFactorization +Is A small dense? → RFLUFactorization or SimpleLUFactorization +Is A large dense? → MKLLUFactorization or AppleAccelerateLUFactorization +Is A GPU array? → QRFactorization or LUFactorization +Is A an operator/function? → KrylovJL_GMRES +Is the system overdetermined? → QRFactorization or KrylovJL_LSMR +``` + +## Custom Functions + +For specialized algorithms not covered by the built-in solvers: + +```julia +function my_custom_solver(A, b, u, p, isfresh, Pl, Pr, cacheval; kwargs...) + # Your custom solving logic here + return A \ b # Simple example +end + +sol = solve(prob, LinearSolveFunction(my_custom_solver)) +``` + +See the [Custom Linear Solvers](@ref custom) section for more details. \ No newline at end of file diff --git a/docs/src/solvers/solvers.md b/docs/src/solvers/solvers.md index 0d2b31ac4..1290a8fa2 100644 --- a/docs/src/solvers/solvers.md +++ b/docs/src/solvers/solvers.md @@ -135,6 +135,8 @@ LinearSolve.jl contains some linear solvers built in for specialized cases. SimpleLUFactorization DiagonalFactorization SimpleGMRES +DirectLdiv! +LinearSolveFunction ``` ### FastLapackInterface.jl