
Commit 4c8d145

Add comprehensive docstrings for core LinearSolve types and functions
- Document LinearCache struct with detailed field descriptions
- Add docstrings for utility functions: init_cacheval, default_tol, default_alias_*
- Document preconditioner types: ComposePreconditioner, InvPreconditioner
- Add comprehensive docs for LinearSolveFunction and DirectLdiv! solve functions
- Improve RFLUFactorization docs with parameter and performance details
- Enhance FastLUFactorization and FastQRFactorization documentation
- Document SimpleLUFactorization and LUSolver with usage examples

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent c2f135a

File tree: 5 files changed (+428 -19 lines)

src/common.jl

Lines changed: 120 additions & 0 deletions
@@ -65,6 +65,46 @@ end
 __issquare(assump::OperatorAssumptions) = assump.issq
 __conditioning(assump::OperatorAssumptions) = assump.condition

+"""
+    LinearCache{TA, Tb, Tu, Tp, Talg, Tc, Tl, Tr, Ttol, issq, S}
+
+The core cache structure used by LinearSolve for storing and managing the state of linear
+solver computations. This mutable struct acts as the primary interface for iterative
+solving and caching of factorizations and intermediate results.
+
+## Fields
+
+- `A::TA`: The matrix operator of the linear system.
+- `b::Tb`: The right-hand side vector of the linear system.
+- `u::Tu`: The solution vector (preallocated storage for the result).
+- `p::Tp`: Parameters passed to the linear solver algorithm.
+- `alg::Talg`: The linear solver algorithm instance.
+- `cacheval::Tc`: Algorithm-specific cache storage for factorizations and intermediate computations.
+- `isfresh::Bool`: Cache validity flag for the matrix `A`. `false` means `cacheval` is up to date
+  with respect to `A`; `true` means `cacheval` needs to be updated.
+- `precsisfresh::Bool`: Cache validity flag for the preconditioners. `false` means `Pl` and `Pr`
+  are up to date with respect to `A`; `true` means they need to be updated.
+- `Pl::Tl`: Left preconditioner operator.
+- `Pr::Tr`: Right preconditioner operator.
+- `abstol::Ttol`: Absolute tolerance for iterative solvers.
+- `reltol::Ttol`: Relative tolerance for iterative solvers.
+- `maxiters::Int`: Maximum number of iterations for iterative solvers.
+- `verbose::Bool`: Whether to print verbose output during solving.
+- `assumptions::OperatorAssumptions{issq}`: Assumptions about the operator's properties.
+- `sensealg::S`: Sensitivity analysis algorithm for automatic differentiation.
+
+## Usage
+
+A `LinearCache` is typically created via `init(::LinearProblem, ::SciMLLinearSolveAlgorithm)`
+and then used with `solve!(cache)` for efficient repeated solves with the same matrix structure
+but potentially different right-hand sides or parameter values.
+
+## Cache Management
+
+The cache automatically tracks when the matrix `A` or the parameters `p` change by setting the
+appropriate freshness flags. When `solve!` is called, stale cache entries are automatically
+recomputed as needed.
+"""
 mutable struct LinearCache{TA, Tb, Tu, Tp, Talg, Tc, Tl, Tr, Ttol, issq, S}
     A::TA
     b::Tb
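
For readers skimming the diff, here is a minimal sketch of the `init`/`solve!` reuse pattern this docstring describes. It assumes only the documented LinearSolve.jl interface (`LinearProblem`, `init`, `solve!`, and the `cache.b` setter); the 4×4 system is purely illustrative.

```julia
using LinearSolve, LinearAlgebra

A = rand(4, 4) + 4I   # illustrative, diagonally dominated matrix
b1 = rand(4)
prob = LinearProblem(A, b1)

# init builds a LinearCache; the first solve! factorizes A and
# stores the factorization in cache.cacheval
cache = init(prob, LUFactorization())
sol1 = solve!(cache)

# Assigning a new b marks only the RHS as changed, so the cached
# factorization of A is reused on the next solve!
cache.b = rand(4)
sol2 = solve!(cache)
```

This is exactly the workflow that makes the freshness flags worthwhile: the expensive factorization is paid once, and subsequent right-hand sides cost only a triangular solve.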
@@ -106,19 +146,81 @@ function update_cacheval!(cache::LinearCache, name::Symbol, x)
 end
 update_cacheval!(cache, cacheval, name::Symbol, x) = cacheval

+"""
+    init_cacheval(alg::SciMLLinearSolveAlgorithm, args...)
+
+Initialize algorithm-specific cache values for the given linear solver algorithm.
+This function returns `nothing` by default and is intended to be overloaded by
+specific algorithm implementations that need to store intermediate computations
+or factorizations.
+
+## Arguments
+- `alg`: The linear solver algorithm instance
+- `args...`: Additional arguments passed to the cache initialization
+
+## Returns
+Algorithm-specific cache value, or `nothing` for algorithms that don't require caching.
+"""
 init_cacheval(alg::SciMLLinearSolveAlgorithm, args...) = nothing

 function SciMLBase.init(prob::LinearProblem, args...; kwargs...)
     SciMLBase.init(prob, nothing, args...; kwargs...)
 end

+"""
+    default_tol(T)
+
+Compute the default tolerance for iterative linear solvers based on the element type.
+The tolerance is typically the square root of the machine epsilon for the given
+floating-point type, ensuring numerical accuracy appropriate for that precision.
+
+## Arguments
+- `T`: The element type of the linear system
+
+## Returns
+- For floating-point types: `√(eps(T))`
+- For exact types (`Rational`, `Integer`): `0` (exact arithmetic)
+- For `Any`: `0` (conservative default)
+"""
 default_tol(::Type{T}) where {T} = √(eps(T))
 default_tol(::Type{Complex{T}}) where {T} = √(eps(T))
 default_tol(::Type{<:Rational}) = 0
 default_tol(::Type{<:Integer}) = 0
 default_tol(::Type{Any}) = 0

+"""
+    default_alias_A(alg, A, b) -> Bool
+
+Determine the default aliasing behavior for the matrix `A` given the algorithm type.
+Aliasing allows the algorithm to modify the original matrix in place for efficiency,
+but this may not be desirable or safe for all algorithm types.
+
+## Arguments
+- `alg`: The linear solver algorithm
+- `A`: The matrix operator
+- `b`: The right-hand side vector
+
+## Returns
+- `false`: Safe default; the algorithm will not modify the original matrix `A`
+- `true`: The algorithm may modify `A` in place for efficiency
+
+## Algorithm-Specific Behavior
+- Dense factorizations: `false` (destructive, so the original matrix must be preserved)
+- Krylov methods: `true` (non-destructive, so aliasing is safe)
+- Sparse factorizations: `true` (typically preserve the sparsity structure)
+"""
 default_alias_A(::Any, ::Any, ::Any) = false
+
+"""
+    default_alias_b(alg, A, b) -> Bool
+
+Determine the default aliasing behavior for the right-hand side vector `b` given the
+algorithm type. Similar to `default_alias_A`, but for the RHS vector.
+
+## Returns
+- `false`: Safe default; the algorithm will not modify the original vector `b`
+- `true`: The algorithm may modify `b` in place for efficiency
+"""
 default_alias_b(::Any, ::Any, ::Any) = false

 # Non-destructive algorithms default to true
@@ -130,6 +232,24 @@ default_alias_b(::AbstractSparseFactorization, ::Any, ::Any) = true

 DEFAULT_PRECS(A, p) = IdentityOperator(size(A)[1]), IdentityOperator(size(A)[2])

+"""
+    __init_u0_from_Ab(A, b)
+
+Initialize the solution vector `u0` with the appropriate size and type based on the
+matrix `A` and the right-hand side `b`. The solution vector is allocated with the
+same element type as `b` and sized to match the number of columns of `A`.
+
+## Arguments
+- `A`: The matrix operator (determines the solution vector's size)
+- `b`: The right-hand side vector (determines the element type)
+
+## Returns
+A zero-initialized vector of size `(size(A, 2),)` with element type matching `b`.
+
+## Specializations
+- For static matrices (`SMatrix`): returns a static vector (`SVector`)
+- For regular matrices: returns a vector `similar` to `b` with the appropriate size
+"""
 function __init_u0_from_Ab(A, b)
     u0 = similar(b, size(A, 2))
     fill!(u0, false)
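
As a quick illustration of the tolerance and aliasing defaults documented above, here is a hedged sketch. `default_tol` and `default_alias_A` are internal (unexported) helpers, so they are qualified with the module name; `LUFactorization` and `KrylovJL_GMRES` stand in for a dense factorization and a Krylov method respectively.

```julia
using LinearSolve

# Tolerance defaults follow the element type's precision
LinearSolve.default_tol(Float64)        # ≈ 1.49e-8, i.e. √(eps(Float64))
LinearSolve.default_tol(Float32)        # ≈ 3.45f-4, i.e. √(eps(Float32))
LinearSolve.default_tol(Rational{Int})  # 0: exact arithmetic, no iteration tolerance

# Aliasing defaults: dense factorizations copy A, Krylov methods may alias it
A = rand(4, 4); b = rand(4)
LinearSolve.default_alias_A(LUFactorization(), A, b)   # false (A preserved)
LinearSolve.default_alias_A(KrylovJL_GMRES(), A, b)    # true  (A may be aliased)
```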

src/extension_algs.jl

Lines changed: 101 additions & 11 deletions
@@ -174,13 +174,42 @@ end
 ## RFLUFactorization

 """
-`RFLUFactorization()`
+    RFLUFactorization(; pivot = Val(true), thread = Val(true), throwerror = true)
+
+A fast pure Julia LU-factorization implementation using RecursiveFactorization.jl.
+This is by far the fastest LU-factorization implementation, usually outperforming
+OpenBLAS and MKL for smaller matrices (<500x500), but it is currently optimized only
+for Base `Array` with `Float32` or `Float64`. Additional optimization for complex
+matrices is in the works.
+
+## Type Parameters
+- `P`: Pivoting strategy as a `Val{Bool}`. `Val{true}` enables partial pivoting for stability.
+- `T`: Threading strategy as a `Val{Bool}`. `Val{true}` enables multi-threading for performance.
+
+## Constructor Arguments
+- `pivot = Val(true)`: Enable partial pivoting. Set to `Val(false)` to disable it for speed
+  at the cost of numerical stability.
+- `thread = Val(true)`: Enable multi-threading. Set to `Val(false)` for single-threaded
+  execution.
+- `throwerror = true`: Whether to throw an error if RecursiveFactorization.jl is not loaded.
+
+## Performance Notes
+- Fastest for dense matrices with dimensions roughly < 500×500
+- Optimized specifically for `Float32` and `Float64` element types
+- The recursive blocking strategy provides excellent cache performance
+- Multi-threading can provide significant speedups on multi-core systems
+
+## Requirements
+Using this solver requires that RecursiveFactorization.jl is loaded: `using RecursiveFactorization`

-A fast pure Julia LU-factorization implementation
-using RecursiveFactorization.jl. This is by far the fastest LU-factorization
-implementation, usually outperforming OpenBLAS and MKL for smaller matrices
-(<500x500), but currently optimized only for Base `Array` with `Float32` or `Float64`.
-Additional optimization for complex matrices is in the works.
+## Example
+```julia
+using RecursiveFactorization
+# Fast and stable (with pivoting)
+alg1 = RFLUFactorization()
+# Fastest (no pivoting), less stable
+alg2 = RFLUFactorization(pivot = Val(false))
+```
 """
 struct RFLUFactorization{P, T} <: AbstractDenseFactorization
     function RFLUFactorization(::Val{P}, ::Val{T}; throwerror = true) where {P, T}
@@ -200,17 +229,78 @@ end
 # But I'm not sure it makes sense as a GenericFactorization
 # since it just uses `LAPACK.getrf!`.
 """
-`FastLUFactorization()`
+    FastLUFactorization()
+
+A high-performance LU factorization using the FastLapackInterface.jl package.
+This provides an optimized interface to LAPACK routines with reduced overhead
+compared to the standard LinearAlgebra LAPACK wrappers.
+
+## Features
+- Reduced function-call overhead compared to the standard LAPACK wrappers
+- Optimized for performance-critical applications
+- Uses partial pivoting (no choice of pivoting method available)
+- Suitable for dense matrices where maximum performance is required

-The FastLapackInterface.jl version of the LU factorization. Notably,
-this version does not allow for choice of pivoting method.
+## Limitations
+- Does not allow customization of the pivoting strategy (always uses partial pivoting)
+- Requires FastLapackInterface.jl to be loaded
+- Limited to dense matrix types supported by LAPACK
+
+## Requirements
+Using this solver requires that FastLapackInterface.jl is loaded: `using FastLapackInterface`
+
+## Performance Notes
+This factorization is optimized for cases where the overhead of standard LAPACK
+function calls becomes significant, typically for moderate-sized dense matrices
+or when performing many factorizations.
+
+## Example
+```julia
+using FastLapackInterface
+alg = FastLUFactorization()
+sol = solve(prob, alg)
+```
 """
 struct FastLUFactorization <: AbstractDenseFactorization end

 """
-`FastQRFactorization()`
+    FastQRFactorization(; pivot = NoPivot(), blocksize = 36)
+
+A high-performance QR factorization using the FastLapackInterface.jl package.
+This provides an optimized interface to LAPACK QR routines with reduced overhead
+compared to the standard LinearAlgebra LAPACK wrappers.
+
+## Type Parameters
+- `P`: The type of the pivoting strategy used
+
+## Fields
+- `pivot::P`: Pivoting strategy (e.g., `ColumnNorm()` for column pivoting, `NoPivot()` for none)
+- `blocksize::Int`: Block size for the blocked QR algorithm (default: 36)
+
+## Features
+- Reduced function-call overhead compared to the standard LAPACK wrappers
+- Supports various pivoting strategies for numerical stability
+- Configurable block size for optimal performance
+- Suitable for dense matrices, especially overdetermined systems

-The FastLapackInterface.jl version of the QR factorization.
+## Performance Notes
+The block size can be tuned for optimal performance depending on the matrix size and
+architecture. The default value of 36 is generally good for most cases, but
+experimentation may be beneficial for specific applications.
+
+## Requirements
+Using this solver requires that FastLapackInterface.jl is loaded: `using FastLapackInterface`
+
+## Example
+```julia
+using FastLapackInterface
+# QR without pivoting (the default)
+alg1 = FastQRFactorization()
+# QR with column pivoting for numerical robustness
+alg2 = FastQRFactorization(pivot = ColumnNorm())
+# Custom block size
+alg3 = FastQRFactorization(blocksize = 64)
+```
 """
 struct FastQRFactorization{P} <: AbstractDenseFactorization
     pivot::P
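
The docstring examples above reference a `prob` that is never constructed. A self-contained sketch under the standard SciML `LinearProblem`/`solve` interface might look like the following; the 100×100 random system is purely illustrative.

```julia
using LinearSolve, RecursiveFactorization

A = rand(100, 100)
b = rand(100)
prob = LinearProblem(A, b)

# Any of the factorizations documented above can be passed as the algorithm;
# RFLUFactorization is a good default at this matrix size
sol = solve(prob, RFLUFactorization())
sol.u  # the solution vector
```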

src/preconditioners.jl

Lines changed: 60 additions & 0 deletions
@@ -1,5 +1,32 @@
 # Tooling Preconditioners

+"""
+    ComposePreconditioner{Ti, To}
+
+A preconditioner that composes two preconditioners by applying them sequentially.
+The inner preconditioner is applied first, followed by the outer preconditioner.
+This allows complex preconditioning strategies to be built by combining simpler ones.
+
+## Fields
+- `inner::Ti`: The inner (first) preconditioner to apply
+- `outer::To`: The outer (second) preconditioner to apply
+
+## Usage
+
+```julia
+# Compose a diagonal preconditioner with an ILU preconditioner
+inner_prec = DiagonalPreconditioner(diag(A))
+outer_prec = ILUFactorization()
+composed = ComposePreconditioner(inner_prec, outer_prec)
+```
+
+Applying the composed preconditioner to a vector `x` computes `outer \\ (inner \\ x)`.
+
+## Mathematical Interpretation
+
+For a linear system `Ax = b`, if `P₁` is the inner and `P₂` is the outer preconditioner,
+the composed application is `P₂⁻¹(P₁⁻¹x)`, i.e. the effective combined preconditioner
+is `M = P₁P₂`.
+"""
 struct ComposePreconditioner{Ti, To}
     inner::Ti
     outer::To
@@ -21,6 +48,39 @@ function LinearAlgebra.ldiv!(y, A::ComposePreconditioner, x)
     ldiv!(outer, y)
 end

+"""
+    InvPreconditioner{T}
+
+A preconditioner wrapper that treats a matrix or operator as the inverse of the
+actual preconditioner. Instead of solving `Px = y`, applying it computes `P*y`,
+where `P` is stored as the "inverse" preconditioner matrix.
+
+## Fields
+- `P::T`: The stored preconditioner matrix/operator (representing `M⁻¹`)
+
+## Usage
+
+This is useful when you have a matrix that approximates the inverse of your
+desired preconditioner. For example, if you have computed an approximate
+inverse matrix `Ainv ≈ A⁻¹`, you can use:
+
+```julia
+prec = InvPreconditioner(Ainv)
+```
+
+## Mathematical Interpretation
+
+For a linear system `Ax = b` with preconditioner `M`, we normally solve `M⁻¹Ax = M⁻¹b`.
+With `InvPreconditioner`, the stored operator `P` represents `M⁻¹` directly, so
+applying the preconditioner becomes a matrix-vector multiplication rather than
+a linear solve.
+
+## Methods
+- `ldiv!(A::InvPreconditioner, x)`: Computes `x ← P*x` (in place)
+- `ldiv!(y, A::InvPreconditioner, x)`: Computes `y ← P*x`
+- `mul!(y, A::InvPreconditioner, x)`: Computes `y ← P⁻¹x` (the inverse operation, a solve with `P`)
+"""
 struct InvPreconditioner{T}
     P::T
 end
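
A small, self-contained check of these semantics (a sketch: it uses `Diagonal` matrices so the solves are trivial, and qualifies the unexported types with the `LinearSolve` module name):

```julia
using LinearSolve, LinearAlgebra

x = ones(3)

# ComposePreconditioner: inner solve first, then outer solve
inner = Diagonal([2.0, 2.0, 2.0])
outer = Diagonal([4.0, 4.0, 4.0])
comp = LinearSolve.ComposePreconditioner(inner, outer)
y = similar(x)
ldiv!(y, comp, x)      # y == outer \ (inner \ x) == x ./ 8

# InvPreconditioner: application multiplies by the stored operator
Minv = Diagonal([0.5, 0.5, 0.5])
inv_prec = LinearSolve.InvPreconditioner(Minv)
w = similar(x)
ldiv!(w, inv_prec, x)  # w == Minv * x == x ./ 2
```

The factor-of-8 result makes the composition order concrete: both diagonal solves are applied, inner before outer, matching `M = P₁P₂` from the docstring.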
