84 changes: 83 additions & 1 deletion ext/LinearSolveRecursiveFactorizationExt.jl
@@ -1,7 +1,8 @@
module LinearSolveRecursiveFactorizationExt

using LinearSolve: LinearSolve, userecursivefactorization, LinearCache, @get_cacheval,
RFLUFactorization
RFLUFactorization, RF32MixedLUFactorization, default_alias_A,
default_alias_b
using LinearSolve.LinearAlgebra, LinearSolve.ArrayInterface, RecursiveFactorization
using SciMLBase: SciMLBase, ReturnCode

@@ -30,4 +31,85 @@ function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::RFLUFactorization
SciMLBase.build_linear_solution(alg, y, nothing, cache; retcode = ReturnCode.Success)
end

# Mixed precision RecursiveFactorization implementation
LinearSolve.default_alias_A(::RF32MixedLUFactorization, ::Any, ::Any) = false
LinearSolve.default_alias_b(::RF32MixedLUFactorization, ::Any, ::Any) = false

const PREALLOCATED_RF32_LU = begin
A = rand(Float32, 0, 0)
luinst = ArrayInterface.lu_instance(A)
(luinst, Vector{LinearAlgebra.BlasInt}(undef, 0))
end

function LinearSolve.init_cacheval(alg::RF32MixedLUFactorization{P, T}, A, b, u, Pl, Pr,
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::LinearSolve.OperatorAssumptions) where {P, T}
# Pre-allocate appropriate 32-bit arrays based on input type
if eltype(A) <: Complex
A_32 = rand(ComplexF32, 0, 0)
else
A_32 = rand(Float32, 0, 0)
end
luinst = ArrayInterface.lu_instance(A_32)
ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A)...))
(luinst, ipiv)
end

function SciMLBase.solve!(
cache::LinearSolve.LinearCache, alg::RF32MixedLUFactorization{P, T};
kwargs...) where {P, T}
A = cache.A
A = convert(AbstractMatrix, A)

# Check if we have complex numbers
iscomplex = eltype(A) <: Complex

fact, ipiv = LinearSolve.@get_cacheval(cache, :RF32MixedLUFactorization)
if cache.isfresh
# Convert to appropriate 32-bit type for factorization
if iscomplex
A_f32 = ComplexF32.(A)
else
A_f32 = Float32.(A)
end

# Ensure ipiv is the right size
if length(ipiv) != min(size(A_f32)...)
ipiv = Vector{LinearAlgebra.BlasInt}(undef, min(size(A_f32)...))
end

fact = RecursiveFactorization.lu!(A_f32, ipiv, Val(P), Val(T), check = false)
cache.cacheval = (fact, ipiv)

if !LinearAlgebra.issuccess(fact)
return SciMLBase.build_linear_solution(
alg, cache.u, nothing, cache; retcode = ReturnCode.Failure)
end

cache.isfresh = false
end

# Get the factorization from the cache
fact_cached = LinearSolve.@get_cacheval(cache, :RF32MixedLUFactorization)[1]

# Convert b to appropriate 32-bit type for solving
if iscomplex
b_f32 = ComplexF32.(cache.b)
u_f32 = similar(b_f32)
else
b_f32 = Float32.(cache.b)
u_f32 = similar(b_f32)
end

# Solve in 32-bit precision
ldiv!(u_f32, fact_cached, b_f32)

# Convert back to original precision
T_orig = eltype(cache.u)
cache.u .= T_orig.(u_f32)

SciMLBase.build_linear_solution(
alg, cache.u, nothing, cache; retcode = ReturnCode.Success)
end

end
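
For readers skimming the diff, the net effect of this extension method is: demote `A` and `b` to `Float32`/`ComplexF32`, run RecursiveFactorization's LU in single precision, and promote the solution back to the original element type. Below is a minimal usage sketch, assuming the default `RF32MixedLUFactorization()` constructor; the matrix, size, and tolerance are illustrative and not part of the PR.

```julia
using LinearSolve, LinearAlgebra
using RecursiveFactorization  # loading this package activates the extension above

n = 256
A = rand(n, n) + 2n * I      # diagonally dominant, so single precision is adequate
b = rand(n)

prob = LinearProblem(A, b)
sol = solve(prob, RF32MixedLUFactorization())

# The solution is returned in Float64 but only carries roughly Float32 accuracy.
@show norm(A * sol.u - b) / norm(b)   # expect on the order of 1e-6 to 1e-7
```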
130 changes: 74 additions & 56 deletions src/LinearSolve.jl
@@ -66,14 +66,13 @@ else
const useopenblas = false
end


@reexport using SciMLBase

"""
SciMLLinearSolveAlgorithm <: SciMLBase.AbstractLinearAlgorithm

The root abstract type for all linear solver algorithms in LinearSolve.jl.
All concrete linear solver implementations should inherit from one of the
specialized subtypes rather than directly from this type.

This type integrates with the SciMLBase ecosystem, providing a consistent
@@ -91,39 +90,44 @@ matrices (e.g., `A = LU`, `A = QR`, `A = LDL'`) and then solve the system
using forward/backward substitution.

## Characteristics

- Requires concrete matrix representation (`needs_concrete_A() = true`)
- Typically efficient for multiple solves with the same matrix
- Generally provides high accuracy for well-conditioned problems
- Memory requirements depend on the specific factorization type

## Subtypes

- `AbstractDenseFactorization`: For dense matrix factorizations
- `AbstractSparseFactorization`: For sparse matrix factorizations

## Examples of concrete subtypes

- `LUFactorization`, `QRFactorization`, `CholeskyFactorization`
- `UMFPACKFactorization`, `KLUFactorization`
"""
abstract type AbstractFactorization <: SciMLLinearSolveAlgorithm end
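
The "efficient for multiple solves with the same matrix" point is easiest to see through the caching interface. A short sketch (the 2×2 system is made up; `LUFactorization` and the `init`/`solve!`/`cache.b` workflow are the standard LinearSolve.jl API):

```julia
using LinearSolve

A = [2.0 1.0; 1.0 3.0]
b1 = [1.0, 2.0]
b2 = [3.0, 4.0]

# init builds a cache; the first solve! performs the LU factorization.
cache = init(LinearProblem(A, b1), LUFactorization())
sol1 = solve!(cache)

# Replacing only the right-hand side reuses the cached factorization.
cache.b = b2
sol2 = solve!(cache)
```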

"""
AbstractSparseFactorization <: AbstractFactorization

Abstract type for factorization-based linear solvers optimized for sparse matrices.
These algorithms take advantage of sparsity patterns to reduce memory usage and
computational cost compared to dense factorizations.

## Characteristics

- Optimized for matrices with many zero entries
- Often use specialized pivoting strategies to preserve sparsity
- May reorder rows/columns to minimize fill-in during factorization
- Typically more memory-efficient than dense methods for sparse problems

## Examples of concrete subtypes

- `UMFPACKFactorization`: General sparse LU with partial pivoting
- `KLUFactorization`: Sparse LU optimized for circuit simulation
- `CHOLMODFactorization`: Sparse Cholesky for positive definite systems
- `SparspakFactorization`: Envelope/profile method for sparse systems
"""
abstract type AbstractSparseFactorization <: AbstractFactorization end
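
As a concrete illustration of the sparse path, here is a sketch solving a tridiagonal Poisson-like system with `KLUFactorization`. It assumes the SparseArrays-based extension is available (it loads once SparseArrays is in the session); the stencil and size are arbitrary.

```julia
using LinearSolve, SparseArrays

n = 1_000
A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1))
b = ones(n)

# KLU exploits the sparsity pattern; a dense LU here would store n^2 entries.
sol = solve(LinearProblem(A, b), KLUFactorization())
```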

@@ -135,16 +139,18 @@ These algorithms assume the matrix has no particular sparsity structure and use
dense linear algebra routines (typically from BLAS/LAPACK) for optimal performance.

## Characteristics

- Optimized for matrices with few zeros or no sparsity structure
- Leverage highly optimized BLAS/LAPACK routines when available
- Generally provide excellent performance for moderately-sized dense problems
- Memory usage scales as O(n²) with matrix size

## Examples of concrete subtypes

- `LUFactorization`: Dense LU with partial pivoting (via LAPACK)
- `QRFactorization`: Dense QR factorization for overdetermined systems
- `CholeskyFactorization`: Dense Cholesky for symmetric positive definite matrices
- `BunchKaufmanFactorization`: For symmetric indefinite matrices
"""
abstract type AbstractDenseFactorization <: AbstractFactorization end
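
A dense counterpart for comparison: when the matrix is symmetric positive definite, `CholeskyFactorization` is the usual pick among the dense factorizations. The random SPD construction below is only for the sketch.

```julia
using LinearSolve, LinearAlgebra

n = 200
B = rand(n, n)
A = Symmetric(B' * B + n * I)   # symmetric positive definite by construction
b = rand(n)

# Cholesky does roughly half the work of LU and never pivots, but requires SPD input.
sol = solve(LinearProblem(A, b), CholeskyFactorization())
```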

@@ -156,23 +162,26 @@ These algorithms solve linear systems by iteratively building an approximation
from a sequence of Krylov subspaces, without requiring explicit matrix factorization.

## Characteristics

- Does not require concrete matrix representation (`needs_concrete_A() = false`)
- Only needs matrix-vector products `A*v` (can work with operators/functions)
- Memory usage typically O(n) or O(kn) where k << n
- Convergence depends on matrix properties (condition number, eigenvalue distribution)
- Often benefits significantly from preconditioning

## Advantages

- Low memory requirements for large sparse systems
- Can handle matrix-free operators (functions that compute `A*v`)
- Often the only feasible approach for very large systems
- Can exploit matrix structure through specialized operators

## Examples of concrete subtypes

- `GMRESIteration`: Generalized Minimal Residual method
- `CGIteration`: Conjugate Gradient (for symmetric positive definite systems)
- `BiCGStabLIteration`: Bi-Conjugate Gradient Stabilized
- Wrapped external iterative solvers (KrylovKit.jl, IterativeSolvers.jl)
"""
abstract type AbstractKrylovSubspaceMethod <: SciMLLinearSolveAlgorithm end
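
To make the contrast with factorization methods concrete, here is a sketch using the Krylov.jl-backed GMRES wrapper that LinearSolve exports as `KrylovJL_GMRES`. A dense matrix keeps the example short, but the same call works with any operator that supports `A*v` (for example a SciMLOperators `FunctionOperator`), which is the matrix-free case described above.

```julia
using LinearSolve, LinearAlgebra

n = 500
A = Matrix(4.0 * I, n, n) .+ 0.01 .* rand(n, n)   # diagonally dominant => fast GMRES convergence
b = rand(n)

# No factorization is formed; GMRES only performs matrix-vector products.
sol = solve(LinearProblem(A, b), KrylovJL_GMRES())
```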

@@ -183,15 +192,17 @@ Abstract type for linear solvers that wrap custom solving functions or
provide direct interfaces to specific solve methods. These provide flexibility
for integrating custom algorithms or simple solve strategies.

## Characteristics

- Does not require concrete matrix representation (`needs_concrete_A() = false`)
- Provides maximum flexibility for custom solving strategies
- Can wrap external solver libraries or implement specialized algorithms
- Performance and stability depend entirely on the wrapped implementation

## Examples of concrete subtypes

- `LinearSolveFunction`: Wraps arbitrary user-defined solve functions
- `DirectLdiv!`: Direct application of the `\\` operator
"""
abstract type AbstractSolveFunction <: SciMLLinearSolveAlgorithm end
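
A sketch of both concrete subtypes listed above. The positional-argument signature of the wrapped function follows the documented `LinearSolveFunction` pattern, but treat it as an assumption and verify against the current docs; `DirectLdiv!` needs no wrapper at all.

```julia
using LinearSolve, LinearAlgebra

# Custom strategy: just call the generic backslash.
function my_backslash(A, b, u, p, newA, Pl, Pr, solverdata; verbose = true, kwargs...)
    return A \ b
end

prob = LinearProblem(rand(4, 4) + 4.0 * I, rand(4))

sol1 = solve(prob, LinearSolveFunction(my_backslash))  # wraps the user-defined function
sol2 = solve(prob, DirectLdiv!())                      # applies \ (ldiv!) directly
```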

@@ -204,22 +215,27 @@ Trait function that determines whether a linear solver algorithm requires
a concrete matrix representation or can work with abstract operators.

## Arguments

- `alg`: A linear solver algorithm instance

## Returns

- `true`: Algorithm requires a concrete matrix (e.g., for factorization)
- `false`: Algorithm can work with abstract operators (e.g., matrix-free methods)

## Usage

This trait is used internally by LinearSolve.jl to optimize algorithm dispatch
and determine when matrix operators need to be converted to concrete arrays.

## Algorithm-Specific Behavior

- `AbstractFactorization`: `true` (needs explicit matrix entries for factorization)
- `AbstractKrylovSubspaceMethod`: `false` (only needs matrix-vector products)
- `AbstractSolveFunction`: `false` (depends on the wrapped function's requirements)

## Example

```julia
needs_concrete_A(LUFactorization()) # true
needs_concrete_A(GMRESIteration()) # false
@@ -470,9 +486,11 @@ export PanuaPardisoFactorize, PanuaPardisoIterate
export PardisoJL
export MKLLUFactorization
export OpenBLASLUFactorization
export OpenBLAS32MixedLUFactorization
export MKL32MixedLUFactorization
export AppleAccelerateLUFactorization
export AppleAccelerate32MixedLUFactorization
export RF32MixedLUFactorization
export MetalLUFactorization
export MetalOffload32MixedLUFactorization
