Merge pull request #153 from Wimmerer/fastlapack

ChrisRackauckas · web-flow · commit aeedba40c647 · 2022-07-14T02:38:35.000+03:00
FastLAPACK
diff --git a/Project.toml b/Project.toml
@@ -6,6 +6,7 @@ version = "1.20.0"
 [deps]
 ArrayInterfaceCore = "30b0a656-2188-435a-8636-2ec0e6a096e2"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 KLU = "ef3ab10e-7fda-4108-b977-705223b18434"
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
@@ -12,8 +12,8 @@ using Setfield
 using UnPack
 using SuiteSparse
 using KLU
+using FastLapackInterface
 using DocStringExtensions
-
 import GPUArraysCore
 
 # wrap
diff --git a/src/factorization.jl b/src/factorization.jl
@@ -346,3 +346,101 @@ function init_cacheval(alg::GenericFactorization{<:RFWrapper},
                        abstol, reltol, verbose)
     ArrayInterfaceCore.lu_instance(convert(AbstractMatrix, A))
 end
+
+## FastLAPACKFactorizations
+
+"""
+    WorkspaceAndFactors{W, F}
+
+Cache value used by the FastLAPACK-backed factorizations in this file. It pairs the
+workspace handed to the `LAPACK.*!(ws, A)` calls with the factorization object built
+from their results.
+
+# Fields
+- `workspace::W`: workspace object (`LUWs`, `QRWYWs` or `QRpWs` in this file).
+- `factors::F`: factorization wrapper (`LU`, `QRCompactWY` or `QRPivoted` in this file).
+"""
+struct WorkspaceAndFactors{W, F}
+    workspace::W
+    factors::F
+end
+
+# There are no options such as pivoting here.
+# It is still unclear whether this should be a `GenericFactorization` instead,
+# since it just uses `LAPACK.getrf!`.
+"""
+    FastLUFactorization()
+
+LU factorization algorithm that calls `LAPACK.getrf!` with an `LUWs` workspace. The
+workspace is kept in the solver cache next to the factors so it can be reused whenever
+the matrix is refactorized.
+"""
+struct FastLUFactorization <: AbstractFactorization end
+
+"""
+    init_cacheval(::FastLUFactorization, A, b, u, Pl, Pr, maxiters, abstol, reltol,
+                  verbose)
+
+Create the cache value for `FastLUFactorization`: an `LUWs` workspace built from `A`
+together with an initial `LU` factorization, wrapped in a `WorkspaceAndFactors`.
+
+All arguments other than `A` are unused by this method.
+"""
+function init_cacheval(::FastLUFactorization, A, b, u, Pl, Pr,
+                       maxiters, abstol, reltol, verbose)
+    ws = LUWs(A)
+    # `getrf!` overwrites its matrix argument with the LU factors. Factorize a copy so
+    # that building the cache value never clobbers the caller's `A`; `solve` factorizes
+    # `cache.A` itself whenever `cache.isfresh` is set.
+    factors = LinearAlgebra.LU(LAPACK.getrf!(ws, copy(A))...)
+    return WorkspaceAndFactors(ws, factors)
+end
+
+"""
+    SciMLBase.solve(cache::LinearCache, alg::FastLUFactorization)
+
+Solve the linear system held in `cache` with the cached LU factors.
+
+When `cache.isfresh` is set, `cache.A` is first refactorized with `LAPACK.getrf!`, reusing
+the stored workspace, and the cache value is updated. The solution is then written into
+`cache.u` via `ldiv!` and returned through `SciMLBase.build_linear_solution`.
+"""
+function SciMLBase.solve(cache::LinearCache, alg::FastLUFactorization)
+    Amat = convert(AbstractMatrix, cache.A)
+    entry = cache.cacheval
+    if cache.isfresh
+        # This fails if `A` has a different *size* than it had when the workspace was
+        # created; it may instead be desirable to resize the workspace.
+        refreshed = LinearAlgebra.LU(LAPACK.getrf!(entry.workspace, Amat)...)
+        @set! entry.factors = refreshed
+        cache = set_cacheval(cache, entry)
+    end
+    sol = ldiv!(cache.u, cache.cacheval.factors, cache.b)
+    return SciMLBase.build_linear_solution(alg, sol, nothing, cache)
+end
+
+"""
+    FastQRFactorization{P}
+
+QR factorization algorithm that calls LAPACK routines with a reusable workspace.
+
+# Fields
+- `pivot::P`: pivoting marker. Its type selects between the unpivoted path
+  (`LAPACK.geqrt!`, `QRCompactWY`) and the pivoted path (`LAPACK.geqp3!`, `QRPivoted`).
+- `blocksize::Int`: block size passed to the `QRWYWs` workspace of the unpivoted path.
+"""
+struct FastQRFactorization{P} <: AbstractFactorization
+    pivot::P
+    blocksize::Int
+end
+
+"""
+    FastQRFactorization()
+
+Construct an unpivoted `FastQRFactorization` with a block size of 36.
+
+The pivot marker is `Val(false)` on Julia versions before 1.7 and `NoPivot()` otherwise.
+"""
+function FastQRFactorization()
+    # Is 36 or 16 better here? LinearAlgebra and FastLapackInterface use 36,
+    # but QRFactorization uses 16.
+    blocksize = 36
+    @static if VERSION < v"1.7beta"
+        return FastQRFactorization(Val(false), blocksize)
+    else
+        return FastQRFactorization(NoPivot(), blocksize)
+    end
+end
+
+# `init_cacheval` methods for `FastQRFactorization`. The pivot marker types depend on the
+# Julia version (`Val{false}`/`Val{true}` before 1.7, `NoPivot`/`ColumnNorm` otherwise),
+# so `@static` picks which set of methods gets defined.
+#
+# The unpivoted method builds a `QRWYWs` workspace with `alg.blocksize` and wraps the
+# result of `LAPACK.geqrt!` in `QRCompactWY`; the pivoted method builds a `QRpWs`
+# workspace and wraps the result of `LAPACK.geqp3!` in `QRPivoted`.
+#
+# Both LAPACK routines overwrite their matrix argument, so the initial factors are
+# computed on a copy of `A` and building the cache value never clobbers the caller's
+# matrix. `solve` factorizes `cache.A` itself whenever `cache.isfresh` is set.
+@static if VERSION < v"1.7beta"
+    function init_cacheval(alg::FastQRFactorization{Val{false}}, A, b, u, Pl, Pr,
+                           maxiters, abstol, reltol, verbose)
+        ws = QRWYWs(A; blocksize = alg.blocksize)
+        factors = LinearAlgebra.QRCompactWY(LAPACK.geqrt!(ws, copy(A))...)
+        return WorkspaceAndFactors(ws, factors)
+    end
+
+    function init_cacheval(::FastQRFactorization{Val{true}}, A, b, u, Pl, Pr,
+                           maxiters, abstol, reltol, verbose)
+        ws = QRpWs(A)
+        factors = LinearAlgebra.QRPivoted(LAPACK.geqp3!(ws, copy(A))...)
+        return WorkspaceAndFactors(ws, factors)
+    end
+else
+    function init_cacheval(alg::FastQRFactorization{NoPivot}, A, b, u, Pl, Pr,
+                           maxiters, abstol, reltol, verbose)
+        ws = QRWYWs(A; blocksize = alg.blocksize)
+        factors = LinearAlgebra.QRCompactWY(LAPACK.geqrt!(ws, copy(A))...)
+        return WorkspaceAndFactors(ws, factors)
+    end
+
+    function init_cacheval(::FastQRFactorization{ColumnNorm}, A, b, u, Pl, Pr,
+                           maxiters, abstol, reltol, verbose)
+        ws = QRpWs(A)
+        factors = LinearAlgebra.QRPivoted(LAPACK.geqp3!(ws, copy(A))...)
+        return WorkspaceAndFactors(ws, factors)
+    end
+end
+
+"""
+    SciMLBase.solve(cache::LinearCache, alg::FastQRFactorization{P}) where {P}
+
+Solve the linear system held in `cache` with the cached QR factors.
+
+When `cache.isfresh` is set, `cache.A` is first refactorized with the stored workspace:
+`LAPACK.geqrt!` (wrapped in `QRCompactWY`) when `P` is the unpivoted marker type, and
+`LAPACK.geqp3!` (wrapped in `QRPivoted`) otherwise. The solution is then written into
+`cache.u` via `ldiv!` and returned through `SciMLBase.build_linear_solution`.
+"""
+function SciMLBase.solve(cache::LinearCache, alg::FastQRFactorization{P}) where {P}
+    Amat = convert(AbstractMatrix, cache.A)
+    entry = cache.cacheval
+    if cache.isfresh
+        # This fails if `A` has a different *size* than it had when the workspace was
+        # created; it may instead be desirable to resize the workspace.
+        unpivoted = @static VERSION < v"1.7beta" ? Val{false} : NoPivot
+        ws = entry.workspace
+        refreshed = if P === unpivoted
+            LinearAlgebra.QRCompactWY(LAPACK.geqrt!(ws, Amat)...)
+        else
+            LinearAlgebra.QRPivoted(LAPACK.geqp3!(ws, Amat)...)
+        end
+        @set! entry.factors = refreshed
+        cache = set_cacheval(cache, entry)
+    end
+    sol = ldiv!(cache.u, cache.cacheval.factors, cache.b)
+    return SciMLBase.build_linear_solution(alg, sol, nothing, cache)
+end
diff --git a/test/basictests.jl b/test/basictests.jl
@@ -119,6 +119,24 @@ end
         @test_throws ArgumentError solve(cache)
     end
 
+    @testset "FastLAPACK Factorizations" begin
+        # Two differently scaled copies of `A`, each with its own random right-hand
+        # side and a zeroed initial guess.
+        prob1 = LinearProblem(A / 1, rand(n); u0 = zero(b))
+        prob2 = LinearProblem(A / 2, rand(n); u0 = zero(b))
+
+        # Construct each algorithm right before it is exercised, LU first and then QR.
+        constructors = (LinearSolve.FastLUFactorization, LinearSolve.FastQRFactorization)
+        for make_alg in constructors
+            test_interface(make_alg(), prob1, prob2)
+        end
+
+        # TODO: Resizing tests. Upstream doesn't currently support it.
+        # Need to be absolutely certain we never segfault with incorrect
+        # ws sizes.
+    end
+
     @testset "Concrete Factorizations" begin for alg in (LUFactorization(),
                                                          QRFactorization(),
                                                          SVDFactorization(),