port from laptop, basic func, no tests

rayegun · rayegun · commit 93665b74f7c9 · 2022-07-13T01:20:31.000-04:00
diff --git a/Project.toml b/Project.toml
@@ -6,6 +6,7 @@ version = "1.20.0"
 [deps]
 ArrayInterfaceCore = "30b0a656-2188-435a-8636-2ec0e6a096e2"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 KLU = "ef3ab10e-7fda-4108-b977-705223b18434"
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
@@ -12,8 +12,8 @@ using Setfield
 using UnPack
 using SuiteSparse
 using KLU
+using FastLapackInterface
 using DocStringExtensions
-
 import GPUArraysCore
 
 # wrap
diff --git a/src/factorization.jl b/src/factorization.jl
@@ -346,3 +346,83 @@ function init_cacheval(alg::GenericFactorization{<:RFWrapper},
                        abstol, reltol, verbose)
     ArrayInterfaceCore.lu_instance(convert(AbstractMatrix, A))
 end
+
+
+## FastLAPACKFactorizations
+
+struct WorkspaceAndFactors{W, F}  # returned by the FastLAPACK `init_cacheval` methods below
+    workspace::W  # passed as the first argument to the `LAPACK.*!` calls in this section
+    factors::F  # factorization object that `solve` hands to `ldiv!`
+end
+
+# There are no options (such as pivoting) to choose from here.
+# It is not clear that this makes sense as a GenericFactorization,
+# since it only ever calls `LAPACK.getrf!`.
+struct FastLUFactorization <: AbstractFactorization end
+
+function init_cacheval(::FastLUFactorization, A, b, u, Pl, Pr,
+    maxiters, abstol, reltol, verbose)  # builds the workspace and the initial LU factors
+    ws = LUWs(A)  # `getrf!` overwrites its matrix argument, so factor a copy, not `A`
+    return WorkspaceAndFactors(ws, LinearAlgebra.LU(LAPACK.getrf!(ws, copy(A))...))
+end
+
+# Solve with the cached LU factors, refactorizing `cache.A` first when the cache is fresh.
+function SciMLBase.solve(cache::LinearCache, alg::FastLUFactorization)
+    mat = convert(AbstractMatrix, cache.A)
+    stored = cache.cacheval
+    if cache.isfresh
+        # Fails if A's *size* differs from the one the workspace was built for;
+        # resizing the workspace instead may be desirable.
+        @set! stored.factors = LinearAlgebra.LU(LAPACK.getrf!(stored.workspace, mat)...)
+        cache = set_cacheval(cache, stored)
+    end
+    sol = ldiv!(cache.u, cache.cacheval.factors, cache.b)
+    return SciMLBase.build_linear_solution(alg, sol, nothing, cache)
+end
+
+struct FastQRFactorization{P} <: AbstractFactorization
+    pivot::P  # pivoting strategy; its type `P` selects the `init_cacheval`/`solve` branch
+    blocksize::Int  # passed as `blocksize` to `QRWYWs` by the `NoPivot` `init_cacheval`
+end
+
+function FastQRFactorization()
+    # Default: no pivoting, blocksize 36. Is 36 or 16 better here? LinearAlgebra and
+    # FastLapackInterface use 36, but QRFactorization uses 16.
+    @static if VERSION < v"1.7beta"
+        return FastQRFactorization(Val(false), 36)
+    else
+        return FastQRFactorization(NoPivot(), 36)
+    end
+end
+
+function init_cacheval(alg::FastQRFactorization{NoPivot}, A, b, u, Pl, Pr,
+    maxiters, abstol, reltol, verbose)  # builds the workspace and the initial QR factors
+    ws = QRWYWs(A; blocksize = alg.blocksize)  # `geqrt!` overwrites its input: use a copy
+    return WorkspaceAndFactors(ws, LinearAlgebra.QRCompactWY(LAPACK.geqrt!(ws, copy(A))...))
+end
+
+function init_cacheval(::FastQRFactorization{ColumnNorm}, A, b, u, Pl, Pr,
+    maxiters, abstol, reltol, verbose)  # builds the workspace and the initial pivoted QR
+    ws = QRpWs(A)  # `geqp3!` overwrites its matrix argument, so factor a copy, not `A`
+    return WorkspaceAndFactors(ws, LinearAlgebra.QRPivoted(LAPACK.geqp3!(ws, copy(A))...))
+end
+
+# Solve with the cached QR factors, refactorizing `cache.A` first when the cache is fresh.
+function SciMLBase.solve(cache::LinearCache, alg::FastQRFactorization{P}) where {P}
+    mat = convert(AbstractMatrix, cache.A)
+    stored = cache.cacheval
+    if cache.isfresh
+        # Fails if A's *size* differs from the workspace's; resizing it may be desirable.
+        qrfact = if P === NoPivot
+            LinearAlgebra.QRCompactWY(LAPACK.geqrt!(stored.workspace, mat)...)
+        elseif P === ColumnNorm
+            LinearAlgebra.QRPivoted(LAPACK.geqp3!(stored.workspace, mat)...)
+        else
+            error("No FastLAPACK Factorization defined for $P")
+        end
+        @set! stored.factors = qrfact
+        cache = set_cacheval(cache, stored)
+    end
+    sol = ldiv!(cache.u, cache.cacheval.factors, cache.b)
+    return SciMLBase.build_linear_solution(alg, sol, nothing, cache)
+end