BlockDiagonal experiment

Pawel Latawiec · Pawel Latawiec · commit 34792c30a2af · 2022-07-27T22:49:32.000-04:00
diff --git a/Project.toml b/Project.toml
@@ -3,6 +3,7 @@ uuid = "42fd0dbc-a981-5370-80f2-aaf504508153"
 version = "0.9.2"
 
 [deps]
+BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -12,3 +13,4 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 [compat]
 RecipesBase = "0.6, 0.7, 0.8, 1.0"
 julia = "1.3"
+BlockDiagonals = "0.1"
diff --git a/src/lal.jl b/src/lal.jl
@@ -1,6 +1,8 @@
 using Printf
+import BlockDiagonals: BlockDiagonal
+import BlockDiagonals
 import Base: iterate
-import LinearAlgebra: UpperTriangular
+import LinearAlgebra: UpperTriangular, UpperHessenberg
 
 """
     LookAheadLanczosDecompOptions
@@ -71,21 +73,23 @@ mutable struct LookAheadLanczosDecomp{OpT, OptT, VecT, MatT, ElT, ElRT}
     γ::Vector{ElRT}
 
     # Eq. 2.13
-    D::Matrix{ElT}
+    D::BlockDiagonal{ElT, Matrix{ElT}}
     # Eq. 3.14
-    E::Matrix{ElT}
+    E::BlockDiagonal{ElT, Matrix{ElT}}
     # Defined after Eq. 5.1
-    F::Matrix{ElT}
+    F::BlockDiagonal{ElT, Matrix{ElT}}
     F̃lastcol::Vector{ElT}
     # Eq. 5.1
     G::Vector{ElT}
     # Eq. 3.11
     H::Vector{ElT}
 
     # Eq. 3.9
+    # need to keep previous columns of U for G checks
     U::UpperTriangular{ElT, Matrix{ElT}}
-    L::Matrix{ElT}
-
+    # need to keep previous columns of L for H checks
+    L::UpperHessenberg{ElT, Matrix{ElT}}
+    
     # Indices tracking location in block and sequence
     n::Int
     k::Int
@@ -173,16 +177,16 @@ function LookAheadLanczosDecomp(
     γ = Vector{real(elT)}(undef, 1)
     γ[1] = 1.0
 
-    D = Matrix{elT}(undef, 0, 0)
-    E = Matrix{elT}(undef, 0, 0)
+    D = BlockDiagonal{elT, Matrix{elT}}(Vector{Matrix{elT}}())
+    E = BlockDiagonal{elT, Matrix{elT}}(Vector{Matrix{elT}}())
     G = Vector{elT}()
     H = Vector{elT}()
 
-    F = Matrix{elT}(undef, 0, 0)
+    F = BlockDiagonal{elT, Matrix{elT}}(Vector{Matrix{elT}}())
     F̃lastcol = Vector{elT}()
 
     U = UpperTriangular(Matrix{elT}(undef, 0, 0))
-    L = Matrix{elT}(undef, 0, 0)
+    L = UpperHessenberg(Matrix{elT}(undef, 0, 0))
 
     # Alg 5.2.0
     n     = 1
@@ -241,6 +245,34 @@ _VW_block_size(ld) = ld.n+1 - ld.nl[ld.l]
 _VW_prev_block_size(ld) = ld.nl[ld.l] - ld.nl[max(1, ld.l-1)]
 _is_block_small(ld, n) = n < ld.opts.max_block_size
 
+"""
+    _grow_last_block!(A, Bcol, Brow, Bcorner)
+
+Grow the last block of `A` in place by one row and one column and return `A`. The trailing
+entries of `Bcol` and `Brow` (vector or row) border the block; `Bcorner` is the new corner.
+"""
+function _grow_last_block!(A::BlockDiagonal{T, TM}, Bcol, Brow, Bcorner) where {T, TM}
+    b = BlockDiagonals.blocks(A)
+    isempty(b) && throw(ArgumentError("cannot grow a block of an empty BlockDiagonal"))
+    s = size(last(b), 1)
+    # only the last `s` entries border the block; `vec` lets `Brow` be a vector or a row
+    col, row = vec(Bcol)[end-s+1:end], permutedims(vec(Brow)[end-s+1:end])
+    b[end] = TM([last(b) col; row Bcorner])
+    return A
+end
+
+"""
+    _start_new_block!(A, B)
+
+Append a `1×1` block holding the single element of `B` (scalar or collection) to `A`.
+"""
+function _start_new_block!(A::BlockDiagonal{T, TM}, B) where {T, TM}
+    push!(BlockDiagonals.blocks(A), TM(reshape(collect(B), 1, 1)))  # no `only`: Julia ≥ 1.4
+    return A
+end
+
+Base.size(B::BlockDiagonals.BlockDiagonal) = ntuple(d -> mapreduce(b -> size(b, d), +, BlockDiagonals.blocks(B); init=0), Val(2))  # `sum(f, itr; init)` needs Julia ≥ 1.6
+
 start(::LookAheadLanczosDecomp) = 1
 done(ld::LookAheadLanczosDecomp, iteration::Int) = iteration ≥ ld.opts.max_iter
 function iterate(ld::LookAheadLanczosDecomp, n::Int=start(ld))
@@ -288,7 +320,7 @@ function _update_PQ_sequence!(ld)
         if !innerp
             # Alg. 5.2.8
             _update_pq_regular!(ld)
-            _mv_pq!(ld)
+            _matvec_pq!(ld)
             # Alg. 5.2.9
             _update_Gn!(ld)
             innerp = inner_ok && _check_G(ld)
@@ -306,19 +338,19 @@ function _update_PQ_sequence!(ld)
                 # Alg. 5.2.11
                 isverbose(ld) && @info "Inner P-Q construction, second G check"
                 _update_pq_inner!(ld)
-                _mv_pq!(ld, true)
+                _matvec_pq!(ld, true)
             end
         else
             # Alg. 5.2.11
             isverbose(ld) && @info "Inner P-Q construction, first G check"
             _update_pq_inner!(ld)
-            _mv_pq!(ld)
+            _matvec_pq!(ld)
         end
     else
         # Alg. 5.2.11
         isverbose(ld) && @info "Inner P-Q construction, singular E check"
         _update_pq_inner!(ld)
-        _mv_pq!(ld)
+        _matvec_pq!(ld)
     end
     ld.innerp = innerp
     return ld
@@ -397,20 +429,16 @@ function _update_D!(ld)
     # Alg. 5.2.1
     # Eq. 5.2:
     # F[n-1] = Wt[n-1]V[n]L[n-1] = D[n-1]L[1:n-1, 1:n-1] + l[n, n-1]D[1:n-1, n][0 ... 0 1]
-    # => D[1:end-1, end] = (F[:, end] - (D_prev L[1:end-1, 1:end]))[:, end] / ρ
+    # => D[1:end-1, end] = (F[:, end] - (D_prev L[1:end-1, end])) / ρ
     # Eq. 3.15, (D Γ)ᵀ = (D Γ)
     # D[n, n] = wtv
 
-    # TODO: closed block
-    if isone(ld.n)
-        ld.D = fill(ld.wtv, 1, 1)
+    if isone(ld.n) || _VW_block_size(ld) == 1
+        _start_new_block!(ld.D, ld.wtv)
     else
-        D_lastcol = (ld.F[:, end] - (ld.D * ld.L[1:end-1, :])[:, end]) / ld.ρ
-        D_lastrow = D_lastcol * ld.γ[end] ./ ld.γ[1:end-1]
-        ld.D = [
-            ld.D D_lastcol
-            transpose(D_lastrow) ld.wtv
-        ]
+        D_lastcol = (ld.F[:, end] - (ld.D * ld.L[1:end-1, end])) / ld.ρ
+        D_lastrow = transpose(D_lastcol * ld.γ[end] ./ ld.γ[1:end-1])
+        _grow_last_block!(ld.D, D_lastcol, D_lastrow, ld.wtv)
     end
     return ld
 end
@@ -433,14 +461,17 @@ function _update_Flastrow!(ld)
     # Eq. 5.2 (w/ indices advanced): 
     # F_{n} = D_{n}L[1:n, 1:n] + l[n+1, n]D_{n}[1:n, n+1][0 ... 0 1]
     # TODO: block
-    if !isone(ld.n) # We only need to do this if we are constructing a block
+    if isone(ld.n)
+        _start_new_block!(ld.F, 0.0)
+    else
         Flastrow = reshape(ld.D[end:end, :] * ld.L, :)
         ld.F̃lastcol = Flastrow .* ld.γ[1:end-1] ./ ld.γ[end]
         # we are not able to fill in the last column yet, so we fill with zero
-        ld.F = [
-            ld.F fill(0.0, size(ld.F, 1))
-            transpose(Flastrow) 0.0
-        ]
+        if _VW_block_size(ld) == 1
+            _grow_last_block!(ld.F, fill(0.0, size(ld.F, 1)), transpose(Flastrow), 0.0)
+        else
+            _grow_last_block!(ld.F, fill(0.0, size(ld.F, 1)), transpose(Flastrow), 0.0)
+        end
     end
 end
 
@@ -453,6 +484,7 @@ function _update_U!(ld, innerp)
     idx_offset = 0
     # TODO
     # we only store the entries from mk[kstar] to n-1
+    
     ld.U  = UpperTriangular(
         [
             ld.U fill(0.0, n-1, 1)
@@ -547,7 +579,7 @@ function _update_pq_inner!(ld)
     return ld
 end
 
-function _mv_pq!(ld, retry=false)
+function _matvec_pq!(ld, retry=false)
     # Common part of Alg. 5.2.8, Alg. 5.2.11
     # if retry, then this means we have already added data to the vectors, but our
     # inner block check failed, so we overwrite what he have. This is the case if
@@ -580,16 +612,13 @@ function _update_E!(ld)
     # 5.2.14
     n = ld.n
 
-    if isone(ld.n)
-        ld.E = fill(ld.qtAp, 1, 1)
+    if isone(ld.n) || _PQ_block_size(ld) == 1
+        _start_new_block!(ld.E, ld.qtAp)
     else
         ΓUtinvΓ = ld.γ .* transpose(ld.U) ./ transpose(ld.γ)
-        Elastrow = (ΓUtinvΓ \ ld.F[1:n, 1:n-1])[n, :]
+        Elastrow = (ΓUtinvΓ[end, end] \ ld.F[n:n, 1:n-1] - ΓUtinvΓ[end:end, 1:end-1]*ld.E)
         Elastcol = (Elastrow .* ld.γ[1:n-1] ./ ld.γ[n])
-        ld.E = [
-            ld.E Elastcol
-            transpose(Elastrow) ld.qtAp
-        ]
+        _grow_last_block!(ld.E, Elastcol, Elastrow, ld.qtAp)
     end
     return ld
 end
@@ -608,7 +637,7 @@ function _update_Flastcol!(ld)
     ΓUtinvΓ = ld.γ .* transpose(ld.U) ./ transpose(ld.γ)
     # length n, ld.F_lastrow of length n-1
     if isone(n)
-        ld.F = fill(ΓUtinvΓ[end, end] * ld.E[end, end], 1, 1)
+        ld.F[1, 1] = ΓUtinvΓ[end, end] * ld.E[end, end]
     else
         ld.F[:, end] .= ΓUtinvΓ * ld.E[:, end]
     end
@@ -625,14 +654,19 @@ function _update_L!(ld, innerv)
         Llastcol[block_start:block_end] .= ld.D[block_start:block_end, block_start:block_end] \ ld.F[block_start:block_end, end]
     end
     if !innerv
+        @show ld.D
         Llastcol[nl[l]:end] .= ld.D[nl[l]:end, nl[l]:end] \ ld.F[nl[l]:end, end]
     end
     if isone(n)
-        ld.L = reshape([Llastcol[1]
+        ld.L = UpperHessenberg(
+            reshape([Llastcol[1]
                 0.0], 2, 1)
+        )
     else
-        ld.L = [ld.L Llastcol
-                fill(0.0, 1, n)]
+        ld.L = UpperHessenberg(
+            [ld.L Llastcol
+            fill(0.0, 1, n)]
+        )
     end
     return ld
 end
diff --git a/test/lal.jl b/test/lal.jl
@@ -6,6 +6,28 @@ using Test
 # Equation references and identities from:
 # Freund, R. W., & Nachtigal, N. M. (1994). An Implementation of the QMR Method Based on Coupled Two-Term Recurrences. SIAM Journal on Scientific Computing, 15(2), 313–337. https://doi.org/10.1137/0915022
 
+function _append_leading_nonzeros(A, B)
+    # Grow `A` by one row and one column using the trailing row and column of `B`: the
+    # last column of `B` (minus its final entry) borders `A` on the right and the last row
+    # of `B` borders it below. E.g. `A = [0;]`, `B = [1 2; 3 4]` gives `[0 2; 3 4]`. When
+    # `A` is larger than that, the borders are zero-padded at the top and on the left.
+    rows_A, rows_B = size(A, 1), size(B, 1)
+    @assert rows_A ≥ rows_B - 1
+    right_border = B[1:end-1, end:end]
+    bottom_border = B[end:end, :]
+    # `B` is exactly one larger than `A`: the borders fit without padding
+    if rows_A == rows_B - 1
+        return [A right_border; bottom_border]
+    end
+    # otherwise pad the borders with zeros so that they span all of `A`
+    top_pad = zeros(eltype(A), 1 + rows_A - rows_B, 1)
+    left_pad = zeros(eltype(A), 1, 1 + size(A, 2) - size(B, 2))
+    return [
+        A [top_pad; right_border]
+        left_pad bottom_border
+    ]
+end
+
 function _iterate_and_collect_lal_intermediates(ld)
     # iterates through ld and collects the intermediate matrices by appending the last
     # row and column to the matrix being built
@@ -45,13 +67,14 @@ function _iterate_and_collect_lal_intermediates(ld)
             U = ld.U[:, :]
             L = ld.L[:, :]
         else
-            D = [D ld.D[1:end-1, end]; ld.D[end:end, :]]
-            E = [E ld.E[1:end-1, end]; ld.E[end:end, :]]
-            F = [F ld.F[1:end-1, end]; ld.F[end:end, :]]
+            ivw = IS._VW_block_size(ld)
+            D = _append_leading_nonzeros(D, ld.D)
+            E = _append_leading_nonzeros(E, ld.E)
+            F = _append_leading_nonzeros(F, ld.F)
             # F̃ row is not explicitly calculated, so we calculate from F using Lemma 5.1
             F̃ = [F̃ ld.F̃lastcol; (transpose(F * Γ[1:end-1, 1:end-1]) / Γ[1:end-1, 1:end-1])[end:end, :]]
-            U = [U ld.U[1:end-1, end]; ld.U[end:end, :]]
-            L = [L ld.L[1:end-1, end]; ld.L[end:end, :]]
+            U = _append_leading_nonzeros(U, ld.U)
+            L = _append_leading_nonzeros(L, ld.L)
         end
     end
 
@@ -134,6 +157,34 @@ function test_regular_lal_identities(ld, log; early_exit=false)
     end
 end
 
+@testset "Block Diagonal Utilities" begin
+    @testset "$T" for T in (Matrix, UpperTriangular)
+        # fresh fixture per check: both helpers mutate their first argument
+        make_blockdiag() = IS.BlockDiagonal([T([1 2; 3 4]), T(fill(1, 1, 1))])
+
+        # Expected matrices are wrapped in `T` too, so for `UpperTriangular` the
+        # sub-diagonal entries vanish on both sides and the comparison still holds.
+        border_col, border_row, corner = [-1], [1], 0
+        A = make_blockdiag()
+        IS._grow_last_block!(A, border_col, border_row, corner)
+        @test A ≈ T([
+            1 2 0 0
+            3 4 0 0
+            0 0 1 -1
+            0 0 1 0
+        ])
+
+        A = make_blockdiag()
+        IS._start_new_block!(A, 1)
+        @test A ≈ T([
+            1 2 0 0
+            3 4 0 0
+            0 0 1 0
+            0 0 0 1
+        ])
+    end
+end
+
 @testset "A = I" begin
     # A = I terminates immediately (because p1 = v1 -> v2 = Ap1 - v1 = 0)
     A = Diagonal(fill(1.0, 5))