@@ -41,16 +41,16 @@ mutable struct LookAheadLanczosDecomp{OpT, OptT, VecT, MatT, ElT, ElRT}
41
41
q:: VecT
42
42
p̂:: VecT
43
43
q̂:: VecT
44
- P:: MatT
45
- Q:: MatT
44
+ P:: LimitedMemoryMatrix{ElT, MatT}
45
+ Q:: LimitedMemoryMatrix{ElT, MatT}
46
46
47
47
# V-W sequence
48
48
v:: VecT
49
49
w:: VecT
50
50
ṽ:: VecT
51
51
w̃:: VecT
52
- V:: MatT
53
- W:: MatT
52
+ V:: LimitedMemoryMatrix{ElT, MatT}
53
+ W:: LimitedMemoryMatrix{ElT, MatT}
54
54
55
55
# matrix-vector products
56
56
Ap:: VecT
@@ -153,8 +153,8 @@ function LookAheadLanczosDecomp(
153
153
q = similar (v)
154
154
p̂ = similar (v)
155
155
q̂ = similar (v)
156
- P = similar (v, size (v, 1 ), 0 )
157
- Q = similar (v, size (v, 1 ), 0 )
156
+ P = LimitedMemoryMatrix ( similar (v, size (v, 1 ), 0 ), max_block_size )
157
+ Q = LimitedMemoryMatrix ( similar (v, size (v, 1 ), 0 ), max_block_size )
158
158
Ap = similar (v)
159
159
Atq = similar (v)
160
160
qtAp = zero (elT)
@@ -163,8 +163,8 @@ function LookAheadLanczosDecomp(
163
163
164
164
ṽ = similar (v)
165
165
w̃ = similar (v)
166
- V = reshape (copy (v), size (v, 1 ), 1 )
167
- W = reshape (copy (w), size (v, 1 ), 1 )
166
+ V = LimitedMemoryMatrix (copy (v), max_block_size )
167
+ W = LimitedMemoryMatrix (copy (w), max_block_size )
168
168
w̃tṽ = zero (elT)
169
169
170
170
wtv = transpose (w) * v
@@ -451,10 +451,6 @@ function _update_U!(ld, innerp)
451
451
# U is upper triangular matrix in decomposition of recurrence relation for P-Q sequence
452
452
# updates last column of U
453
453
n, mk, k, kstar = ld. n, ld. mk, ld. k, ld. kstar
454
- # idx_offset = mk[kstar]-1
455
- idx_offset = 0
456
- # TODO
457
- # we only store the entries from mk[kstar] to n-1
458
454
ld. U = UpperTriangular (
459
455
[
460
456
ld. U fill (0.0 , n- 1 , 1 )
@@ -463,12 +459,12 @@ function _update_U!(ld, innerp)
463
459
)
464
460
465
461
for i = kstar: k- 1
466
- block_start = mk[i]- idx_offset
467
- block_end = mk[i+ 1 ]- 1 - idx_offset
462
+ block_start = mk[i]
463
+ block_end = mk[i+ 1 ]- 1
468
464
ld. U[block_start: block_end, end ] .= ld. E[block_start: block_end, block_start: block_end] \ ld. F̃lastcol[block_start: block_end]
469
465
end
470
466
if ! innerp && ! isone (n)
471
- ld. U[mk[k]- idx_offset : end - 1 , end ] .= ld. E[mk[k]: end , mk[k]: end ] \ ld. F̃lastcol[mk[k]- idx_offset : end ]
467
+ ld. U[mk[k]: end - 1 , end ] .= ld. E[mk[k]: end , mk[k]: end ] \ ld. F̃lastcol[mk[k]: end ]
472
468
end
473
469
return ld
474
470
end
@@ -478,11 +474,11 @@ function _update_p̂q̂_common!(ld)
478
474
mk, k, kstar = ld. mk, ld. k, ld. kstar
479
475
copyto! (ld. p̂, ld. v)
480
476
copyto! (ld. q̂, ld. w)
481
- # idx_offset = mk[kstar]-1
482
- idx_offset = 0
483
477
for i = mk[kstar]: mk[k]- 1 # TODO : OPTIMIZE gemv! (or 5-arg mul!)
484
- axpy! (- ld. U[i- idx_offset, end ], ld. P[:, i- idx_offset], ld. p̂)
485
- axpy! (- ld. U[i- idx_offset, end ] * ld. γ[end ] / ld. γ[i- idx_offset], ld. Q[:, i- idx_offset], ld. q̂)
478
+ if ld. U[i, end ] != 0
479
+ axpy! (- ld. U[i, end ], ld. P[:, i], ld. p̂)
480
+ axpy! (- ld. U[i, end ] * ld. γ[end ] / ld. γ[i], ld. Q[:, i], ld. q̂)
481
+ end
486
482
end
487
483
end
488
484
function _update_Gnm1! (ld)
@@ -527,10 +523,11 @@ function _update_pq_regular!(ld)
527
523
n, mk, k, kstar = ld. n, ld. mk, ld. k, ld. kstar
528
524
copyto! (ld. p, ld. p̂)
529
525
copyto! (ld. q, ld. q̂)
530
- idx_offset = 0
531
526
for i = mk[k]: n- 1 # TODO : OPTIMIZE gemv! (or 5-arg mul!)
532
- axpy! (- ld. U[i- idx_offset, end ], ld. P[:, i], ld. p)
533
- axpy! (- ld. U[i- idx_offset, end ] * ld. γ[n] / ld. γ[i - idx_offset], ld. Q[:, i], ld. q)
527
+ if ld. U[i, end ] != 0
528
+ axpy! (- ld. U[i, end ], ld. P[:, i], ld. p)
529
+ axpy! (- ld. U[i, end ] * ld. γ[n] / ld. γ[i], ld. Q[:, i], ld. q)
530
+ end
534
531
end
535
532
return ld
536
533
end
@@ -540,11 +537,13 @@ function _update_pq_inner!(ld)
540
537
n, mk, k, kstar = ld. n, ld. mk, ld. k, ld. kstar
541
538
copyto! (ld. p, ld. p̂)
542
539
copyto! (ld. q, ld. q̂)
543
- idx_offset = 0
544
540
for i = mk[k]: n- 1 # TODO : OPTIMIZE gemv!
545
- ld. U[i- idx_offset, end ] = _u (i, n, mk[k])
546
- axpy! (- _u (i, n, mk[k]), ld. P[:, i], ld. p)
547
- axpy! (- _u (i, n, mk[k]) * ld. γ[n] / ld. γ[i - idx_offset], ld. Q[:, i], ld. q)
541
+ u = _u (i, n, mk[k])
542
+ ld. U[i, end ] = u
543
+ if u != 0
544
+ axpy! (- u, ld. P[:, i], ld. p)
545
+ axpy! (- u * ld. γ[n] / ld. γ[i], ld. Q[:, i], ld. q)
546
+ end
548
547
end
549
548
return ld
550
549
end
@@ -558,8 +557,8 @@ function _matvec_pq!(ld, retry=false)
558
557
ld. P[:, end ] .= ld. p
559
558
ld. Q[:, end ] .= ld. q
560
559
else
561
- ld. P = [ ld. P ld . p]
562
- ld. Q = [ ld. Q ld . q]
560
+ hcat! ( ld. P, ld. p)
561
+ hcat! ( ld. Q, ld. q)
563
562
end
564
563
mul! (ld. Ap, ld. A, ld. p)
565
564
ld. qtAp = transpose (ld. q) * ld. Ap
@@ -647,11 +646,11 @@ function _update_v̂ŵ_common!(ld)
647
646
# 5.2.6
648
647
l, lstar, nl, n = ld. l, ld. lstar, ld. nl, ld. n
649
648
650
- # idx_offset = nl[lstar]-1
651
- idx_offset = 0
652
649
for i = nl[lstar]: nl[l]- 1 # TODO : OPTIMIZE gemv! (or 5-arg mul!)
653
- axpy! (- ld. L[i- idx_offset, n], ld. V[:, i- idx_offset], ld. Ap)
654
- axpy! (- ld. L[i- idx_offset, n] * ld. γ[n] / ld. γ[i- idx_offset], ld. W[:, i- idx_offset], ld. Atq)
650
+ if ld. L[i, n] != 0
651
+ axpy! (- ld. L[i, n], ld. V[:, i], ld. Ap)
652
+ axpy! (- ld. L[i, n] * ld. γ[n] / ld. γ[i], ld. W[:, i], ld. Atq)
653
+ end
655
654
end
656
655
return ld
657
656
end
@@ -691,11 +690,11 @@ function _update_vw_regular!(ld)
691
690
n, l, lstar, nl, k, mk = ld. n, ld. l, ld. lstar, ld. nl, ld. k, ld. mk
692
691
copyto! (ld. ṽ, ld. Ap)
693
692
copyto! (ld. w̃, ld. Atq)
694
- # idx_offset = nl[lstar]-1
695
- idx_offset = 0
696
693
for i = nl[l]: n # TODO : OPTIMIZE gemv! (or 5-arg mul!)
697
- axpy! (- ld. L[i- idx_offset, end ], ld. V[:, i], ld. ṽ)
698
- axpy! (- ld. L[i- idx_offset, end ] * ld. γ[n] / ld. γ[i - idx_offset], ld. W[:, i], ld. w̃)
694
+ if ld. L[i, end ] != 0
695
+ axpy! (- ld. L[i, end ], ld. V[:, i], ld. ṽ)
696
+ axpy! (- ld. L[i, end ] * ld. γ[n] / ld. γ[i], ld. W[:, i], ld. w̃)
697
+ end
699
698
end
700
699
return ld
701
700
end
@@ -705,12 +704,13 @@ function _update_vw_inner!(ld)
705
704
n, l, k, lstar, nl, mk = ld. n, ld. l, ld. k, ld. lstar, ld. nl, ld. mk
706
705
copyto! (ld. ṽ, ld. Ap)
707
706
copyto! (ld. w̃, ld. Atq)
708
- # idx_offset = nl[lstar]-1
709
- idx_offset = 0
710
707
for i = nl[l]: n # TODO : OPTIMIZE gemv!
711
- ld. L[i- idx_offset, end ] = _l (i, n, nl[l])
712
- axpy! (- _l (i, n, nl[l]), ld. V[:, i], ld. ṽ)
713
- axpy! (- _l (i, n, nl[l]) * ld. γ[n] / ld. γ[i - idx_offset], ld. W[:, i], ld. w̃)
708
+ ll = _l (i, n, nl[l])
709
+ ld. L[i, end ] = ll
710
+ if ll != 0
711
+ axpy! (- _l (i, n, nl[l]), ld. V[:, i], ld. ṽ)
712
+ axpy! (- _l (i, n, nl[l]) * ld. γ[n] / ld. γ[i], ld. W[:, i], ld. w̃)
713
+ end
714
714
end
715
715
return ld
716
716
end
@@ -743,8 +743,8 @@ function _update_vw!(ld)
743
743
ld. v = ld. ṽ / ld. ρ
744
744
ld. w = ld. w̃ / ld. ξ
745
745
ld. wtv = ld. w̃tṽ / (ld. ρ * ld. ξ)
746
- ld. V = [ ld. V ld . v]
747
- ld. W = [ ld. W ld . w]
746
+ hcat! ( ld. V, ld. v)
747
+ hcat! ( ld. W, ld. w)
748
748
return ld
749
749
end
750
750
0 commit comments