@@ -167,7 +167,6 @@ function BivariateGramMatrix(μ::BlockedVector{T, <: PaddedVector{T}}, X::XT, Y:
167
167
end
168
168
for n in 3 : n
169
169
for m in n: min (N- n+ 1 , b+ n)
170
- # println("m = $m, n = $n")
171
170
recurse! (W, X, Y, m, n)
172
171
end
173
172
symmetrize_block! (view (W, Block (n, n)))
@@ -622,3 +621,316 @@ function _chebyshev_y(::Type{T}, n::Integer) where T
622
621
623
622
return Y
624
623
end
624
+
625
+
626
+ # # Fast Cholesky algorithm using the displacement equations: XᵀW - WX = Gx J Gxᵀ and YᵀW - WY = Gy J Gyᵀ
627
+
628
+ function cholesky (W:: BivariateChebyshevGramMatrix{T} ) where T
629
+ n = blocksize (W, 1 )
630
+ N = n* (n+ 1 )÷ 2
631
+ X = _chebyshev_x (T, n)
632
+ Y = _chebyshev_y (T, n)
633
+ @assert blockbandwidths (X) == blockbandwidths (Y) == (1 , 1 )
634
+ @assert subblockbandwidths (X) == (0 , 0 )
635
+ @assert subblockbandwidths (Y) == (1 , 1 )
636
+ Xt = BandedBlockBandedMatrix (X' )
637
+ Yt = BandedBlockBandedMatrix (Y' )
638
+ Gx = Matrix (compute_skew_generators (Val (1 ), W))
639
+ Gy = Matrix (compute_skew_generators (Val (2 ), W))
640
+ L = BlockedMatrix {T} (I, 1 : n, 1 : n)
641
+ c = zeros (T, N, n)
642
+ ĉ = zeros (T, N, n)
643
+ vc = view (c, 1 : N, 1 : 1 )
644
+ vc .= W[1 : N, 1 : 1 ]
645
+ ĉ_xd = zeros (T, N, n)
646
+ ĉ_yd = zeros (T, N, n)
647
+ l = zeros (T, N, n)
648
+ linvd = zeros (T, N, n)
649
+ d = zeros (T, n, n)
650
+ Xrow1 = zeros (T, N, n)
651
+ Yrow1 = zeros (T, N, n)
652
+ vx = zeros (T, N, n)
653
+ vy = zeros (T, N, n)
654
+ @inbounds for k in 1 : n- 1
655
+ K = k* (k- 1 )÷ 2
656
+ # d = cholesky(Symmetric(c[Block(1, 1)], :L)).L
657
+ # L[Block(k, k)] .= d
658
+ vd = view (d, 1 : k, 1 : k)
659
+ vd .= view (c, 1 : k, 1 : k)
660
+ cholesky! (Symmetric (vd, :L ))
661
+ # l = BlockedMatrix(Matrix(c[Block.(2:n-k+1), Block(1)])/d', k+1:n, [k])
662
+ # L[Block.(k+1:n), Block(k)] .= l
663
+ vl = view (l, k+ 1 : N- K, 1 : k)
664
+ vl .= view (c, k+ 1 : N- K, 1 : k)
665
+ rdiv! (vl, LowerTriangular (vd)' )
666
+ view (L, Block (k, k)) .= vd
667
+ # view(L, Block.(k+1:n), Block(k)) .= vl
668
+ J = 1
669
+ for j in k+ 1 : n
670
+ view (L, Block (j, k)) .= view (vl, J: J- 1 + j, 1 : k)
671
+ J += j
672
+ end
673
+ # vx = Gx[:, 1:n]*Gx[1:k, n+1:2n]' - Gx[:, n+1:2n]*Gx[1:k, 1:n]'
674
+ # vy = Gy[:, 1:n]*Gy[1:k, n+1:2n]' - Gy[:, n+1:2n]*Gy[1:k, 1:n]'
675
+ compute_v! (vx, Gx, n, k)
676
+ compute_v! (vy, Gy, n, k)
677
+ # ĉ_xd = X[Block.(k:n), Block.(k:n)]'c + Xrow1*c[1:k, 1:k]-c*Xrow1[1:k, 1:k]'-vx
678
+ # ĉ_yd = Y[Block.(k:n), Block.(k:n)]'c + Yrow1*c[1:k, 1:k]-c*Yrow1[1:k, 1:k]'-vy
679
+ compute_ĉd! (ĉ_xd, Xt, Xrow1, vx, c, n, k)
680
+ compute_ĉd! (ĉ_yd, Yt, Yrow1, vy, c, n, k)
681
+ # ĉ = BlockedMatrix([Matrix(ĉ_xd) / Diagonal(X[Block(2, 1)]) Matrix(ĉ_yd)[:, end]/Y[BlockIndex((2, 1), (k+1, k))]], k:n, [k+1])
682
+ vĉ1 = view (ĉ, 1 : N- K, 1 : k)
683
+ vĉ1 .= view (ĉ_xd, 1 : N- K, 1 : k)./ view (X. data, Block (3 , k))
684
+ # vĉ1 .= view(ĉ_xd, 1:N-K, 1:k)
685
+ # rdiv!(vĉ1, Diagonal(X[Block(k+1, k)]))
686
+ vĉ2 = view (ĉ, 1 : N- K, k+ 1 )
687
+ vĉ2 .= view (ĉ_yd, 1 : N- K, k)./ viewblock (Y, Block (k+ 1 , k))[k+ 1 , k]
688
+ vĉ = view (ĉ, k+ 1 : N- K, 1 : k+ 1 )
689
+ # c = ĉ[Block.(2:n-k+1), :] - l * (d \ c[Block(2, 1)]')
690
+ # linvd = vl/LowerTriangular(vd)
691
+ vlinvd = view (linvd, 1 : N- K- k, 1 : k)
692
+ vlinvd .= vl
693
+ rdiv! (vlinvd, LowerTriangular (vd))
694
+ vc = view (c, 1 : N- K- k, 1 : k+ 1 )
695
+ # vc .= vĉ .- vlinvd*vc[k+1:2k+1, 1:k]'
696
+ vd = view (d, 1 : k+ 1 , 1 : k)
697
+ vd .= view (c, k+ 1 : 2 k+ 1 , 1 : k)
698
+ vc .= vĉ
699
+ mul! (vc, vlinvd, vd' , - 1 , 1 )
700
+ # c[Block(1, 1)] .= Symmetric(c[Block(1, 1)], :L)
701
+ vc[1 : k+ 1 , 1 : k+ 1 ] .= Symmetric (view (c, 1 : k+ 1 , 1 : k+ 1 ), :L )
702
+ # Xrow1 = -linvd*Matrix(X[Block(k+1, k)]')
703
+ # Yrow1 = -linvd*Matrix(Y[Block(k+1, k)]')
704
+ mul! (view (Xrow1, 1 : N- K- k, 1 : k+ 1 ), vlinvd, viewblock (X, Block (k+ 1 , k))' , - 1 , 0 )
705
+ mul! (view (Yrow1, 1 : N- K- k, 1 : k+ 1 ), vlinvd, viewblock (Y, Block (k+ 1 , k))' , - 1 , 0 )
706
+ # Gx = Gx[k+1:end, :] - linvd * Gx[1:k, :]
707
+ # Gy = Gy[k+1:end, :] - linvd * Gy[1:k, :]
708
+ update_generators! (Gx, linvd, n, k)
709
+ update_generators! (Gy, linvd, n, k)
710
+ end
711
+ L[Block (n, n)] = cholesky (Symmetric (c[1 : n, 1 : n], :L )). L
712
+ return Cholesky (L, ' L' , 0 )
713
+ end
714
+
715
+ function compute_v! (v, G, n, k)
716
+ N = n* (n+ 1 )÷ 2
717
+ K = k* (k- 1 )÷ 2
718
+ vv = view (v, 1 : N- K, 1 : k)
719
+ vG1 = view (G, K+ 1 : N, 1 : n)
720
+ vG2 = view (G, K+ 1 : k+ K, n+ 1 : 2 n)
721
+ vG3 = view (G, K+ 1 : N, n+ 1 : 2 n)
722
+ vG4 = view (G, K+ 1 : k+ K, 1 : n)
723
+ mul! (vv, vG1, vG2' )
724
+ mul! (vv, vG3, vG4' , - 1 , 1 )
725
+ end
726
+
727
+ function update_generators! (G, linvd, n, k)
728
+ N = n* (n+ 1 )÷ 2
729
+ K = k* (k+ 1 )÷ 2
730
+ vG1 = view (G, K+ 1 : N, 1 : 2 n)
731
+ vG2 = view (G, K- k+ 1 : K, 1 : 2 n)
732
+ vlinvd = view (linvd, 1 : N- K, 1 : k)
733
+ mul! (vG1, vlinvd, vG2, - 1 , 1 )
734
+ end
735
+
736
+ # ĉd = Z[Block.(k:n), Block.(k:n)]'c + Zrow1*c[1:k, 1:k]-c*Zrow1[1:k, 1:k]'-v
737
+ function compute_ĉd! (ĉd, Zt, Zrow1, v, c, n, k)
738
+ N = n* (n+ 1 )÷ 2
739
+ K = k* (k- 1 )÷ 2
740
+ jacobi_mul! (ĉd, Zt, c, n, k)
741
+ vĉd = view (ĉd, 1 : N- K, 1 : k)
742
+ mul! (vĉd, view (Zrow1, 1 : N- K, 1 : k), view (c, 1 : k, 1 : k), 1 , 1 )
743
+ mul! (vĉd, view (c, 1 : N- K, 1 : k), view (Zrow1, 1 : k, 1 : k)' , - 1 , 1 )
744
+ vĉd .- = view (v, 1 : N- K, 1 : k)
745
+ end
746
+
747
+ # y[:, 1:k] = Z[Block.(k:n), Block.(k:n)]*x[:, 1:k]
748
+ function jacobi_mul! (y, Z, x, n, k)
749
+ @assert 1 ≤ k ≤ n
750
+ if subblockbandwidths (Z) == (0 , 0 )
751
+ return jacobi_mul00! (y, Z, x, n, k)
752
+ elseif subblockbandwidths (Z) == (1 , 1 )
753
+ return jacobi_mul11! (y, Z, x, n, k)
754
+ else
755
+ return jacobi_mul_default! (y, Z, x, n, k)
756
+ end
757
+ end
758
+
759
+ function jacobi_mul00! (y, Z, x, n, k)
760
+ @inbounds for col in 1 : k
761
+ Jshift = 0
762
+ if n == 1
763
+ ZBj = view (Z. data, Block (2 , k))
764
+ for i in 1 : size (ZBj, 2 )
765
+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
766
+ end
767
+ else
768
+ if k == n
769
+ ZBj = view (Z. data, Block (2 , k))
770
+ for i in 1 : size (ZBj, 2 )
771
+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
772
+ end
773
+ else
774
+ ZBj = view (Z. data, Block (2 , k))
775
+ ZBjp1 = view (Z. data, Block (1 , k+ 1 ))
776
+ for i in 1 : size (ZBj, 2 )
777
+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
778
+ end
779
+ Jshift += size (ZBj, 2 )
780
+ for j in k+ 1 : n- 1
781
+ ZBjm1 = view (Z. data, Block (3 , j- 1 ))
782
+ ZBj = view (Z. data, Block (2 , j))
783
+ ZBjp1 = view (Z. data, Block (1 , j+ 1 ))
784
+ for i in 1 : size (ZBj, 2 )- 1
785
+ y[i+ Jshift, col] = ZBjm1[1 , i]* x[i+ Jshift- size (ZBjm1, 2 ), col] + ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
786
+ end
787
+ i = size (ZBj, 2 )
788
+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
789
+ Jshift += size (ZBj, 2 )
790
+ end
791
+ ZBjm1 = view (Z. data, Block (3 , n- 1 ))
792
+ ZBj = view (Z. data, Block (2 , n))
793
+ for i in 1 : size (ZBj, 2 )- 1
794
+ y[i+ Jshift, col] = ZBjm1[1 , i]* x[i+ Jshift- size (ZBjm1, 2 ), col] + ZBj[1 , i]* x[i+ Jshift, col]
795
+ end
796
+ i = size (ZBj, 2 )
797
+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
798
+ end
799
+ end
800
+ end
801
+ return y
802
+ end
803
+
804
+ function jacobi_mul11! (y, Z, x, n, k)
805
+ @inbounds for col in 1 : k
806
+ Jshift = 0
807
+ if n == 1
808
+ ZBj = view (Z. data, Block (2 , k))
809
+ for i in 1 : size (ZBj, 2 )
810
+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
811
+ end
812
+ else
813
+ if k == n
814
+ ZBj = view (Z. data, Block (2 , k))
815
+ for i in 1 : size (ZBj, 2 )
816
+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
817
+ end
818
+ for i in 2 : size (ZBj, 2 )
819
+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
820
+ end
821
+ for i in 1 : size (ZBj, 2 )- 1
822
+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
823
+ end
824
+ else
825
+ ZBj = view (Z. data, Block (2 , k))
826
+ for i in 1 : size (ZBj, 2 )
827
+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
828
+ end
829
+ for i in 2 : size (ZBj, 2 )
830
+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
831
+ end
832
+ for i in 1 : size (ZBj, 2 )- 1
833
+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
834
+ end
835
+
836
+ ZBjp1 = view (Z. data, Block (1 , k+ 1 ))
837
+ for i in 2 : size (ZBj, 2 )+ 1
838
+ y[i- 1 + Jshift, col] += ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
839
+ end
840
+ for i in 1 : size (ZBj, 2 )
841
+ y[i+ Jshift, col] += ZBjp1[2 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
842
+ end
843
+ for i in 1 : size (ZBj, 2 )- 1
844
+ y[i+ 1 + Jshift, col] += ZBjp1[3 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
845
+ end
846
+
847
+ Jshift += size (ZBj, 2 )
848
+ for j in k+ 1 : n- 1
849
+ ZBj = view (Z. data, Block (2 , j))
850
+ for i in 1 : size (ZBj, 2 )
851
+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
852
+ end
853
+ for i in 2 : size (ZBj, 2 )
854
+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
855
+ end
856
+ for i in 1 : size (ZBj, 2 )- 1
857
+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
858
+ end
859
+
860
+ ZBjm1 = view (Z. data, Block (3 , j- 1 ))
861
+ for i in 2 : size (ZBjm1, 2 )
862
+ y[i- 1 + Jshift, col] += ZBjm1[1 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
863
+ end
864
+ for i in 1 : size (ZBjm1, 2 )
865
+ y[i+ Jshift, col] += ZBjm1[2 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
866
+ end
867
+ for i in 1 : size (ZBjm1, 2 )
868
+ y[i+ 1 + Jshift, col] += ZBjm1[3 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
869
+ end
870
+
871
+ ZBjp1 = view (Z. data, Block (1 , j+ 1 ))
872
+ for i in 2 : size (ZBj, 2 )+ 1
873
+ y[i- 1 + Jshift, col] += ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
874
+ end
875
+ for i in 1 : size (ZBj, 2 )
876
+ y[i+ Jshift, col] += ZBjp1[2 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
877
+ end
878
+ for i in 1 : size (ZBj, 2 )- 1
879
+ y[i+ 1 + Jshift, col] += ZBjp1[3 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
880
+ end
881
+
882
+ Jshift += size (ZBj, 2 )
883
+ end
884
+ ZBj = view (Z. data, Block (2 , n))
885
+ for i in 1 : size (ZBj, 2 )
886
+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
887
+ end
888
+ for i in 2 : size (ZBj, 2 )
889
+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
890
+ end
891
+ for i in 1 : size (ZBj, 2 )- 1
892
+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
893
+ end
894
+
895
+ ZBjm1 = view (Z. data, Block (3 , n- 1 ))
896
+ for i in 2 : size (ZBjm1, 2 )
897
+ y[i- 1 + Jshift, col] += ZBjm1[1 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
898
+ end
899
+ for i in 1 : size (ZBjm1, 2 )
900
+ y[i+ Jshift, col] += ZBjm1[2 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
901
+ end
902
+ for i in 1 : size (ZBjm1, 2 )
903
+ y[i+ 1 + Jshift, col] += ZBjm1[3 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
904
+ end
905
+ end
906
+ end
907
+ end
908
+ return y
909
+ end
910
+
911
+ function jacobi_mul_default! (y, Z, x, n, k)
912
+ if n == 1
913
+ vy = view (y, 1 : k, 1 : k)
914
+ mul! (vy, Z[Block (k, k)], view (x, 1 : k, 1 : k))
915
+ else
916
+ vy = view (y, 1 : k, 1 : k)
917
+ Jstart = 1
918
+ Jstop = k
919
+ mul! (vy, Z[Block (k, k)], view (x, Jstart: Jstop, 1 : k))
920
+ mul! (vy, Z[Block (k, k+ 1 )], view (x, (Jstop+ 1 ): (Jstop+ k+ 1 ), 1 : k), 1 , 1 )
921
+ for j in k+ 1 : n- 1
922
+ vy = view (y, (Jstop+ 1 ): (Jstop+ j), 1 : k)
923
+ mul! (vy, Z[Block (j, j- 1 )], view (x, Jstart: Jstop, 1 : k))
924
+ Jstart = Jstop+ 1
925
+ Jstop = Jstop+ j
926
+ mul! (vy, Z[Block (j, j)], view (x, Jstart: Jstop, 1 : k), 1 , 1 )
927
+ mul! (vy, Z[Block (j, j+ 1 )], view (x, (Jstop+ 1 ): (Jstop+ j+ 1 ), 1 : k), 1 , 1 )
928
+ end
929
+ vy = view (y, (Jstop+ 1 ): (Jstop+ n), 1 : k)
930
+ mul! (vy, Z[Block (n, n- 1 )], view (x, Jstart: Jstop, 1 : k))
931
+ Jstart = Jstop+ 1
932
+ Jstop = Jstop+ n
933
+ mul! (vy, Z[Block (n, n)], view (x, Jstart: Jstop, 1 : k), 1 , 1 )
934
+ end
935
+ return y
936
+ end
0 commit comments