Skip to content

Commit 2a2a82b

Browse files
add fast Cholesky for bivariate Chebyshev Gram matrix
1 parent da1dc17 commit 2a2a82b

File tree

2 files changed

+338
-1
lines changed

2 files changed

+338
-1
lines changed

src/BivariateGramMatrix.jl

Lines changed: 313 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ function BivariateGramMatrix(μ::BlockedVector{T, <: PaddedVector{T}}, X::XT, Y:
167167
end
168168
for n in 3:n
169169
for m in n:min(N-n+1, b+n)
170-
#println("m = $m, n = $n")
171170
recurse!(W, X, Y, m, n)
172171
end
173172
symmetrize_block!(view(W, Block(n, n)))
@@ -622,3 +621,316 @@ function _chebyshev_y(::Type{T}, n::Integer) where T
622621

623622
return Y
624623
end
624+
625+
626+
## Fast Cholesky algorithm using the displacement equations: XᵀW - WX = Gx J Gxᵀ and YᵀW - WY = Gy J Gyᵀ
627+
628+
function cholesky(W::BivariateChebyshevGramMatrix{T}) where T
629+
n = blocksize(W, 1)
630+
N = n*(n+1)÷2
631+
X = _chebyshev_x(T, n)
632+
Y = _chebyshev_y(T, n)
633+
@assert blockbandwidths(X) == blockbandwidths(Y) == (1, 1)
634+
@assert subblockbandwidths(X) == (0, 0)
635+
@assert subblockbandwidths(Y) == (1, 1)
636+
Xt = BandedBlockBandedMatrix(X')
637+
Yt = BandedBlockBandedMatrix(Y')
638+
Gx = Matrix(compute_skew_generators(Val(1), W))
639+
Gy = Matrix(compute_skew_generators(Val(2), W))
640+
L = BlockedMatrix{T}(I, 1:n, 1:n)
641+
c = zeros(T, N, n)
642+
= zeros(T, N, n)
643+
vc = view(c, 1:N, 1:1)
644+
vc .= W[1:N, 1:1]
645+
ĉ_xd = zeros(T, N, n)
646+
ĉ_yd = zeros(T, N, n)
647+
l = zeros(T, N, n)
648+
linvd = zeros(T, N, n)
649+
d = zeros(T, n, n)
650+
Xrow1 = zeros(T, N, n)
651+
Yrow1 = zeros(T, N, n)
652+
vx = zeros(T, N, n)
653+
vy = zeros(T, N, n)
654+
@inbounds for k in 1:n-1
655+
K = k*(k-1)÷2
656+
# d = cholesky(Symmetric(c[Block(1, 1)], :L)).L
657+
# L[Block(k, k)] .= d
658+
vd = view(d, 1:k, 1:k)
659+
vd .= view(c, 1:k, 1:k)
660+
cholesky!(Symmetric(vd, :L))
661+
# l = BlockedMatrix(Matrix(c[Block.(2:n-k+1), Block(1)])/d', k+1:n, [k])
662+
# L[Block.(k+1:n), Block(k)] .= l
663+
vl = view(l, k+1:N-K, 1:k)
664+
vl .= view(c, k+1:N-K, 1:k)
665+
rdiv!(vl, LowerTriangular(vd)')
666+
view(L, Block(k, k)) .= vd
667+
#view(L, Block.(k+1:n), Block(k)) .= vl
668+
J = 1
669+
for j in k+1:n
670+
view(L, Block(j, k)) .= view(vl, J:J-1+j, 1:k)
671+
J += j
672+
end
673+
# vx = Gx[:, 1:n]*Gx[1:k, n+1:2n]' - Gx[:, n+1:2n]*Gx[1:k, 1:n]'
674+
# vy = Gy[:, 1:n]*Gy[1:k, n+1:2n]' - Gy[:, n+1:2n]*Gy[1:k, 1:n]'
675+
compute_v!(vx, Gx, n, k)
676+
compute_v!(vy, Gy, n, k)
677+
#ĉ_xd = X[Block.(k:n), Block.(k:n)]'c + Xrow1*c[1:k, 1:k]-c*Xrow1[1:k, 1:k]'-vx
678+
#ĉ_yd = Y[Block.(k:n), Block.(k:n)]'c + Yrow1*c[1:k, 1:k]-c*Yrow1[1:k, 1:k]'-vy
679+
compute_ĉd!(ĉ_xd, Xt, Xrow1, vx, c, n, k)
680+
compute_ĉd!(ĉ_yd, Yt, Yrow1, vy, c, n, k)
681+
#ĉ = BlockedMatrix([Matrix(ĉ_xd) / Diagonal(X[Block(2, 1)]) Matrix(ĉ_yd)[:, end]/Y[BlockIndex((2, 1), (k+1, k))]], k:n, [k+1])
682+
vĉ1 = view(ĉ, 1:N-K, 1:k)
683+
vĉ1 .= view(ĉ_xd, 1:N-K, 1:k)./view(X.data, Block(3, k))
684+
#vĉ1 .= view(ĉ_xd, 1:N-K, 1:k)
685+
#rdiv!(vĉ1, Diagonal(X[Block(k+1, k)]))
686+
vĉ2 = view(ĉ, 1:N-K, k+1)
687+
vĉ2 .= view(ĉ_yd, 1:N-K, k)./viewblock(Y, Block(k+1, k))[k+1, k]
688+
vĉ = view(ĉ, k+1:N-K, 1:k+1)
689+
# c = ĉ[Block.(2:n-k+1), :] - l * (d \ c[Block(2, 1)]')
690+
#linvd = vl/LowerTriangular(vd)
691+
vlinvd = view(linvd, 1:N-K-k, 1:k)
692+
vlinvd .= vl
693+
rdiv!(vlinvd, LowerTriangular(vd))
694+
vc = view(c, 1:N-K-k, 1:k+1)
695+
#vc .= vĉ .- vlinvd*vc[k+1:2k+1, 1:k]'
696+
vd = view(d, 1:k+1, 1:k)
697+
vd .= view(c, k+1:2k+1, 1:k)
698+
vc .= vĉ
699+
mul!(vc, vlinvd, vd', -1, 1)
700+
# c[Block(1, 1)] .= Symmetric(c[Block(1, 1)], :L)
701+
vc[1:k+1, 1:k+1] .= Symmetric(view(c, 1:k+1, 1:k+1), :L)
702+
# Xrow1 = -linvd*Matrix(X[Block(k+1, k)]')
703+
# Yrow1 = -linvd*Matrix(Y[Block(k+1, k)]')
704+
mul!(view(Xrow1, 1:N-K-k, 1:k+1), vlinvd, viewblock(X, Block(k+1, k))', -1, 0)
705+
mul!(view(Yrow1, 1:N-K-k, 1:k+1), vlinvd, viewblock(Y, Block(k+1, k))', -1, 0)
706+
# Gx = Gx[k+1:end, :] - linvd * Gx[1:k, :]
707+
# Gy = Gy[k+1:end, :] - linvd * Gy[1:k, :]
708+
update_generators!(Gx, linvd, n, k)
709+
update_generators!(Gy, linvd, n, k)
710+
end
711+
L[Block(n, n)] = cholesky(Symmetric(c[1:n, 1:n], :L)).L
712+
return Cholesky(L, 'L', 0)
713+
end
714+
715+
function compute_v!(v, G, n, k)
716+
N = n*(n+1)÷2
717+
K = k*(k-1)÷2
718+
vv = view(v, 1:N-K, 1:k)
719+
vG1 = view(G, K+1:N, 1:n)
720+
vG2 = view(G, K+1:k+K, n+1:2n)
721+
vG3 = view(G, K+1:N, n+1:2n)
722+
vG4 = view(G, K+1:k+K, 1:n)
723+
mul!(vv, vG1, vG2')
724+
mul!(vv, vG3, vG4', -1, 1)
725+
end
726+
727+
function update_generators!(G, linvd, n, k)
728+
N = n*(n+1)÷2
729+
K = k*(k+1)÷2
730+
vG1 = view(G, K+1:N, 1:2n)
731+
vG2 = view(G, K-k+1:K, 1:2n)
732+
vlinvd = view(linvd, 1:N-K, 1:k)
733+
mul!(vG1, vlinvd, vG2, -1, 1)
734+
end
735+
736+
#ĉd = Z[Block.(k:n), Block.(k:n)]'c + Zrow1*c[1:k, 1:k]-c*Zrow1[1:k, 1:k]'-v
737+
function compute_ĉd!(ĉd, Zt, Zrow1, v, c, n, k)
738+
N = n*(n+1)÷2
739+
K = k*(k-1)÷2
740+
jacobi_mul!(ĉd, Zt, c, n, k)
741+
vĉd = view(ĉd, 1:N-K, 1:k)
742+
mul!(vĉd, view(Zrow1, 1:N-K, 1:k), view(c, 1:k, 1:k), 1, 1)
743+
mul!(vĉd, view(c, 1:N-K, 1:k), view(Zrow1, 1:k, 1:k)', -1, 1)
744+
vĉd .-= view(v, 1:N-K, 1:k)
745+
end
746+
747+
# y[:, 1:k] = Z[Block.(k:n), Block.(k:n)]*x[:, 1:k]
748+
function jacobi_mul!(y, Z, x, n, k)
749+
@assert 1 k n
750+
if subblockbandwidths(Z) == (0, 0)
751+
return jacobi_mul00!(y, Z, x, n, k)
752+
elseif subblockbandwidths(Z) == (1, 1)
753+
return jacobi_mul11!(y, Z, x, n, k)
754+
else
755+
return jacobi_mul_default!(y, Z, x, n, k)
756+
end
757+
end
758+
759+
function jacobi_mul00!(y, Z, x, n, k)
760+
@inbounds for col in 1:k
761+
Jshift = 0
762+
if n == 1
763+
ZBj = view(Z.data, Block(2, k))
764+
for i in 1:size(ZBj, 2)
765+
y[i+Jshift, col] = ZBj[1, i]*x[i+Jshift, col]
766+
end
767+
else
768+
if k == n
769+
ZBj = view(Z.data, Block(2, k))
770+
for i in 1:size(ZBj, 2)
771+
y[i+Jshift, col] = ZBj[1, i]*x[i+Jshift, col]
772+
end
773+
else
774+
ZBj = view(Z.data, Block(2, k))
775+
ZBjp1 = view(Z.data, Block(1, k+1))
776+
for i in 1:size(ZBj, 2)
777+
y[i+Jshift, col] = ZBj[1, i]*x[i+Jshift, col] + ZBjp1[1, i]*x[i+size(ZBj, 2)+Jshift, col]
778+
end
779+
Jshift += size(ZBj, 2)
780+
for j in k+1:n-1
781+
ZBjm1 = view(Z.data, Block(3, j-1))
782+
ZBj = view(Z.data, Block(2, j))
783+
ZBjp1 = view(Z.data, Block(1, j+1))
784+
for i in 1:size(ZBj, 2)-1
785+
y[i+Jshift, col] = ZBjm1[1, i]*x[i+Jshift-size(ZBjm1, 2), col] + ZBj[1, i]*x[i+Jshift, col] + ZBjp1[1, i]*x[i+size(ZBj, 2)+Jshift, col]
786+
end
787+
i = size(ZBj, 2)
788+
y[i+Jshift, col] = ZBj[1, i]*x[i+Jshift, col] + ZBjp1[1, i]*x[i+size(ZBj, 2)+Jshift, col]
789+
Jshift += size(ZBj, 2)
790+
end
791+
ZBjm1 = view(Z.data, Block(3, n-1))
792+
ZBj = view(Z.data, Block(2, n))
793+
for i in 1:size(ZBj, 2)-1
794+
y[i+Jshift, col] = ZBjm1[1, i]*x[i+Jshift-size(ZBjm1, 2), col] + ZBj[1, i]*x[i+Jshift, col]
795+
end
796+
i = size(ZBj, 2)
797+
y[i+Jshift, col] = ZBj[1, i]*x[i+Jshift, col]
798+
end
799+
end
800+
end
801+
return y
802+
end
803+
804+
function jacobi_mul11!(y, Z, x, n, k)
805+
@inbounds for col in 1:k
806+
Jshift = 0
807+
if n == 1
808+
ZBj = view(Z.data, Block(2, k))
809+
for i in 1:size(ZBj, 2)
810+
y[i+Jshift, col] = ZBj[2, i]*x[i+Jshift, col]
811+
end
812+
else
813+
if k == n
814+
ZBj = view(Z.data, Block(2, k))
815+
for i in 1:size(ZBj, 2)
816+
y[i+Jshift, col] = ZBj[2, i]*x[i+Jshift, col]
817+
end
818+
for i in 2:size(ZBj, 2)
819+
y[i-1+Jshift, col] += ZBj[1, i]*x[i+Jshift, col]
820+
end
821+
for i in 1:size(ZBj, 2)-1
822+
y[i+1+Jshift, col] += ZBj[3, i]*x[i+Jshift, col]
823+
end
824+
else
825+
ZBj = view(Z.data, Block(2, k))
826+
for i in 1:size(ZBj, 2)
827+
y[i+Jshift, col] = ZBj[2, i]*x[i+Jshift, col]
828+
end
829+
for i in 2:size(ZBj, 2)
830+
y[i-1+Jshift, col] += ZBj[1, i]*x[i+Jshift, col]
831+
end
832+
for i in 1:size(ZBj, 2)-1
833+
y[i+1+Jshift, col] += ZBj[3, i]*x[i+Jshift, col]
834+
end
835+
836+
ZBjp1 = view(Z.data, Block(1, k+1))
837+
for i in 2:size(ZBj, 2)+1
838+
y[i-1+Jshift, col] += ZBjp1[1, i]*x[i+size(ZBj, 2)+Jshift, col]
839+
end
840+
for i in 1:size(ZBj, 2)
841+
y[i+Jshift, col] += ZBjp1[2, i]*x[i+size(ZBj, 2)+Jshift, col]
842+
end
843+
for i in 1:size(ZBj, 2)-1
844+
y[i+1+Jshift, col] += ZBjp1[3, i]*x[i+size(ZBj, 2)+Jshift, col]
845+
end
846+
847+
Jshift += size(ZBj, 2)
848+
for j in k+1:n-1
849+
ZBj = view(Z.data, Block(2, j))
850+
for i in 1:size(ZBj, 2)
851+
y[i+Jshift, col] = ZBj[2, i]*x[i+Jshift, col]
852+
end
853+
for i in 2:size(ZBj, 2)
854+
y[i-1+Jshift, col] += ZBj[1, i]*x[i+Jshift, col]
855+
end
856+
for i in 1:size(ZBj, 2)-1
857+
y[i+1+Jshift, col] += ZBj[3, i]*x[i+Jshift, col]
858+
end
859+
860+
ZBjm1 = view(Z.data, Block(3, j-1))
861+
for i in 2:size(ZBjm1, 2)
862+
y[i-1+Jshift, col] += ZBjm1[1, i]*x[i-size(ZBjm1, 2)+Jshift, col]
863+
end
864+
for i in 1:size(ZBjm1, 2)
865+
y[i+Jshift, col] += ZBjm1[2, i]*x[i-size(ZBjm1, 2)+Jshift, col]
866+
end
867+
for i in 1:size(ZBjm1, 2)
868+
y[i+1+Jshift, col] += ZBjm1[3, i]*x[i-size(ZBjm1, 2)+Jshift, col]
869+
end
870+
871+
ZBjp1 = view(Z.data, Block(1, j+1))
872+
for i in 2:size(ZBj, 2)+1
873+
y[i-1+Jshift, col] += ZBjp1[1, i]*x[i+size(ZBj, 2)+Jshift, col]
874+
end
875+
for i in 1:size(ZBj, 2)
876+
y[i+Jshift, col] += ZBjp1[2, i]*x[i+size(ZBj, 2)+Jshift, col]
877+
end
878+
for i in 1:size(ZBj, 2)-1
879+
y[i+1+Jshift, col] += ZBjp1[3, i]*x[i+size(ZBj, 2)+Jshift, col]
880+
end
881+
882+
Jshift += size(ZBj, 2)
883+
end
884+
ZBj = view(Z.data, Block(2, n))
885+
for i in 1:size(ZBj, 2)
886+
y[i+Jshift, col] = ZBj[2, i]*x[i+Jshift, col]
887+
end
888+
for i in 2:size(ZBj, 2)
889+
y[i-1+Jshift, col] += ZBj[1, i]*x[i+Jshift, col]
890+
end
891+
for i in 1:size(ZBj, 2)-1
892+
y[i+1+Jshift, col] += ZBj[3, i]*x[i+Jshift, col]
893+
end
894+
895+
ZBjm1 = view(Z.data, Block(3, n-1))
896+
for i in 2:size(ZBjm1, 2)
897+
y[i-1+Jshift, col] += ZBjm1[1, i]*x[i-size(ZBjm1, 2)+Jshift, col]
898+
end
899+
for i in 1:size(ZBjm1, 2)
900+
y[i+Jshift, col] += ZBjm1[2, i]*x[i-size(ZBjm1, 2)+Jshift, col]
901+
end
902+
for i in 1:size(ZBjm1, 2)
903+
y[i+1+Jshift, col] += ZBjm1[3, i]*x[i-size(ZBjm1, 2)+Jshift, col]
904+
end
905+
end
906+
end
907+
end
908+
return y
909+
end
910+
911+
function jacobi_mul_default!(y, Z, x, n, k)
912+
if n == 1
913+
vy = view(y, 1:k, 1:k)
914+
mul!(vy, Z[Block(k, k)], view(x, 1:k, 1:k))
915+
else
916+
vy = view(y, 1:k, 1:k)
917+
Jstart = 1
918+
Jstop = k
919+
mul!(vy, Z[Block(k, k)], view(x, Jstart:Jstop, 1:k))
920+
mul!(vy, Z[Block(k, k+1)], view(x, (Jstop+1):(Jstop+k+1), 1:k), 1, 1)
921+
for j in k+1:n-1
922+
vy = view(y, (Jstop+1):(Jstop+j), 1:k)
923+
mul!(vy, Z[Block(j, j-1)], view(x, Jstart:Jstop, 1:k))
924+
Jstart = Jstop+1
925+
Jstop = Jstop+j
926+
mul!(vy, Z[Block(j, j)], view(x, Jstart:Jstop, 1:k), 1, 1)
927+
mul!(vy, Z[Block(j, j+1)], view(x, (Jstop+1):(Jstop+j+1), 1:k), 1, 1)
928+
end
929+
vy = view(y, (Jstop+1):(Jstop+n), 1:k)
930+
mul!(vy, Z[Block(n, n-1)], view(x, Jstart:Jstop, 1:k))
931+
Jstart = Jstop+1
932+
Jstop = Jstop+n
933+
mul!(vy, Z[Block(n, n)], view(x, Jstart:Jstop, 1:k), 1, 1)
934+
end
935+
return y
936+
end

test/bivariategrammatrixtests.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,29 @@ import FastTransforms: chebyshevmoments1, chebyshevabsmoments1, bivariatemoments
4545
WC = BivariateChebyshevGramMatrix(μ)
4646
@test blockbandwidths(W) == blockbandwidths(WC) == subblockbandwidths(W) == subblockbandwidths(WC) == (7, 7)
4747
@test W WC
48+
49+
μ = BlockedVector(PaddedVector(inv.(1:21), (2n-1)*(2n)÷2), 1:2n-1)
50+
W = BivariateGramMatrix(μ, X, Y) # works with Chebyshev X & Y, but blocks are not extracted as banded matrices
4851
end
52+
53+
54+
using ClassicalOrthogonalPolynomials, MultivariateOrthogonalPolynomials, FastTransforms, LinearAlgebra, Plots
55+
56+
using LazyArrays, BlockArrays, BlockBandedMatrices
57+
58+
n = 50
59+
60+
μ = BlockedVector(PaddedVector([10; zeros(20)] + inv.(1:21), (2n-1)*(2n)÷2), 1:2n-1)
61+
#W = BivariateGramMatrix(μ, X, Y) # works with Chebyshev X & Y, but blocks are not extracted as banded matrices
62+
63+
P = JacobiTriangle(0, 0, 0)
64+
#P = RectPolynomial(Legendre(), Legendre())
65+
X = jacobimatrix(Val(1), P)
66+
Y = jacobimatrix(Val(2), P)
67+
x, y = coordinates(P);
68+
69+
X = BandedBlockBandedMatrix(X[Block.(1:2n-1), Block.(1:2n-1)])
70+
Y = BandedBlockBandedMatrix(Y[Block.(1:2n-1), Block.(1:2n-1)])
71+
#Y = FastTransforms._chebyshev_y(Float64, 2n-1)
72+
W = BivariateGramMatrix(μ, X, Y) # works with Chebyshev X & Y, but blocks are not extracted as banded matrices
73+
eigvals(W)

0 commit comments

Comments
 (0)