Skip to content

Commit ca71c00

Browse files
committed
switch to solves for Jbuild
1 parent 8e378c8 commit ca71c00

File tree

4 files changed

+92
-21
lines changed

4 files changed

+92
-21
lines changed

docs/technical_notes/fock_build.tex

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ \section{Introduction}
5151
B^{P}_{\mu \nu} = \sum_Q (P|Q)^{-1/2} (Q|\mu \nu).
5252
\end{equation}
5353

54-
The matrix inverse and inverse square root of the Coulomb metric $A\equiv (P|Q)$ can be computed by Cholesky decomposition:
54+
The matrix inverse square root of the Coulomb metric $A\equiv (P|Q)$ can be computed by Cholesky decomposition:
5555
\begin{equation}
5656
A = L L^T,
5757
\end{equation}
@@ -60,12 +60,7 @@ \section{Introduction}
6060
\label{eq:inv}
6161
A^{-1} = L^{-T} L^{-1},
6262
\end{equation}
63-
which can be obtained by two triangular solves (\textit{e.g.}, with \texttt{scipy.linalg.solve\_triangular}):
64-
\begin{equation}
65-
Lx = I, \quad L^T y = x
66-
\end{equation}
67-
where $x = L^{-1}$ and $y = A^{-1}$.
68-
From \cref{eq:inv}, we can also identify $L^{-1}$ as a valid choice for $A^{-1/2}$ in our context, since $L^{-1}_{PR}(R|\rho \sigma) = B^{P}_{\rho \sigma}$, and the contraction of the B tensors yields the original four-center integrals:
63+
From \cref{eq:inv}, we can identify $L^{-1}$ as a valid choice for $A^{-1/2}$ in our context, since $L^{-1}_{PR}(R|\rho \sigma) = B^{P}_{\rho \sigma}$, and the contraction of the B tensors yields the original four-center integrals:
6964
\begin{align}
7065
\sum_{P} B^{P}_{\mu \nu} B^{P}_{\rho \sigma}
7166
& = \sum_{PQR}(\mu \nu | P) L^{-\text{T}}_{PQ} L^{-1}_{QR} (R|\rho \sigma) \\
@@ -109,6 +104,7 @@ \section{Coulomb matrix algorithm}
109104

110105
\item Matrix vector multiplication
111106
\begin{equation}
107+
\label{eq:bp}
112108
b_P = \sum_{Q} (P|Q)^{-1} a_Q
113109
\end{equation}
114110
\item Second pass over all the three-center integrals
@@ -118,6 +114,23 @@ \section{Coulomb matrix algorithm}
118114
In this step, one can apply integral screening of $(\mu \nu | P)$ to reduce the evaluation cost.
119115
\end{enumerate}
120116

117+
Note in \cref{eq:bp}, we require the application of the inverse of the Coulomb metric to a vector.
118+
There are at least four, in principle equivalent, ways to do this:
119+
\begin{enumerate}
120+
\item Use the pre-computed $L^{-1}$ to form $A^{-1}=L^{-T} L^{-1}$ and do a matrix-vector product with the problem vector: $b_Q = A^{-1} a_Q$.
121+
\item Two triangular solves with $L$ on the problem vector $a_Q$: first solve $L z = a_Q$ for $z$ and then solve $L^T b_P = z$ for $b_P$.
122+
\item Compute the eigen-decomposition of the Coulomb metric $A = U s U^T$ and threshold the eigenvalues to get $A_{\eta}^{-1} = U_{\eta} s_{\eta}^{-1} U_{\eta}^T$ where $s_{\eta}$ are the eigenvalues above some threshold $\eta$ and $U_{\eta}$ are the corresponding eigenvectors. Then do a matrix-vector product with the problem vector: $b_P = A_{\eta}^{-1}a_Q$.
123+
\item Compute the same $U_{\eta}$ and $s_{\eta}$ as above, but apply it to each problem vector without forming $A_{\eta}^{-1}$: $b_P = U_{\eta} (U_{\eta}^T a_Q / s_{\eta})$.
124+
\end{enumerate}
125+
The first two methods cannot deal with Coulomb metrics that have large condition numbers. The first and third methods at first seem appealing, since we pre-compute a `universal' $A^{-1}$ and just do matrix-vector products later.
126+
Turns out, forming of $A^{-1}$ directly, as in the first and third methods is not numerically stable, since the error of these operations is on the order of $\text{cond}(A)^2\epsilon$, whereas the error of the second and fourth methods is on the order of $\text{cond}(A)\epsilon$.
127+
128+
If ill-conditioning is not a problem, then \texttt{scipy} offers a very succint way to carry out method 2:
129+
\begin{quote}
130+
\texttt{bP = scipy.linalg.cho\_solve((L, True), aQ)}
131+
\end{quote}
132+
which does the two triangular solves with the Cholesky factor $L$ in one call.
133+
121134
\section{Exchange matrix algorithm}
122135

123136
The exchange matrix can be computed efficiently as

forte2/jkbuilder/jkbuilder.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
cholesky_wrapper,
1212
invsqrt_matrix,
1313
print_metric_info,
14+
_eigh_metric_kernel,
1415
)
1516

1617

@@ -622,22 +623,27 @@ def _build_metric(self):
622623
# M = (P|Q)
623624
M = integrals.coulomb_2c(self.system, self.auxbasis)
624625
if self.metric_ortho_rtol is not None:
625-
X, _, info = invsqrt_matrix(M, rtol=self.metric_ortho_rtol)
626+
_precomp = _eigh_metric_kernel(M, rtol=self.metric_ortho_rtol)
627+
self.Mm12, _, info = invsqrt_matrix(M, precomp=_precomp)
626628
print_metric_info(info, "Density fitting Coulomb metric (P|Q)")
627-
self.Mm12 = X
628-
self.Mm1 = np.einsum("PQ,QR->PR", X, X, optimize=True)
629+
sevals = _precomp[0]
630+
sevecs = _precomp[1]
631+
ndiscard = _precomp[2]["n_discarded"]
632+
self.sevals = sevals[ndiscard:]
633+
self.sevecs = sevecs[:, ndiscard:]
629634
else:
630635
# M = L L.T
631-
L = sp.linalg.cholesky(M, lower=True)
632-
# M^{-1} = L^{-T} L^{-1}
633-
# two triangular solves to get M^{-1}:
634-
# 1. solve L Y = I for Y = L^{-1}
635-
# 2. solve L.T X = Y for X = M^{-1}
636+
self.L = sp.linalg.cholesky(M, lower=True)
636637
I = np.eye(M.shape[0])
637-
Y = sp.linalg.solve_triangular(L, I, lower=True)
638-
# M^{-1/2} = L^{-1}
639-
self.Mm12 = Y
640-
self.Mm1 = np.einsum("QP,QR->PR", Y, Y, optimize=True)
638+
self.Mm12 = sp.linalg.solve_triangular(self.L, I, lower=True)
639+
640+
def _apply_Mm1(self, y):
641+
"""Compute x = (P|Q)^{-1} y without forming (P|Q)^{-1}"""
642+
if self.metric_ortho_rtol is not None:
643+
# x = U s^{-1} U^T y
644+
return self.sevecs @ ((self.sevecs.T @ y) / self.sevals)
645+
else:
646+
return sp.linalg.cho_solve((self.L, True), y)
641647

642648
def _find_aux_shell_block(self, pshell0):
643649
# find the block of auxiliary shells that fit in the buffer, starting from pshell0
@@ -679,7 +685,7 @@ def _J_kernel(self, D):
679685
out=bP[pb0:pb1],
680686
)
681687
pshell0 = pshell1
682-
bP = self.Mm1 @ bP
688+
bP = self._apply_Mm1(bP)
683689

684690
J = np.zeros_like(D)
685691
# 3. Batch over the (raw) P index again to build the J matrix
@@ -864,7 +870,7 @@ def _JK_kernel(self, C):
864870
optimize=True,
865871
)
866872
if J_pass == 0:
867-
bP = self.Mm1 @ bP
873+
bP = self._apply_Mm1(bP)
868874
J_pass += 1
869875
i0 = i1
870876

tests/jkbuilder/test_jkbuilder.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,38 @@ def test_jkbuilder_on_the_fly_complex():
257257
[J_otf], [K_otf] = fb_otf.build_JK([Cocc])
258258
assert np.allclose(J_otf, J_ref), np.linalg.norm(J_otf - J_ref)
259259
assert np.allclose(K_otf, K_ref), np.linalg.norm(K_otf - K_ref)
260+
261+
262+
def test_jkbuilder_on_the_fly_large():
263+
xyz = """
264+
Cr 0.0 0.0 0.0
265+
Cr 0.0 0.0 2.0
266+
"""
267+
268+
system = System(
269+
xyz=xyz,
270+
basis_set="cc-pvtz",
271+
auxiliary_basis_set="cc-pvtz-autoaux",
272+
df_ortho_rtol=1e-8,
273+
)
274+
275+
nmo = system.nbf
276+
rng = np.random.default_rng(12345)
277+
Cocc = rng.standard_normal((nmo, 24))
278+
D = [Cocc @ Cocc.T.conj()]
279+
280+
fb = system.fock_builder
281+
J_ref = fb.build_J(D)
282+
K_ref = fb.build_K([Cocc])
283+
284+
fb_otf = jkbuilder.FockBuilderOTF(system, memory_threshold_mb=100)
285+
J_otf = fb_otf.build_J(D)[0]
286+
K_otf = fb_otf.build_K([Cocc])[0]
287+
288+
assert np.linalg.norm(J_otf - J_ref) < 1e-8
289+
assert np.linalg.norm(K_otf - K_ref) < 1e-8
290+
291+
# separately test the combined JK builder, since the algorithm is different for the combined builder
292+
J_otf, K_otf = fb_otf.build_JK([Cocc])
293+
assert np.linalg.norm(J_otf[0] - J_ref[0]) < 1e-8
294+
assert np.linalg.norm(K_otf[0] - K_ref[0]) < 1e-8

tests/scf/test_rhf.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,20 @@ def test_rhf_cl2_otf():
108108
scf.run()
109109

110110
assert scf.E == approx(-918.943349338796)
111+
112+
def test_rhf_cr2_autoaux_otf():
113+
xyz = """
114+
Cr 0 0 0
115+
Cr 0 0 1.681
116+
"""
117+
system = System(
118+
xyz=xyz,
119+
basis_set="cc-pvtz",
120+
auxiliary_basis_set="cc-pvtz-autoaux",
121+
memory_threshold_mb=130,
122+
)
123+
scf = RHF(charge=0)(system)
124+
scf.run()
125+
126+
assert scf.E == approx(-2085.926511540127)
127+
test_rhf_cr2_autoaux_otf()

0 commit comments

Comments
 (0)