@@ -150,10 +150,15 @@ def inverse(self):
         inverses = [lt.inverse() for lt in self.lazy_tensors]
         return self.__class__(*inverses)
 
-    def inv_matmul(self, right_tensor, left_tensor=None):
-        # TODO: Investigate under what conditions computing individual inverses makes sense
-        # For now, retain existing behavior
-        return super().inv_matmul(right_tensor=right_tensor, left_tensor=left_tensor)
+    def inv_quad_logdet(self, inv_quad_rhs=None, logdet=False, reduce_inv_quad=True):
+        if inv_quad_rhs is not None:
+            inv_quad_term, _ = super().inv_quad_logdet(
+                inv_quad_rhs=inv_quad_rhs, logdet=False, reduce_inv_quad=reduce_inv_quad
+            )
+        else:
+            inv_quad_term = None
+        logdet_term = self._logdet() if logdet else None
+        return inv_quad_term, logdet_term
 
     @cached(name="cholesky")
     def _cholesky(self, upper=False):
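As a sanity check on what the new `inv_quad_logdet` returns, both terms can be computed densely with plain torch. This is a minimal sketch with made-up SPD factors `A` and `B` (not part of this commit): for K = A kron B and right-hand sides y, the method yields sum_i y_i^T K^-1 y_i (with reduce_inv_quad=True) and log|K|, and the Kronecker structure makes the log determinant separable.

import torch

# Hypothetical dense check: A, B are small SPD factors, K their Kronecker product.
A = torch.randn(3, 3); A = A @ A.T + 3 * torch.eye(3)
B = torch.randn(2, 2); B = B @ B.T + 2 * torch.eye(2)
K = torch.kron(A, B)
y = torch.randn(6, 4)

# inv_quad term with reduce_inv_quad=True: sum_i y_i^T K^-1 y_i
inv_quad = (y * torch.linalg.solve(K, y)).sum()
# logdet term; for A (n x n) and B (m x m): log|A kron B| = m log|A| + n log|B|
logdet = torch.logdet(K)
assert torch.allclose(logdet, 2 * torch.logdet(A) + 3 * torch.logdet(B), atol=1e-4)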
@@ -183,22 +188,47 @@ def _get_indices(self, row_index, col_index, *batch_indices):
 
         return res
 
-    def _inv_matmul(self, right_tensor, left_tensor=None):
+    def _solve(self, rhs, preconditioner=None, num_tridiag=0):
         # Computes inv_matmul by exploiting the identity (A \kron B)^-1 = A^-1 \kron B^-1
+        # We perform the solve first, before handling any tridiagonal matrices.
+
         tsr_shapes = [q.size(-1) for q in self.lazy_tensors]
-        n_rows = right_tensor.size(-2)
-        batch_shape = _mul_broadcast_shape(self.shape[:-2], right_tensor.shape[:-2])
+        n_rows = rhs.size(-2)
+        batch_shape = _mul_broadcast_shape(self.shape[:-2], rhs.shape[:-2])
         perm_batch = tuple(range(len(batch_shape)))
-        y = right_tensor.clone().expand(*batch_shape, *right_tensor.shape[-2:])
+        y = rhs.clone().expand(*batch_shape, *rhs.shape[-2:])
         for n, q in zip(tsr_shapes, self.lazy_tensors):
             # for KroneckerProductTriangularLazyTensor this inv_matmul is very cheap
             y = q.inv_matmul(y.reshape(*batch_shape, n, -1))
             y = y.reshape(*batch_shape, n, n_rows // n, -1).permute(*perm_batch, -2, -3, -1)
         res = y.reshape(*batch_shape, n_rows, -1)
+
+        if num_tridiag == 0:
+            return res
+        else:
+            # We need to return the tridiagonal matrices, so we return the eigenvalues.
+            # In general this path should not be taken, because log determinant
+            # estimation has a closed form and is implemented in _logdet.
+            # TODO: make this more efficient
+            evals, _ = self.diagonalization()
+            evals_repeated = evals.unsqueeze(0).repeat(num_tridiag, *[1] * evals.ndim)
+            lazy_evals = DiagLazyTensor(evals_repeated)
+            batch_repeated_evals = lazy_evals.evaluate()
+            return res, batch_repeated_evals
+
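The reshape/permute loop above is the standard trick for applying (A kron B)^-1 one factor at a time, without ever materializing the full product. A minimal dense sketch mirroring the loop body, with hypothetical factors `A` and `B` (illustrative only, not from this codebase):

import torch

A = torch.randn(3, 3); A = A @ A.T + 3 * torch.eye(3)
B = torch.randn(2, 2); B = B @ B.T + 2 * torch.eye(2)
y = torch.randn(6, 5)  # 6 = 3 * 2 rows, 5 right-hand sides

res = y.clone()
n_rows = res.size(-2)
for Q in (A, B):
    n = Q.size(-1)
    # solve against one factor, with the remaining indices folded into columns
    res = torch.linalg.solve(Q, res.reshape(n, -1))
    # rotate the factor axes so the next factor's index comes first
    res = res.reshape(n, n_rows // n, -1).permute(1, 0, 2).reshape(n_rows, -1)

# agrees with the dense solve against the full Kronecker product
dense = torch.linalg.solve(torch.kron(A, B), y)
assert torch.allclose(res, dense, atol=1e-4)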
+    def _inv_matmul(self, right_tensor, left_tensor=None):
+        # If _inv_matmul is called directly, we skip the eigenvalue handling;
+        # the solve itself is cheap because of the Kronecker structure.
+        res = self._solve(rhs=right_tensor)
         if left_tensor is not None:
             res = left_tensor @ res
         return res
 
+    def _logdet(self):
+        evals, _ = self.diagonalization()
+        logdet = evals.clamp(min=1e-7).log().sum(-1)
+        return logdet
+
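The closed form behind `_logdet` is that the eigenvalues of a Kronecker product are the pairwise products of the factor eigenvalues, so the log determinant is just the sum of their logs. A dense sketch with hypothetical SPD factors `A` and `B` (illustrative only):

import torch

A = torch.randn(3, 3); A = A @ A.T + 3 * torch.eye(3)
B = torch.randn(2, 2); B = B @ B.T + 2 * torch.eye(2)

# eigenvalues of A kron B: all products of one eigenvalue from each factor
kron_evals = torch.outer(torch.linalg.eigvalsh(A), torch.linalg.eigvalsh(B)).reshape(-1)
logdet = kron_evals.clamp(min=1e-7).log().sum()  # same clamp as _logdet above
assert torch.allclose(logdet, torch.logdet(torch.kron(A, B)), atol=1e-4)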
     def _matmul(self, rhs):
         is_vec = rhs.ndimension() == 1
         if is_vec: