
Commit c6a4f46

Merge pull request #2317 from yyexela/dGPFantasize
Enable fantasy models for multitask GPs Reborn
2 parents 527546e + 28ee4ca commit c6a4f46
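
This merge removes the long-standing restriction in `get_fantasy_model`, which raised "Cannot yet add fantasy observations to multitask GPs, but this is coming soon!" whenever the targets carried a trailing task dimension. A minimal sketch of what the change enables (the model definition and shapes below are illustrative, not taken from the commit; the commit's own end-to-end check is the derivative-GP test added below):

import torch
import gpytorch
from gpytorch.distributions import MultitaskMultivariateNormal

class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        self.covar_module = gpytorch.kernels.MultitaskKernel(gpytorch.kernels.RBFKernel(), num_tasks=2)

    def forward(self, x):
        return MultitaskMultivariateNormal(self.mean_module(x), self.covar_module(x))

train_x = torch.linspace(0, 1, 15).unsqueeze(-1)          # (n, d) = (15, 1)
train_y = torch.randn(15, 2)                              # (n, t) multitask targets
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
model = MultitaskGPModel(train_x, train_y, likelihood)
model.eval()
likelihood.eval()

model(torch.rand(5, 1))                                   # populate the prediction caches first
fant_x = torch.rand(3, 1)                                 # m = 3 new inputs
fant_y = torch.randn(3, 2)                                # (m, t) new targets
fantasy_model = model.get_fantasy_model(fant_x, fant_y)   # previously raised a RuntimeError

One prerequisite is unchanged by this PR: the model must have produced at least one posterior, so that `prediction_strategy` and its caches exist before fantasizing.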

File tree

3 files changed: 95 additions, 11 deletions

gpytorch/models/exact_gp.py

Lines changed: 9 additions & 8 deletions

@@ -6,7 +6,7 @@
 import torch
 
 from .. import settings
-from ..distributions import MultivariateNormal
+from ..distributions import MultitaskMultivariateNormal, MultivariateNormal
 from ..likelihoods import _GaussianLikelihoodBase
 from ..utils.generic import length_safe_zip
 from ..utils.warnings import GPInputWarning
@@ -162,15 +162,17 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
 
         model_batch_shape = self.train_inputs[0].shape[:-2]
 
-        if self.train_targets.dim() > len(model_batch_shape) + 1:
-            raise RuntimeError("Cannot yet add fantasy observations to multitask GPs, but this is coming soon!")
-
         if not isinstance(inputs, list):
             inputs = [inputs]
 
         inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in inputs]
 
-        target_batch_shape = targets.shape[:-1]
+        if not isinstance(self.prediction_strategy.train_prior_dist, MultitaskMultivariateNormal):
+            data_dim_start = -1
+        else:
+            data_dim_start = -2
+
+        target_batch_shape = targets.shape[:data_dim_start]
         input_batch_shape = inputs[0].shape[:-2]
         tbdim, ibdim = len(target_batch_shape), len(input_batch_shape)
 
@@ -198,7 +200,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
         # computing the covariance for each element of the batch. Therefore we don't expand the inputs to the
         # size of the fantasy model here - this is done below, after the evaluation and fast fantasy update
         train_inputs = [tin.expand(input_batch_shape + tin.shape[-2:]) for tin in self.train_inputs]
-        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[-1:])
+        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[data_dim_start:])
 
         full_inputs = [
             torch.cat(
@@ -208,8 +210,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
             for train_input, input in length_safe_zip(train_inputs, inputs)
         ]
         full_targets = torch.cat(
-            [train_targets, targets.expand(target_batch_shape + targets.shape[-1:])],
-            dim=-1,
+            [train_targets, targets.expand(target_batch_shape + targets.shape[data_dim_start:])], dim=data_dim_start
         )
 
         try:
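
The crux of the change above is `data_dim_start`: a single-task GP stores targets as `(*batch, n)`, so training and fantasy targets concatenate along the last dimension, while a multitask GP stores `(*batch, n, t)` and must concatenate along `dim=-2` so the task dimension stays last. A small shape sketch of the two conventions (the tensors here are illustrative only):

import torch

# Single-task: targets are (*batch, n); the data dimension is the last one.
single = torch.randn(5, 15)                    # batch=(5,), n=15
assert single.shape[:-1] == torch.Size([5])    # target_batch_shape

# Multitask: targets are (*batch, n, t); data rows sit at dim -2, tasks at -1.
multi = torch.randn(5, 15, 2)                  # batch=(5,), n=15, t=2
assert multi.shape[:-2] == torch.Size([5])     # target_batch_shape

# Appending m = 3 fantasy points therefore uses dim=-1 vs. dim=-2:
full_single = torch.cat([single, torch.randn(5, 3)], dim=-1)    # (5, 18)
full_multi = torch.cat([multi, torch.randn(5, 3, 2)], dim=-2)   # (5, 18, 2)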

gpytorch/models/exact_prediction_strategies.py

Lines changed: 24 additions & 3 deletions

@@ -22,6 +22,8 @@
 from torch import Tensor
 
 from .. import settings
+
+from ..distributions import MultitaskMultivariateNormal
 from ..lazy import LazyEvaluatedKernelTensor
 from ..utils.memoize import add_to_cache, cached, clear_cache_hook, pop_from_cache
 
@@ -134,16 +136,28 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
             A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have
             been added and all test-time caches have been updated.
         """
+        if not isinstance(full_output, MultitaskMultivariateNormal):
+            target_batch_shape = targets.shape[:-1]
+        else:
+            target_batch_shape = targets.shape[:-2]
+
         full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix
 
         batch_shape = full_inputs[0].shape[:-2]
 
-        full_mean = full_mean.view(*batch_shape, -1)
         num_train = self.num_train
 
+        if isinstance(full_output, MultitaskMultivariateNormal):
+            num_tasks = full_output.event_shape[-1]
+            full_mean = full_mean.view(*batch_shape, -1, num_tasks)
+            fant_mean = full_mean[..., (num_train // num_tasks) :, :]
+            full_targets = full_targets.view(*target_batch_shape, -1)
+        else:
+            full_mean = full_mean.view(*batch_shape, -1)
+            fant_mean = full_mean[..., num_train:]
+
         # Evaluate fant x train and fant x fant covariance matrices, leave train x train unevaluated.
         fant_fant_covar = full_covar[..., num_train:, num_train:]
-        fant_mean = full_mean[..., num_train:]
         mvn = self.train_prior_dist.__class__(fant_mean, fant_fant_covar)
         fant_likelihood = self.likelihood.get_fantasy_likelihood(**kwargs)
         mvn_obs = fant_likelihood(mvn, inputs, **kwargs)
@@ -209,6 +223,9 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
                 new_root = BatchRepeatLinearOperator(DenseLinearOperator(new_root), repeat_shape)
             # no need to repeat the covar cache, broadcasting will do the right thing
 
+        if isinstance(full_output, MultitaskMultivariateNormal):
+            full_mean = full_mean.view(*target_batch_shape, -1, num_tasks).contiguous()
+
         # Create new DefaultPredictionStrategy object
         fant_strat = self.__class__(
             train_inputs=full_inputs,
@@ -285,7 +302,11 @@ def exact_predictive_mean(self, test_mean: Tensor, test_train_covar: LinearOpera
         # NOTE TO FUTURE SELF:
         # You **cannot* use addmv here, because test_train_covar may not actually be a non lazy tensor even for an exact
         # GP, and using addmv requires you to to_dense test_train_covar, which is obviously a huge no-no!
-        res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
+
+        if len(self.mean_cache.shape) == 4:
+            res = (test_train_covar @ self.mean_cache.squeeze(1).unsqueeze(-1)).squeeze(-1)
+        else:
+            res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
         res = res + test_mean
 
         return res
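
Two details are worth noting here. First, a `MultitaskMultivariateNormal` flattens its `n` points and `t` tasks into `n * t` outputs, and `self.num_train` counts those flattened entries, which is why the fantasy slice divides by `num_tasks` once the mean has been viewed back to `(..., n, t)`. Second, the new branch in `exact_predictive_mean` squeezes out a singleton broadcast dimension (`squeeze(1)`) when the batched multitask fantasy update leaves a 4-dimensional `mean_cache`. A shape illustration of the first point, under assumed sizes (n = 15 training points, m = 3 fantasy points, t = 2 tasks; all names are local to this sketch):

import torch

num_tasks = 2
n_train, m = 15, 3
num_train = n_train * num_tasks                     # self.num_train counts flattened (n * t) entries

full_mean = torch.randn((n_train + m) * num_tasks)  # flat mean over all (n + m) * t outputs
full_mean = full_mean.view(-1, num_tasks)           # (18, 2): rows = points, columns = tasks
fant_mean = full_mean[..., (num_train // num_tasks):, :]
assert fant_mean.shape == (m, num_tasks)            # (3, 2): only the fantasy rows remain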

(new test file added by this PR; filename not shown in this view)

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+import unittest
+from math import pi
+
+import torch
+
+import gpytorch
+from gpytorch.distributions import MultitaskMultivariateNormal
+from gpytorch.kernels import ScaleKernel, RBFKernelGrad
+from gpytorch.likelihoods import MultitaskGaussianLikelihood
+from gpytorch.means import ConstantMeanGrad
+from gpytorch.test.base_test_case import BaseTestCase
+
+# Simple training data
+num_train_samples = 15
+num_fantasies = 10
+dim = 1
+train_X = torch.linspace(0, 1, num_train_samples).reshape(-1, 1)
+train_Y = torch.hstack([
+    torch.sin(train_X * (2 * pi)).reshape(-1, 1),
+    (2 * pi) * torch.cos(train_X * (2 * pi)).reshape(-1, 1),
+])
+
+
+class GPWithDerivatives(gpytorch.models.ExactGP):
+    def __init__(self, train_X, train_Y):
+        likelihood = MultitaskGaussianLikelihood(num_tasks=1 + dim)
+        super().__init__(train_X, train_Y, likelihood)
+        self.mean_module = ConstantMeanGrad()
+        self.base_kernel = RBFKernelGrad()
+        self.covar_module = ScaleKernel(self.base_kernel)
+        self._num_outputs = 1 + dim
+
+    def forward(self, x):
+        mean_x = self.mean_module(x)
+        covar_x = self.covar_module(x)
+        return MultitaskMultivariateNormal(mean_x, covar_x)
+
+
+class TestDerivativeGPFutures(BaseTestCase, unittest.TestCase):
+
+    # Inspired by test_lanczos_fantasy_model
+    def test_derivative_gp_futures(self):
+        model = GPWithDerivatives(train_X, train_Y)
+        mll = gpytorch.mlls.sum_marginal_log_likelihood.ExactMarginalLogLikelihood(model.likelihood, model)
+
+        mll.train()
+        mll.eval()
+
+        # get a posterior to fill in caches
+        model(torch.randn(num_train_samples).reshape(-1, 1))
+
+        new_x = torch.randn((1, 1, dim))
+        new_y = torch.randn((num_fantasies, 1, 1, 1 + dim))
+
+        # just check that this can run without error
+        model.get_fantasy_model(new_x, new_y)
+
+
+if __name__ == "__main__":
+    unittest.main()
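
Two shape conventions the test relies on: a derivative GP over `d` input dimensions produces `1 + d` tasks per point (the function value followed by its `d` partial derivatives), and the leading `num_fantasies` dimension on `new_y` asks a single `get_fantasy_model` call for a batch of 10 independent fantasy models. A quick check of the target layout (illustrative, mirroring the training data above):

import torch
from math import pi

n, d = 15, 1
X = torch.linspace(0, 1, n).reshape(-1, 1)
Y = torch.hstack([
    torch.sin(2 * pi * X),           # task 0: f(x)
    2 * pi * torch.cos(2 * pi * X),  # task 1: df/dx
])
assert Y.shape == (n, 1 + d)         # one function value plus d derivatives per point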
