
Commit 969a9ec

qKG First commit
1 parent a2b5fd8 commit 969a9ec

2 files changed: 39 additions, 13 deletions

gpytorch/models/exact_gp.py

Lines changed: 10 additions & 9 deletions
@@ -6,7 +6,7 @@
 import torch

 from .. import settings
-from ..distributions import MultivariateNormal
+from ..distributions import MultitaskMultivariateNormal, MultivariateNormal
 from ..likelihoods import _GaussianLikelihoodBase
 from ..utils.generic import length_safe_zip
 from ..utils.warnings import GPInputWarning
@@ -162,15 +162,17 @@ def get_fantasy_model(self, inputs, targets, **kwargs):

         model_batch_shape = self.train_inputs[0].shape[:-2]

-        if self.train_targets.dim() > len(model_batch_shape) + 1:
-            raise RuntimeError("Cannot yet add fantasy observations to multitask GPs, but this is coming soon!")
-
         if not isinstance(inputs, list):
             inputs = [inputs]

         inputs = [i.unsqueeze(-1) if i.ndimension() == 1 else i for i in inputs]

-        target_batch_shape = targets.shape[:-1]
+        if not isinstance(self.prediction_strategy.train_prior_dist, MultitaskMultivariateNormal):
+            data_dim_start = -1
+        else:
+            data_dim_start = -2
+
+        target_batch_shape = targets.shape[:data_dim_start]
         input_batch_shape = inputs[0].shape[:-2]
         tbdim, ibdim = len(target_batch_shape), len(input_batch_shape)

@@ -198,7 +200,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
         # computing the covariance for each element of the batch. Therefore we don't expand the inputs to the
         # size of the fantasy model here - this is done below, after the evaluation and fast fantasy update
         train_inputs = [tin.expand(input_batch_shape + tin.shape[-2:]) for tin in self.train_inputs]
-        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[-1:])
+        train_targets = self.train_targets.expand(target_batch_shape + self.train_targets.shape[data_dim_start:])

         full_inputs = [
             torch.cat(
@@ -208,8 +210,7 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
             for train_input, input in length_safe_zip(train_inputs, inputs)
         ]
         full_targets = torch.cat(
-            [train_targets, targets.expand(target_batch_shape + targets.shape[-1:])],
-            dim=-1,
+            [train_targets, targets.expand(target_batch_shape + targets.shape[data_dim_start:])], dim=data_dim_start
         )

         try:
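
Note on the reshaping above: `data_dim_start` simply selects how many trailing dimensions of `targets` are data (one for single-task, two for multitask), and the train/fantasy concatenation runs along that same dimension. A minimal shape sketch; the tensors below are illustrative, not from the commit:

import torch

# Illustrative shapes only: a batch of 3 models, 5 train targets, 2 fantasy targets.
single = torch.randn(3, 5)      # single-task targets: the data dim is last
multi = torch.randn(3, 5, 2)    # multitask targets: a trailing task dim follows

# data_dim_start = -1 (single-task) or -2 (multitask) strips the data (and task)
# dims, leaving the batch shape in both cases.
assert single.shape[:-1] == multi.shape[:-2] == torch.Size([3])

# Concatenating train and fantasy targets runs along that same data dim.
fant = torch.randn(3, 2, 2)
full = torch.cat([multi, fant], dim=-2)
assert full.shape == torch.Size([3, 7, 2])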
@@ -316,7 +317,7 @@ def __call__(self, *args, **kwargs):
         if settings.debug().on():
             if not isinstance(full_output, MultivariateNormal):
                 raise RuntimeError("ExactGP.forward must return a MultivariateNormal")
-            full_mean, full_covar = full_output.loc, full_output.lazy_covariance_matrix
+            full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

         # Determine the shape of the joint distribution
         batch_shape = full_output.batch_shape
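
With the multitask guard removed and the shape handling above, `get_fantasy_model` can now be called with multitask targets. A minimal usage sketch; the `MultitaskGPModel` class and all shapes here are hypothetical, assuming a standard two-task exact GP with a `MultitaskGaussianLikelihood`:

import torch
import gpytorch

# Hypothetical two-task exact GP, following the standard gpytorch multitask setup.
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        self.covar_module = gpytorch.kernels.MultitaskKernel(gpytorch.kernels.RBFKernel(), num_tasks=2)

    def forward(self, x):
        # Returns a MultitaskMultivariateNormal, so prediction_strategy.train_prior_dist
        # is multitask and data_dim_start becomes -2.
        return gpytorch.distributions.MultitaskMultivariateNormal(self.mean_module(x), self.covar_module(x))

train_x, train_y = torch.randn(10, 1), torch.randn(10, 2)
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
model = MultitaskGPModel(train_x, train_y, likelihood).eval()

# A posterior call is needed first so the prediction strategy (and its caches) exist.
with torch.no_grad():
    model(torch.randn(4, 1))

fant_x, fant_y = torch.randn(3, 1), torch.randn(3, 2)  # multitask targets: (m, t)
fant_model = model.get_fantasy_model(fant_x, fant_y)   # previously raised RuntimeError

Note that the new code consults `self.prediction_strategy.train_prior_dist`, so fantasizing without a prior posterior call is still an error, as before this commit.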

gpytorch/models/exact_prediction_strategies.py

Lines changed: 29 additions & 4 deletions
@@ -22,6 +22,8 @@
 from torch import Tensor

 from .. import settings
+
+from ..distributions import MultitaskMultivariateNormal
 from ..lazy import LazyEvaluatedKernelTensor
 from ..utils.memoize import add_to_cache, cached, clear_cache_hook, pop_from_cache

@@ -134,16 +136,27 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
             A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have
             been added and all test-time caches have been updated.
         """
+        if not isinstance(full_output, MultitaskMultivariateNormal):
+            target_batch_shape = targets.shape[:-1]
+        else:
+            target_batch_shape = targets.shape[:-2]
+
         full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix

         batch_shape = full_inputs[0].shape[:-2]

-        full_mean = full_mean.view(*batch_shape, -1)
         num_train = self.num_train

+        if isinstance(full_output, MultitaskMultivariateNormal):
+            num_tasks = full_output.event_shape[-1]
+            full_mean = full_mean.view(*batch_shape, -1, num_tasks)
+            fant_mean = full_mean[..., (num_train // num_tasks) :, :]
+        else:
+            full_mean = full_mean.view(*batch_shape, -1)
+            fant_mean = full_mean[..., num_train:]
+
         # Evaluate fant x train and fant x fant covariance matrices, leave train x train unevaluated.
         fant_fant_covar = full_covar[..., num_train:, num_train:]
-        fant_mean = full_mean[..., num_train:]
         mvn = self.train_prior_dist.__class__(fant_mean, fant_fant_covar)
         fant_likelihood = self.likelihood.get_fantasy_likelihood(**kwargs)
         mvn_obs = fant_likelihood(mvn, inputs, **kwargs)
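
The `num_train // num_tasks` arithmetic reflects that for a `MultitaskMultivariateNormal`, `num_train` counts covariance rows (points times tasks) while the mean keeps a `(points, tasks)` shape. An illustrative sketch, with made-up numbers:

import torch

n, m, t = 10, 3, 2                  # train points, fantasy points, tasks
num_train = n * t                   # self.num_train counts covariance rows: 20
full_mean = torch.randn(n + m, t)   # multitask mean keeps a (points, tasks) shape

# Convert covariance rows back into data points before slicing off the fantasies.
fant_mean = full_mean[(num_train // t):, :]
assert fant_mean.shape == torch.Size([m, t])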
@@ -198,6 +211,8 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
         new_root = new_lt.root_decomposition().root.to_dense()
         new_covar_cache = new_lt.root_inv_decomposition().root.to_dense()

+        full_targets = full_targets.view(*target_batch_shape, -1)
+
         # Expand inputs accordingly if necessary (for fantasies at the same points)
         if full_inputs[0].dim() <= full_targets.dim():
             fant_batch_shape = full_targets.shape[:1]
@@ -209,6 +224,9 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_
             new_root = BatchRepeatLinearOperator(DenseLinearOperator(new_root), repeat_shape)
             # no need to repeat the covar cache, broadcasting will do the right thing

+        if isinstance(full_output, MultitaskMultivariateNormal):
+            full_mean = full_mean.view(*target_batch_shape, -1, num_tasks).contiguous()
+
         # Create new DefaultPredictionStrategy object
         fant_strat = self.__class__(
             train_inputs=full_inputs,
@@ -258,7 +276,11 @@ def train_shape(self):

     def exact_prediction(self, joint_mean, joint_covar):
         # Find the components of the distribution that contain test data
-        test_mean = joint_mean[..., self.num_train :]
+        if not isinstance(self.train_prior_dist, MultitaskMultivariateNormal):
+            test_mean = joint_mean[..., self.num_train :]
+        else:
+            num_tasks = joint_mean.shape[-1]
+            test_mean = joint_mean[..., (self.num_train // num_tasks) :, :]
         # For efficiency - we can make things more efficient
         if joint_covar.size(-1) <= settings.max_eager_kernel_size.value():
             test_covar = joint_covar[..., self.num_train :, :].to_dense()
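
Same bookkeeping as in `get_fantasy_strategy`: the multitask `joint_mean` is sliced in units of points, while `joint_covar` is still sliced in units of rows. Illustrative shapes, assuming the same conventions as above:

import torch

n, m, t = 10, 3, 2
num_train = n * t
joint_mean = torch.randn(n + m, t)                    # sliced in units of points
joint_covar = torch.randn((n + m) * t, (n + m) * t)   # sliced in units of rows

test_mean = joint_mean[(num_train // t):, :]   # (m, t)
test_covar = joint_covar[num_train:, :]        # (m * t, (n + m) * t)
assert test_mean.shape == torch.Size([m, t])
assert test_covar.shape == torch.Size([m * t, (n + m) * t])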
@@ -285,7 +307,10 @@ def exact_predictive_mean(self, test_mean: Tensor, test_train_covar: LinearOpera
         # NOTE TO FUTURE SELF:
         # You **cannot* use addmv here, because test_train_covar may not actually be a non lazy tensor even for an exact
         # GP, and using addmv requires you to to_dense test_train_covar, which is obviously a huge no-no!
-        res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
+        if not isinstance(self.train_prior_dist, MultitaskMultivariateNormal):
+            res = (test_train_covar @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
+        else:
+            res = (test_train_covar.unsqueeze(1) @ self.mean_cache.unsqueeze(-1)).squeeze(-1)
         res = res + test_mean

         return res
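
The `unsqueeze(1)` in the multitask branch inserts a singleton batch dimension so the covariance can broadcast against an extra batch dimension in the mean cache. One shape combination this broadcasting supports (illustrative only; the actual shapes depend on the fantasy batch setup):

import torch

# A fantasy-batched covariance shared across an extra model batch dim.
f, b, mt, nt = 4, 3, 6, 20                 # fantasy batch, model batch, m*t, n*t
test_train_covar = torch.randn(f, mt, nt)
mean_cache = torch.randn(f, b, nt)

res = (test_train_covar.unsqueeze(1) @ mean_cache.unsqueeze(-1)).squeeze(-1)
assert res.shape == torch.Size([f, b, mt])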
