Add subset_output functionality to (most) models (#324)

Balandat · facebook-github-bot · commit c426b0a9e251 · 2019-12-06T09:44:50.000-08:00
Summary: Pull Request resolved: #324. In some cases we want to be able to subset models along the output dimension. For instance, if we fit a multi-output model with a number of metrics, we may want to optimize an acquisition function with an objective that only involves a subset of the outputs. By subsetting the model prior to that, we can save a lot of compute. This diff adds a `subset_output` function to the model API. Calling this on a model with a list of indices will return a new model object that is restricted to the desired outputs. For some models (e.g. `AffineDeterministicModel` or `ModelListGP`) the implementation is trivial. For others it's a little more involved but doable. The main challenge is with things like passing in generic covariance modules - we really don't have any way of knowing what dimensions of the respective buffers and parameters we need to subset / rescale in this case. Reviewed By: sdaulton. Differential Revision: D18668985. fbshipit-source-id: 41479203f23e8bcfa08bbe5f025ed12f0124a091
diff --git a/botorch/models/deterministic.py b/botorch/models/deterministic.py
@@ -73,6 +73,21 @@ def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
         self._f = f
         self._num_outputs = num_outputs
 
+    def subset_output(self, idcs: List[int]) -> "GenericDeterministicModel":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            A new model of the same class evaluating only the `idcs` outputs.
+        """
+
+        def f_subset(X: Tensor) -> Tensor:
+            return self._f(X)[..., idcs]
+
+        return self.__class__(f=f_subset, num_outputs=len(idcs))
+
     def forward(self, X: Tensor) -> Tensor:
         r"""Compute the (deterministic) model output at X.
 
@@ -113,5 +128,18 @@ def __init__(self, a: Tensor, b: Union[Tensor, float] = 0.01) -> None:
         self.register_buffer("b", b.expand(a.size(-1)))
         self._num_outputs = a.size(-1)
 
+    def subset_output(self, idcs: List[int]) -> "AffineDeterministicModel":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            A new model whose `a` and `b` are detached clones indexed by `idcs`.
+        """
+        a_sub = self.a.detach()[..., idcs].clone()
+        b_sub = self.b.detach()[..., idcs].clone()
+        return self.__class__(a=a_sub, b=b_sub)
+
     def forward(self, X: Tensor) -> Tensor:
         return self.b + torch.einsum("...d,dm", X, self.a)
diff --git a/botorch/models/gp_regression.py b/botorch/models/gp_regression.py
@@ -8,7 +8,7 @@
 Gaussian Process Regression models based on GPyTorch models.
 """
 
-from typing import Any, Optional, Union
+from typing import Any, List, Optional, Union
 
 import torch
 from gpytorch.constraints.constraints import GreaterThan
@@ -117,8 +117,15 @@ def __init__(
                 batch_shape=self._aug_batch_shape,
                 outputscale_prior=GammaPrior(2.0, 0.15),
             )
+            self._subset_batch_dict = {
+                "likelihood.noise_covar.raw_noise": -2,
+                "mean_module.constant": -2,
+                "covar_module.raw_outputscale": -1,
+                "covar_module.base_kernel.raw_lengthscale": -3,
+            }
         else:
             self.covar_module = covar_module
+        # TODO: Allow subsetting of other covar modules
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
         self.to(train_X)
@@ -192,6 +199,11 @@ def __init__(
         )
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
+        self._subset_batch_dict = {
+            "mean_module.constant": -2,
+            "covar_module.raw_outputscale": -1,
+            "covar_module.base_kernel.raw_lengthscale": -3,
+        }
         self.to(train_X)
 
     def fantasize(
@@ -242,6 +254,21 @@ def forward(self, x: Tensor) -> MultivariateNormal:
         covar_x = self.covar_module(x)
         return MultivariateNormal(mean_x, covar_x)
 
+    def subset_output(self, idcs: List[int]) -> "BatchedMultiOutputGPyTorchModel":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            The parent's subset model with its likelihood noise indexed by `idcs`.
+        """
+        new_model = super().subset_output(idcs=idcs)
+        full_noise = new_model.likelihood.noise_covar.noise
+        new_noise = full_noise[..., idcs if len(idcs) > 1 else idcs[0], :]
+        new_model.likelihood.noise_covar.noise = new_noise
+        return new_model
+
 
 class HeteroskedasticSingleTaskGP(SingleTaskGP):
     r"""A single-task exact GP model using a heteroskeastic noise model.
@@ -311,3 +338,6 @@ def condition_on_observations(
         self, X: Tensor, Y: Tensor, **kwargs: Any
     ) -> "HeteroskedasticSingleTaskGP":
         raise NotImplementedError
+
+    def subset_output(self, idcs: List[int]) -> "HeteroskedasticSingleTaskGP":
+        raise NotImplementedError  # output subsetting is not supported here
diff --git a/botorch/models/gpytorch.py b/botorch/models/gpytorch.py
@@ -11,8 +11,10 @@
 GPyTorch Model class such as an ExactGP.
 """
 
+import itertools
 import warnings
 from abc import ABC
+from copy import deepcopy
 from typing import Any, Iterator, List, Optional, Tuple, Union
 
 import torch
@@ -26,7 +28,12 @@
 from ..posteriors.gpytorch import GPyTorchPosterior
 from ..utils.transforms import gpt_posterior_settings
 from .model import Model
-from .utils import _make_X_full, add_output_dim, multioutput_to_batch_mode_transform
+from .utils import (
+    _make_X_full,
+    add_output_dim,
+    mod_batch_shape,
+    multioutput_to_batch_mode_transform,
+)
 
 
 class GPyTorchModel(Model, ABC):
@@ -358,6 +365,50 @@ def condition_on_observations(
         fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
         return fantasy_model
 
+    def subset_output(self, idcs: List[int]) -> "BatchedMultiOutputGPyTorchModel":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            A deep copy of the current model, subset to the specified outputs.
+        """
+        try:
+            subset_batch_dict = self._subset_batch_dict
+        except AttributeError:
+            raise NotImplementedError(
+                "subset_output requires the model to define a "
+                "`_subset_batch_dict` attribute"
+            )
+
+        m = len(idcs)
+        tidxr = torch.tensor(idcs, device=self.train_targets.device)
+        idxr = tidxr if m > 1 else idcs[0]
+        new_tail_bs = torch.Size([m]) if m > 1 else torch.Size()
+        new_model = deepcopy(self)
+        new_model._num_outputs = m
+        new_model._aug_batch_shape = new_model._aug_batch_shape[:-1] + new_tail_bs
+        new_model.train_inputs = tuple(
+            ti[..., idxr, :, :] for ti in new_model.train_inputs
+        )
+        new_model.train_targets = new_model.train_targets[..., idxr, :]
+
+        # subset listed parameters/buffers and update the modules' batch shapes
+        for full_name, p in itertools.chain(
+            new_model.named_parameters(), new_model.named_buffers()
+        ):
+            if full_name in subset_batch_dict:
+                idx = subset_batch_dict[full_name]
+                new_data = p.index_select(idx, tidxr)
+                if m == 1:
+                    new_data = new_data.squeeze(idx)
+                p.data = new_data
+            mod_name = full_name.split(".")[:-1]
+            mod_batch_shape(new_model, mod_name, m if m > 1 else 0)
+
+        return new_model
+
 
 class ModelListGPyTorchModel(GPyTorchModel, ABC):
     r"""Abstract base class for models based on multi-output GPyTorch models.
diff --git a/botorch/models/model.py b/botorch/models/model.py
@@ -55,6 +55,18 @@ def num_outputs(self) -> int:
         cls_name = self.__class__.__name__
         raise NotImplementedError(f"{cls_name} does not define num_outputs property")
 
+    def subset_output(self, idcs: List[int]) -> "Model":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            A `Model` object of the same type and with the same parameters as
+            the current model, subset to the specified output indices.
+        """
+        raise NotImplementedError  # no generic implementation in this class
+
     def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> "Model":
         r"""Condition the model on new observations.
 
diff --git a/botorch/models/model_list_gp_regression.py b/botorch/models/model_list_gp_regression.py
@@ -8,7 +8,8 @@
 Model List GP Regression models.
 """
 
-from typing import Any
+from copy import deepcopy
+from typing import Any, List
 
 from gpytorch.models import IndependentModelList
 from torch import Tensor
@@ -89,3 +90,14 @@ def condition_on_observations(
         else:
             kwargs_ = kwargs
         return super().get_fantasy_model(inputs, targets, **kwargs_)
+
+    def subset_output(self, idcs: List[int]) -> "ModelListGP":
+        r"""Subset the model along the output dimension.
+
+        Args:
+            idcs: The output indices to subset the model to.
+
+        Returns:
+            A new model of this class holding deep copies of the `idcs` sub-models.
+        """
+        return self.__class__(*[deepcopy(self.models[i]) for i in idcs])
diff --git a/botorch/models/utils.py b/botorch/models/utils.py
@@ -12,6 +12,7 @@
 from typing import List, Optional, Tuple
 
 import torch
+from gpytorch.module import Module
 from gpytorch.utils.broadcasting import _mul_broadcast_shape
 from torch import Tensor
 
@@ -222,3 +223,25 @@ def validate_input_scaling(
             raise InputDataError("Input data contains negative variances.")
     check_min_max_scaling(X=train_X, raise_on_fail=raise_on_fail)
     check_standardization(Y=train_Y, raise_on_fail=raise_on_fail)
+
+
+def mod_batch_shape(module: Module, names: List[str], b: int) -> None:
+    r"""Recursive helper to modify gpytorch modules' batch shape attribute.
+
+    Modifies the module in-place.
+
+    Args:
+        module: The module to be modified.
+        names: The list of names to access the attribute. If the full name of
+            the module is `"module.sub_module.leaf_module"`, this will be
+            `["sub_module", "leaf_module"]`.
+        b: The new size of the last `batch_shape` dimension of the module;
+            if `b <= 0`, that dimension is removed instead.
+    """
+    if len(names) == 0:
+        return
+    m = getattr(module, names[0])
+    if len(names) == 1 and hasattr(m, "batch_shape") and len(m.batch_shape) > 0:
+        m.batch_shape = m.batch_shape[:-1] + torch.Size([b] if b > 0 else [])
+    else:
+        mod_batch_shape(module=m, names=names[1:], b=b)
diff --git a/test/models/test_deterministic.py b/test/models/test_deterministic.py
@@ -39,6 +39,11 @@ def f(X):
         self.assertEqual(model.num_outputs, 2)
         p = model.posterior(X, output_indices=[0])
         self.assertTrue(torch.equal(p.mean, X[..., [0]]))
+        # test subset output
+        subset_model = model.subset_output([0])
+        self.assertIsInstance(subset_model, GenericDeterministicModel)
+        p_sub = subset_model.posterior(X)
+        self.assertTrue(torch.equal(p_sub.mean, X[..., [0]]))
 
     def test_AffineDeterministicModel(self):
         # test error on bad shape of a
@@ -65,3 +70,10 @@ def test_AffineDeterministicModel(self):
             p = model.posterior(X)
             mean_exp = model.b + (X.unsqueeze(-1) * a).sum(dim=-2)
             self.assertTrue(torch.equal(p.mean, mean_exp))
+        # test subset output
+        X = torch.rand(4, 3)
+        subset_model = model.subset_output([0])
+        self.assertIsInstance(subset_model, AffineDeterministicModel)
+        p = model.posterior(X)
+        p_sub = subset_model.posterior(X)
+        self.assertTrue(torch.equal(p_sub.mean, p.mean[..., [0]]))
diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py
@@ -250,6 +250,27 @@ def test_fantasize(self):
             fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
             self.assertIsInstance(fm, model.__class__)
 
+    def test_subset_model(self):
+        for batch_shape, dtype in itertools.product(
+            (torch.Size(), torch.Size([2])), (torch.float, torch.double)
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+            model, model_kwargs = self._get_model_and_data(
+                batch_shape=batch_shape, m=2, **tkwargs
+            )
+            subset_model = model.subset_output([0])  # keep only output 0
+            X = torch.rand(torch.Size(batch_shape + torch.Size([3, 1])), **tkwargs)
+            p = model.posterior(X)
+            p_sub = subset_model.posterior(X)
+            self.assertTrue(
+                torch.allclose(p_sub.mean, p.mean[..., [0]], atol=1e-4, rtol=1e-4)
+            )
+            self.assertTrue(
+                torch.allclose(
+                    p_sub.variance, p.variance[..., [0]], atol=1e-4, rtol=1e-4
+                )
+            )
+
 
 class TestFixedNoiseGP(TestSingleTaskGP):
     def _get_model_and_data(self, batch_shape, m, outcome_transform=None, **tkwargs):
@@ -324,6 +345,10 @@ def test_fantasize(self):
         with self.assertRaises(NotImplementedError):
             super().test_fantasize()
 
+    def test_subset_model(self):
+        with self.assertRaises(NotImplementedError):  # subsetting unsupported
+            super().test_subset_model()
+
 
 def _get_pvar_expected(posterior, model, X, m):
     lh_kwargs = {}
diff --git a/test/models/test_gpytorch.py b/test/models/test_gpytorch.py
@@ -116,6 +116,9 @@ def test_gpytorch_model(self):
             )
             self.assertIsInstance(cm, SimpleGPyTorchModel)
             self.assertEqual(cm.train_targets.shape, torch.Size([7]))
+            # test subset_output
+            with self.assertRaises(NotImplementedError):
+                model.subset_output([0])
             # test fantasize
             sampler = SobolQMCNormalSampler(num_samples=2)
             cm = model.fantasize(torch.rand(2, 1, **tkwargs), sampler=sampler)
@@ -191,6 +194,9 @@ def test_batched_multi_output_gpytorch_model(self):
             )
             self.assertIsInstance(posterior, GPyTorchPosterior)
             self.assertEqual(posterior.mean.shape, torch.Size([2, 2]))
+            # test subset_output
+            with self.assertRaises(NotImplementedError):
+                model.subset_output([0])
             # test conditioning on observations
             cm = model.condition_on_observations(
                 torch.rand(2, 1, **tkwargs), torch.rand(2, 2, **tkwargs)
diff --git a/test/models/test_model.py b/test/models/test_model.py
@@ -24,3 +24,5 @@ def test_not_so_abstract_base_model(self):
             model.condition_on_observations(None, None)
         with self.assertRaises(NotImplementedError):
             model.num_outputs
+        with self.assertRaises(NotImplementedError):
+            model.subset_output([0])
diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py
@@ -93,6 +93,15 @@ def test_ModelListGP(self):
                     mll, options={"maxiter": 1}, max_retries=1, sequential=False
                 )
 
+            # test subset outputs
+            subset_model = model.subset_output([1])
+            self.assertIsInstance(subset_model, ModelListGP)
+            self.assertEqual(len(subset_model.models), 1)
+            sd_subset = subset_model.models[0].state_dict()
+            sd = model.models[1].state_dict()
+            self.assertTrue(set(sd_subset.keys()) == set(sd.keys()))
+            self.assertTrue(all(torch.equal(v, sd[k]) for k, v in sd_subset.items()))
+
             # test posterior
             test_x = torch.tensor([[0.25], [0.75]], **tkwargs)
             posterior = model.posterior(test_x)