
Commit 72a476b

Balandat authored and facebook-github-bot committed
Sequentially fit batched models using ModelList converter (#189)
Summary:
Pull Request resolved: #189

Fitting batched multi-output models with many outputs jointly can result in inferior model fits (due to the size of the resulting optimization problem). This change uses the ModelList <-> BatchedModel converter to fit the models corresponding to the different (independent) outputs individually.

Note that in its current form this may cause issues with custom likelihoods in `SingleTaskGP` models.

Reviewed By: sdaulton

Differential Revision: D16007838

fbshipit-source-id: 530d85aa5b17c0d2aa3bc2f184ad3a20e1994c06
1 parent 394609e commit 72a476b
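
For orientation, here is a minimal sketch of how the behavior described in the summary is exercised from user code. The training data and shapes below are illustrative only (not taken from this repository); the imports and the `sequential` / `max_retries` keyword arguments follow the diff further down.

import math

import torch
from botorch import fit_gpytorch_model
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

# Illustrative data: one input feature, two independent outputs stacked in the
# last dimension, which makes SingleTaskGP a batched multi-output model.
train_X = torch.linspace(0, 1, 10).unsqueeze(-1)
train_Y = torch.stack(
    [
        torch.sin(2 * math.pi * train_X.squeeze(-1)),
        torch.cos(2 * math.pi * train_X.squeeze(-1)),
    ],
    dim=-1,
)

model = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)

# With this commit, the two outputs are fit one at a time (via the ModelList
# converter) by default; pass sequential=False to keep the joint batched fit.
fit_gpytorch_model(mll)
# fit_gpytorch_model(mll, sequential=False, max_retries=5)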

File tree: 2 files changed (+85 lines, -15 lines)

botorch/fit.py

Lines changed: 35 additions & 8 deletions

@@ -15,24 +15,30 @@
 from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
 
 from .exceptions.warnings import OptimizationWarning
+from .models.converter import batched_to_model_list, model_list_to_batched
+from .models.gp_regression import HeteroskedasticSingleTaskGP
+from .models.gpytorch import BatchedMultiOutputGPyTorchModel
 from .optim.fit import fit_gpytorch_scipy
 from .optim.utils import sample_all_priors
 
 
 def fit_gpytorch_model(
     mll: MarginalLogLikelihood, optimizer: Callable = fit_gpytorch_scipy, **kwargs: Any
 ) -> MarginalLogLikelihood:
-    r"""Fit hyperparameters of a gpytorch model. On optimizer failures, a new
-    initial condition is sampled from the hyperparameter priors and optimization
-    is retried. The maximum number of retries can be passed in as a `max_retries`
-    kwarg (default is 5).
+    r"""Fit hyperparameters of a GPyTorch model.
+
+    On optimizer failures, a new initial condition is sampled from the
+    hyperparameter priors and optimization is retried. The maximum number of
+    retries can be passed in as a `max_retries` kwarg (default is 5).
 
     Optimizer functions are in botorch.optim.fit.
 
     Args:
         mll: MarginalLogLikelihood to be maximized.
         optimizer: The optimizer function.
-        kwargs: Arguments passed along to the optimizer function.
+        kwargs: Arguments passed along to the optimizer function, including
+            `max_retries` and `sequential` (controls the fitting of `ModelListGP`
+            and `BatchedMultiOutputGPyTorchModel` models).
 
     Returns:
         MarginalLogLikelihood with optimized parameters.
@@ -43,13 +49,34 @@ def fit_gpytorch_model(
         >>> fit_gpytorch_model(mll)
     """
     sequential = kwargs.pop("sequential", True)
+    max_retries = kwargs.pop("max_retries", 5)
     if isinstance(mll, SumMarginalLogLikelihood) and sequential:
         for mll_ in mll.mlls:
-            fit_gpytorch_model(mll=mll_, optimizer=optimizer, **kwargs)
+            fit_gpytorch_model(
+                mll=mll_, optimizer=optimizer, max_retries=max_retries, **kwargs
+            )
         return mll
-    max_retries = kwargs.pop("max_retries", 5)
-    original_state_dict = deepcopy(mll.model.state_dict())
+    elif (
+        isinstance(mll.model, BatchedMultiOutputGPyTorchModel)
+        and mll.model._num_outputs > 1
+        and sequential
+        and not isinstance(mll.model, HeteroskedasticSingleTaskGP)
+    ):
+        model_list = batched_to_model_list(mll.model)
+        mll_ = SumMarginalLogLikelihood(model_list.likelihood, model_list)
+        fit_gpytorch_model(
+            mll=mll_,
+            optimizer=optimizer,
+            sequential=True,
+            max_retries=max_retries,
+            **kwargs,
+        )
+        model_ = model_list_to_batched(mll_.model)
+        mll.model.load_state_dict(model_.state_dict())
+        return mll.eval()
+    # retry with random samples from the priors upon failure
     mll.train()
+    original_state_dict = deepcopy(mll.model.state_dict())
     retry = 0
     while retry < max_retries:
         with warnings.catch_warnings(record=True) as ws:
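
The sequential branch above relies on a round trip through the converters imported at the top of the file. As a rough standalone sketch of that round trip (the model construction here is illustrative; the converter and likelihood calls mirror the lines in the diff):

import torch
from botorch.models import SingleTaskGP
from botorch.models.converter import batched_to_model_list, model_list_to_batched
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood

# Illustrative batched model with three independent outputs.
train_X = torch.rand(10, 2)
train_Y = torch.rand(10, 3)
batched_model = SingleTaskGP(train_X, train_Y)

# Split into a ModelListGP holding one single-output GP per output, and build
# the summed MLL that fit_gpytorch_model now optimizes sub-model by sub-model.
model_list = batched_to_model_list(batched_model)
mll = SumMarginalLogLikelihood(model_list.likelihood, model_list)

# ... each sub-MLL would be fit here, as in the recursive call above ...

# Recombine the (fitted) sub-models and copy the hyperparameters back into
# the original batched model via its state dict.
refit = model_list_to_batched(model_list)
batched_model.load_state_dict(refit.state_dict())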

test/test_fit.py

Lines changed: 50 additions & 7 deletions

@@ -8,7 +8,8 @@
 
 import torch
 from botorch import fit_gpytorch_model
-from botorch.models import SingleTaskGP
+from botorch.exceptions.warnings import OptimizationWarning
+from botorch.models import FixedNoiseGP, HeteroskedasticSingleTaskGP, SingleTaskGP
 from botorch.optim.fit import (
     OptimizationIteration,
     fit_gpytorch_scipy,
@@ -36,6 +37,27 @@ def _getModel(self, double=False, cuda=False):
         mll = ExactMarginalLogLikelihood(model.likelihood, model)
         return mll.to(device=device, dtype=dtype)
 
+    def _getBatchedModel(self, kind="SingleTaskGP", double=False, cuda=False):
+        device = torch.device("cuda") if cuda else torch.device("cpu")
+        dtype = torch.double if double else torch.float
+        train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
+        noise = torch.tensor(NOISE, device=device, dtype=dtype)
+        train_y1 = torch.sin(train_x.view(-1) * (2 * math.pi)) + noise
+        train_y2 = torch.sin(train_x.view(-1) * (2 * math.pi)) + noise
+        train_y = torch.stack([train_y1, train_y2], dim=-1)
+        if kind == "SingleTaskGP":
+            model = SingleTaskGP(train_x, train_y)
+        elif kind == "FixedNoiseGP":
+            model = FixedNoiseGP(train_x, train_y, 0.1 * torch.ones_like(train_y))
+        elif kind == "HeteroskedasticSingleTaskGP":
+            model = HeteroskedasticSingleTaskGP(
+                train_x, train_y, 0.1 * torch.ones_like(train_y)
+            )
+        else:
+            raise NotImplementedError
+        mll = ExactMarginalLogLikelihood(model.likelihood, model)
+        return mll.to(device=device, dtype=dtype)
+
     def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
         options = {"disp": False, "maxiter": 5}
         for double in (False, True):
@@ -46,7 +68,7 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                 )
             if optimizer == fit_gpytorch_scipy:
                 self.assertEqual(len(ws), 1)
-                self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
+                self.assertTrue(MAX_RETRY_MSG in str(ws[0].message))
             model = mll.model
             # Make sure all of the parameters changed
             self.assertGreater(model.likelihood.raw_noise.abs().item(), 1e-3)
@@ -68,7 +90,7 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                 )
             if optimizer == fit_gpytorch_scipy:
                 self.assertEqual(len(ws), 1)
-                self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
+                self.assertTrue(MAX_RETRY_MSG in str(ws[0].message))
 
             model = mll.model
             self.assertGreaterEqual(model.likelihood.raw_noise.abs().item(), 1e-1)
@@ -86,7 +108,7 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                 mll, iterations = optimizer(mll, options=options, track_iterations=True)
             if optimizer == fit_gpytorch_scipy:
                 self.assertEqual(len(ws), 1)
-                self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
+                self.assertTrue(MAX_ITER_MSG in str(ws[0].message))
             self.assertEqual(len(iterations), options["maxiter"])
             self.assertIsInstance(iterations[0], OptimizationIteration)
 
@@ -109,15 +131,15 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                 )
             if optimizer == fit_gpytorch_scipy:
                 self.assertEqual(len(ws), 1)
-                self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
+                self.assertTrue(MAX_RETRY_MSG in str(ws[0].message))
             self.assertTrue(mll.dummy_param.grad is None)
 
     def test_fit_gpytorch_model_cuda(self):
         if torch.cuda.is_available():
             self.test_fit_gpytorch_model(cuda=True)
 
     def test_fit_gpytorch_model_singular(self, cuda=False):
-        options = {"disp": False, "maxiter": 2}
+        options = {"disp": False, "maxiter": 5}
         device = torch.device("cuda") if cuda else torch.device("cpu")
         for dtype in (torch.float, torch.double):
             X_train = torch.rand(2, 2, device=device, dtype=dtype)
@@ -130,7 +152,7 @@ def test_fit_gpytorch_model_singular(self, cuda=False):
             mll.to(device=device, dtype=dtype)
             with warnings.catch_warnings(record=True) as ws:
                 # this will do multiple retries
-                fit_gpytorch_model(mll, options=options)
+                fit_gpytorch_model(mll, options=options, max_retries=1)
             self.assertEqual(len(ws), 1)
             self.assertTrue(MAX_RETRY_MSG in str(ws[0].message))
 
@@ -144,3 +166,24 @@ def test_fit_gpytorch_model_torch(self, cuda=False):
     def test_fit_gpytorch_model_torch_cuda(self):
         if torch.cuda.is_available():
             self.test_fit_gpytorch_model_torch(cuda=True)
+
+    def test_fit_gpytorch_model_sequential(self, cuda=False):
+        options = {"disp": False, "maxiter": 1}
+        for double in (False, True):
+            for kind in ("SingleTaskGP", "FixedNoiseGP", "HeteroskedasticSingleTaskGP"):
+                with warnings.catch_warnings():
+                    warnings.filterwarnings("ignore", category=OptimizationWarning)
+                    mll = self._getBatchedModel(kind=kind, double=double, cuda=cuda)
+                    mll = fit_gpytorch_model(mll, options=options, max_retries=1)
+                    mll = self._getBatchedModel(kind=kind, double=double, cuda=cuda)
+                    mll = fit_gpytorch_model(
+                        mll, options=options, sequential=True, max_retries=1
+                    )
+                    mll = self._getBatchedModel(kind=kind, double=double, cuda=cuda)
+                    mll = fit_gpytorch_model(
+                        mll, options=options, sequential=False, max_retries=1
+                    )
+
+    def test_fit_gpytorch_model_sequential_cuda(self):
+        if torch.cuda.is_available():
+            self.test_fit_gpytorch_model_sequential(cuda=True)
