restart optimization by re-sampling from prior (#188)

Daniel Jiang · facebook-github-bot · commit 62c763a822d2 · 2019-06-26T16:08:08.000-07:00
Summary: Pull Request resolved: #188. When model fitting fails, the initial conditions are re-sampled from the hyperparameter (HP) priors and the optimization is retried. Reviewed By: Balandat. Differential Revision: D15980657. fbshipit-source-id: a2ff8d3d92b489e44a071f077f4abfcdf9b4ba77
diff --git a/botorch/exceptions/__init__.py b/botorch/exceptions/__init__.py
@@ -3,7 +3,12 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 
 from .errors import BotorchError, CandidateGenerationError, UnsupportedError
-from .warnings import BadInitialCandidatesWarning, BotorchWarning, SamplingWarning
+from .warnings import (
+    BadInitialCandidatesWarning,
+    BotorchWarning,
+    OptimizationWarning,
+    SamplingWarning,
+)
 
 
 __all__ = [
@@ -12,5 +17,6 @@
     "UnsupportedError",
     "BotorchWarning",
     "BadInitialCandidatesWarning",
+    "OptimizationWarning",
     "SamplingWarning",
 ]
diff --git a/botorch/fit.py b/botorch/fit.py
@@ -6,18 +6,26 @@
 Utilities for model fitting.
 """
 
+import logging
+import warnings
+from copy import deepcopy
 from typing import Any, Callable
 
 from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood
 from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
 
+from .exceptions.warnings import OptimizationWarning
 from .optim.fit import fit_gpytorch_scipy
+from .optim.utils import sample_all_priors
 
 
 def fit_gpytorch_model(
     mll: MarginalLogLikelihood, optimizer: Callable = fit_gpytorch_scipy, **kwargs: Any
 ) -> MarginalLogLikelihood:
-    r"""Fit hyperparameters of a gpytorch model.
+    r"""Fit hyperparameters of a gpytorch model. On optimizer failures, a new
+    initial condition is sampled from the hyperparameter priors and optimization
+    is retried. The maximum number of retries can be passed in as a `max_retries`
+    kwarg (default is 5).
 
     Optimizer functions are in botorch.optim.fit.
 
@@ -39,7 +47,22 @@ def fit_gpytorch_model(
         for mll_ in mll.mlls:
             fit_gpytorch_model(mll=mll_, optimizer=optimizer, **kwargs)
         return mll
+    max_retries = kwargs.pop("max_retries", 5)
+    original_state_dict = deepcopy(mll.model.state_dict())
     mll.train()
-    mll, _ = optimizer(mll, track_iterations=False, **kwargs)
+    retry = 0
+    while retry < max_retries:
+        with warnings.catch_warnings(record=True) as ws:
+            if retry > 0:  # use normal initial conditions on first try
+                mll.model.load_state_dict(original_state_dict)
+                sample_all_priors(mll.model)
+            mll, _ = optimizer(mll, track_iterations=False, **kwargs)
+            if not any(issubclass(w.category, OptimizationWarning) for w in ws):
+                mll.eval()
+                return mll
+            retry += 1
+            logging.warning(f"Fitting failed on try {retry}.")
+
+    warnings.warn("Fitting failed on all retries.", OptimizationWarning)
     mll.eval()
     return mll
diff --git a/botorch/optim/fit.py b/botorch/optim/fit.py
@@ -198,13 +198,11 @@ def store_iteration(xk):
                 x=xk, mll=mll, property_dict=property_dict
             )
             iterations.append(OptimizationIteration(i, obj, ts[i]))
-
     if not res.success:
         msg = res.message.decode("ascii")
         warnings.warn(
             f"Fitting failed with the optimizer reporting '{msg}'", OptimizationWarning
         )
-
     # Set to optimum
     mll = set_params_with_array(mll, res.x, property_dict)
     return mll, iterations
diff --git a/botorch/optim/utils.py b/botorch/optim/utils.py
@@ -6,6 +6,7 @@
 Utilities for optimization.
 """
 
+import warnings
 from inspect import signature
 from typing import Any, Callable, Dict, List, Optional, Union
 
@@ -16,6 +17,29 @@
 from gpytorch.mlls.variational_elbo import VariationalELBO
 from torch import Tensor
 
+from ..exceptions.warnings import BotorchWarning
+from ..models.gpytorch import GPyTorchModel
+
+
+def sample_all_priors(model: GPyTorchModel) -> None:
+    r"""Sample from hyperparameter priors (in-place).
+
+    Args:
+        model: A GPyTorchModel.
+    """
+    for _, prior, _, setting_closure in model.named_priors():
+        if setting_closure is None:
+            raise RuntimeError(
+                "Must provide inverse transform to be able to sample from prior."
+            )
+        try:
+            setting_closure(prior.sample())
+        except NotImplementedError:
+            warnings.warn(
+                f"`rsample` not implemented for {type(prior)}. Skipping.",
+                BotorchWarning,
+            )
+
 
 def check_convergence(
     loss_trajectory: List[float],
@@ -129,7 +153,7 @@ def _expand_bounds(
 
     Returns:
         A tensor of bounds expanded to be compatible with the size of `X` if
-        bounds is not None, and None if bounds is None
+        bounds is not None, and None if bounds is None.
     """
     if bounds is not None:
         if not torch.is_tensor(bounds):
diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py
@@ -63,7 +63,7 @@ def test_gp(self, cuda=False):
                     mll = ExactMarginalLogLikelihood(model.likelihood, model).to(
                         **tkwargs
                     )
-                    fit_gpytorch_model(mll, options={"maxiter": 1})
+                    fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
                     # test init
                     self.assertIsInstance(model.mean_module, ConstantMean)
diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py
@@ -70,9 +70,11 @@ def test_ModelListGP(self, cuda=False):
                 self.assertIsInstance(mll_, ExactMarginalLogLikelihood)
 
             # test model fitting (sequential)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
             # test model fitting (joint)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, sequential=False)
+            mll = fit_gpytorch_model(
+                mll, options={"maxiter": 1}, max_retries=1, sequential=False
+            )
 
             # test posterior
             test_x = torch.tensor([[0.25], [0.75]], **tkwargs)
@@ -138,7 +140,7 @@ def test_ModelListGP_fixed_noise(self, cuda=False):
             mll = SumMarginalLogLikelihood(model.likelihood, model)
             for mll_ in mll.mlls:
                 self.assertIsInstance(mll_, ExactMarginalLogLikelihood)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
             # test posterior
             test_x = torch.tensor([[0.25], [0.75]], **tkwargs)
diff --git a/test/models/test_multitask.py b/test/models/test_multitask.py
@@ -82,7 +82,7 @@ def test_MultiTaskGP(self, cuda=False):
 
             # test model fitting
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
             # test posterior
             test_x = torch.rand(2, 1, **tkwargs)
@@ -155,7 +155,7 @@ def test_MultiTaskGP_single_output(self, cuda=False):
 
             # test model fitting
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
             # test posterior
             test_x = torch.rand(2, 1, **tkwargs)
@@ -197,7 +197,7 @@ def test_FixedNoiseMultiTaskGP(self, cuda=False):
 
             # test model fitting
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
             # test posterior
             test_x = torch.rand(2, 1, **tkwargs)
@@ -268,7 +268,7 @@ def test_FixedNoiseMultiTaskGP_single_output(self, cuda=False):
 
             # test model fitting
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
-            mll = fit_gpytorch_model(mll, options={"maxiter": 1})
+            mll = fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)
 
             # test posterior
             test_x = torch.rand(2, 1, **tkwargs)
diff --git a/test/optim/test_utils.py b/test/optim/test_utils.py
@@ -3,6 +3,7 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 
 import unittest
+from copy import deepcopy
 
 import torch
 from botorch.models import ModelListGP, SingleTaskGP
@@ -12,11 +13,16 @@
     check_convergence,
     columnwise_clamp,
     fix_features,
+    sample_all_priors,
 )
+from gpytorch.kernels.matern_kernel import MaternKernel
+from gpytorch.kernels.scale_kernel import ScaleKernel
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
 from gpytorch.mlls.marginal_log_likelihood import MarginalLogLikelihood
 from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
 from gpytorch.mlls.variational_elbo import VariationalELBO
+from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior
+from gpytorch.priors.torch_priors import GammaPrior
 
 
 class TestCheckConvergence(unittest.TestCase):
@@ -191,3 +197,61 @@ def test_expand_bounds(self):
         # bounds is None
         expanded_bounds = _expand_bounds(bounds=None, X=X)
         self.assertIsNone(expanded_bounds)
+
+
+class TestSampleAllPriors(unittest.TestCase):
+    def test_sample_all_priors(self, cuda=False):
+        device = torch.device("cuda" if cuda else "cpu")
+        for dtype in (torch.float, torch.double):
+            train_X = torch.rand(3, 5, device=device, dtype=dtype)
+            train_Y = torch.rand(3, device=device, dtype=dtype)
+            model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
+            mll = ExactMarginalLogLikelihood(model.likelihood, model)
+            mll.to(device=device, dtype=dtype)
+            original_state_dict = dict(deepcopy(mll.model.state_dict()))
+            sample_all_priors(model)
+
+            # make sure one of the hyperparameters changed
+            self.assertTrue(
+                dict(model.state_dict())["likelihood.noise_covar.raw_noise"]
+                != original_state_dict["likelihood.noise_covar.raw_noise"]
+            )
+
+            # change one of the priors to SmoothedBoxPrior
+            model.covar_module = ScaleKernel(
+                MaternKernel(
+                    nu=2.5,
+                    ard_num_dims=model.train_inputs[0].shape[-1],
+                    batch_shape=model._aug_batch_shape,
+                    lengthscale_prior=SmoothedBoxPrior(3.0, 6.0),
+                ),
+                batch_shape=model._aug_batch_shape,
+                outputscale_prior=GammaPrior(2.0, 0.15),
+            )
+            original_state_dict = dict(deepcopy(mll.model.state_dict()))
+            sample_all_priors(model)
+
+            # the lengthscale should not have changed because sampling is
+            # not implemented for SmoothedBoxPrior
+            self.assertTrue(
+                torch.equal(
+                    dict(model.state_dict())[
+                        "covar_module.base_kernel.raw_lengthscale"
+                    ],
+                    original_state_dict["covar_module.base_kernel.raw_lengthscale"],
+                )
+            )
+
+            # set setting_closure to None and make sure RuntimeError is raised
+            prior_tuple = model.likelihood.noise_covar._priors["noise_prior"]
+            model.likelihood.noise_covar._priors["noise_prior"] = (
+                prior_tuple[0],
+                prior_tuple[1],
+                None,
+            )
+            with self.assertRaises(RuntimeError):
+                sample_all_priors(model)
+
+    def test_sample_all_priors_cuda(self):
+        if torch.cuda.is_available():
+            self.test_sample_all_priors(cuda=True)
diff --git a/test/test_end_to_end.py b/test/test_end_to_end.py
@@ -35,15 +35,19 @@ def _setUp(self, double=False, cuda=False):
         self.mll_st = ExactMarginalLogLikelihood(
             self.model_st.likelihood, self.model_st
         )
-        self.mll_st = fit_gpytorch_model(self.mll_st, options={"maxiter": 5})
+        self.mll_st = fit_gpytorch_model(
+            self.mll_st, options={"maxiter": 5}, max_retries=1
+        )
         model_fn = FixedNoiseGP(
             self.train_x, self.train_y, self.train_yvar.expand_as(self.train_y)
         )
         self.model_fn = model_fn.to(device=device, dtype=dtype)
         self.mll_fn = ExactMarginalLogLikelihood(
             self.model_fn.likelihood, self.model_fn
         )
-        self.mll_fn = fit_gpytorch_model(self.mll_fn, options={"maxiter": 5})
+        self.mll_fn = fit_gpytorch_model(
+            self.mll_fn, options={"maxiter": 5}, max_retries=1
+        )
 
     def test_qEI(self, cuda=False):
         for double in (True, False):
diff --git a/test/test_fit.py b/test/test_fit.py
@@ -22,6 +22,7 @@
 NOISE = [0.127, -0.113, -0.345, -0.034, -0.069, -0.272, 0.013, 0.056, 0.087, -0.081]
 
 MAX_ITER_MSG = "TOTAL NO. of ITERATIONS REACHED LIMIT"
+MAX_RETRY_MSG = "Fitting failed on all retries."
 
 
 class TestFitGPyTorchModel(unittest.TestCase):
@@ -40,10 +41,12 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
         for double in (False, True):
             mll = self._getModel(double=double, cuda=cuda)
             with warnings.catch_warnings(record=True) as ws:
-                mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+                mll = fit_gpytorch_model(
+                    mll, optimizer=optimizer, options=options, max_retries=1
+                )
                 if optimizer == fit_gpytorch_scipy:
                     self.assertEqual(len(ws), 1)
-                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
+                    self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
             model = mll.model
             # Make sure all of the parameters changed
             self.assertGreater(model.likelihood.raw_noise.abs().item(), 1e-3)
@@ -60,11 +63,12 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                     mll,
                     optimizer=optimizer,
                     options=options,
+                    max_retries=1,
                     bounds={"likelihood.noise_covar.raw_noise": (1e-1, None)},
                 )
                 if optimizer == fit_gpytorch_scipy:
                     self.assertEqual(len(ws), 1)
-                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
+                    self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
 
             model = mll.model
             self.assertGreaterEqual(model.likelihood.raw_noise.abs().item(), 1e-1)
@@ -100,10 +104,12 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                 ),
             )
             with warnings.catch_warnings(record=True) as ws:
-                mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+                mll = fit_gpytorch_model(
+                    mll, optimizer=optimizer, options=options, max_retries=1
+                )
                 if optimizer == fit_gpytorch_scipy:
                     self.assertEqual(len(ws), 1)
-                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
+                    self.assertTrue(MAX_RETRY_MSG in str(ws[-1].message))
             self.assertTrue(mll.dummy_param.grad is None)
 
     def test_fit_gpytorch_model_cuda(self):
@@ -123,9 +129,10 @@ def test_fit_gpytorch_model_singular(self, cuda=False):
             mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
             mll.to(device=device, dtype=dtype)
             with warnings.catch_warnings(record=True) as ws:
+                # this will do multiple retries
                 fit_gpytorch_model(mll, options=options)
                 self.assertEqual(len(ws), 1)
-                self.assertTrue("Fitting failed" in str(ws[0].message))
+                self.assertTrue(MAX_RETRY_MSG in str(ws[0].message))
 
     def test_fit_gpytorch_model_singular_cuda(self):
         if torch.cuda.is_available():
diff --git a/test/test_gen.py b/test/test_gen.py
@@ -37,7 +37,7 @@ def _setUp(self, double=False, cuda=False, expand=False):
         model = SingleTaskGP(self.train_x, self.train_y)
         self.model = model.to(device=device, dtype=dtype)
         self.mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
-        self.mll = fit_gpytorch_model(self.mll, options={"maxiter": 1})
+        self.mll = fit_gpytorch_model(self.mll, options={"maxiter": 1}, max_retries=1)
 
 
 class TestGenCandidates(TestBaseCandidateGeneration):

Original file line number	Diff line number	Diff line change
`@@ -198,13 +198,11 @@ def store_iteration(xk):`
`198`	`198`	`x=xk, mll=mll, property_dict=property_dict`
`199`	`199`	`)`
`200`	`200`	`iterations.append(OptimizationIteration(i, obj, ts[i]))`
`201`		`-`
`202`	`201`	`if not res.success:`
`203`	`202`	`msg = res.message.decode("ascii")`
`204`	`203`	`warnings.warn(`
`205`	`204`	`f"Fitting failed with the optimizer reporting '{msg}'", OptimizationWarning`
`206`	`205`	`)`
`207`		`-`
`208`	`206`	`# Set to optimum`
`209`	`207`	`mll = set_params_with_array(mll, res.x, property_dict)`
`210`	`208`	`return mll, iterations`
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ def test_gp(self, cuda=False):`
`63`	`63`	`mll = ExactMarginalLogLikelihood(model.likelihood, model).to(`
`64`	`64`	`**tkwargs`
`65`	`65`	`)`
`66`		`- fit_gpytorch_model(mll, options={"maxiter": 1})`
	`66`	`+ fit_gpytorch_model(mll, options={"maxiter": 1}, max_retries=1)`
`67`	`67`
`68`	`68`	`# test init`
`69`	`69`	`self.assertIsInstance(model.mean_module, ConstantMean)`