Catch gpytorch numerical issues and return NaN to the optimizer (#184)

Balandat · facebook-github-bot · commit a46b96419a00 · 2019-06-25T16:11:40.000-07:00
Summary: Pull Request resolved: #184. scipy's minimize can (sort of, not really) handle NaNs - doing this for the fitting may help us with the robustness issues. Basically, in `_scipy_objective_and_grad` we catch the "singularity error" in gpytorch and return `NaN` instead. L-BFGS-B will then terminate with `success=False` and an "abnormal termination in line search" message. From some simple toy experiments it appears as if the solver isn't smart enough to back off gradually during the line search. We'll have to hope that this degeneracy only occurs after the optimizer has mostly converged, in which case terminating at the current iterate will not be terrible. Therefore it may still be necessary to enforce explicit bounds that (i) avoid numerical issues and (ii) are not too conservative so as to exclude the actual minimum from the feasible set. Also, the "overstepping" will be dependent on the initial condition, so it would be desirable to re-start the optimization if the optimizer does not report success. Reviewed By: danielrjiang Differential Revision: D15977143 fbshipit-source-id: 999d158b0ba2ce310c180b2ac5eaa71baf7430d5
diff --git a/botorch/exceptions/warnings.py b/botorch/exceptions/warnings.py
@@ -13,6 +13,12 @@ class BotorchWarning(Warning):
     pass
 
 
+class OptimizationWarning(BotorchWarning):
+    r"""Optimization-related warnings."""
+
+    pass
+
+
 class BadInitialCandidatesWarning(BotorchWarning):
     r"""Warning issued if set of initial candidates for optimziation is bad."""
 
diff --git a/botorch/optim/fit.py b/botorch/optim/fit.py
@@ -7,6 +7,7 @@
 """
 
 import time
+import warnings
 from collections import OrderedDict
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple
 
@@ -17,6 +18,7 @@
 from torch.optim.adam import Adam
 from torch.optim.optimizer import Optimizer
 
+from ..exceptions.warnings import OptimizationWarning
 from .numpy_converter import TorchAttr, module_to_array, set_params_with_array
 from .utils import _filter_kwargs, _get_extra_mll_args, check_convergence
 
@@ -192,9 +194,17 @@ def store_iteration(xk):
     iterations = []
     if track_iterations:
         for i, xk in enumerate(xs):
-            obj, _ = _scipy_objective_and_grad(xk, mll, property_dict)
+            obj, _ = _scipy_objective_and_grad(
+                x=xk, mll=mll, property_dict=property_dict
+            )
             iterations.append(OptimizationIteration(i, obj, ts[i]))
 
+    if not res.success:
+        msg = res.message.decode("ascii")
+        warnings.warn(
+            f"Fitting failed with the optimizer reporting '{msg}'", OptimizationWarning
+        )
+
     # Set to optimum
     mll = set_params_with_array(mll, res.x, property_dict)
     return mll, iterations
@@ -220,9 +230,15 @@ def _scipy_objective_and_grad(
     mll = set_params_with_array(mll, x, property_dict)
     train_inputs, train_targets = mll.model.train_inputs, mll.model.train_targets
     mll.zero_grad()
-    output = mll.model(*train_inputs)
-    args = [output, train_targets] + _get_extra_mll_args(mll)
-    loss = -mll(*args).sum()
+    try:  # catch linear algebra errors in gpytorch
+        output = mll.model(*train_inputs)
+        args = [output, train_targets] + _get_extra_mll_args(mll)
+        loss = -mll(*args).sum()
+    except RuntimeError as e:
+        if "singular" in e.args[0]:
+            return float("nan"), np.full_like(x, "nan")
+        else:
+            raise e  # pragma: nocover
     loss.backward()
     param_dict = OrderedDict(mll.named_parameters())
     grad = []
diff --git a/test/exceptions/test_warnings.py b/test/exceptions/test_warnings.py
@@ -8,6 +8,7 @@
 from botorch.exceptions.warnings import (
     BadInitialCandidatesWarning,
     BotorchWarning,
+    OptimizationWarning,
     SamplingWarning,
 )
 
@@ -16,12 +17,14 @@ class TestBotorchWarnings(unittest.TestCase):
     def test_botorch_warnings_hierarchy(self):
         self.assertIsInstance(BotorchWarning(), Warning)
         self.assertIsInstance(BadInitialCandidatesWarning(), BotorchWarning)
+        self.assertIsInstance(OptimizationWarning(), BotorchWarning)
         self.assertIsInstance(SamplingWarning(), BotorchWarning)
 
     def test_botorch_warnings(self):
         for WarningClass in (
             BotorchWarning,
             BadInitialCandidatesWarning,
+            OptimizationWarning,
             SamplingWarning,
         ):
             with warnings.catch_warnings(record=True) as w:
diff --git a/test/test_fit.py b/test/test_fit.py
@@ -4,6 +4,7 @@
 
 import math
 import unittest
+import warnings
 
 import torch
 from botorch import fit_gpytorch_model
@@ -13,11 +14,15 @@
     fit_gpytorch_scipy,
     fit_gpytorch_torch,
 )
+from gpytorch.constraints import GreaterThan
+from gpytorch.likelihoods import GaussianLikelihood
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
 
 
 NOISE = [0.127, -0.113, -0.345, -0.034, -0.069, -0.272, 0.013, 0.056, 0.087, -0.081]
 
+MAX_ITER_MSG = "TOTAL NO. of ITERATIONS REACHED LIMIT"
+
 
 class TestFitGPyTorchModel(unittest.TestCase):
     def _getModel(self, double=False, cuda=False):
@@ -34,7 +39,11 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
         options = {"disp": False, "maxiter": 5}
         for double in (False, True):
             mll = self._getModel(double=double, cuda=cuda)
-            mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+            with warnings.catch_warnings(record=True) as ws:
+                mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+                if optimizer == fit_gpytorch_scipy:
+                    self.assertEqual(len(ws), 1)
+                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
             model = mll.model
             # Make sure all of the parameters changed
             self.assertGreater(model.likelihood.raw_noise.abs().item(), 1e-3)
@@ -46,12 +55,17 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
 
             # test overriding the default bounds with user supplied bounds
             mll = self._getModel(double=double, cuda=cuda)
-            mll = fit_gpytorch_model(
-                mll,
-                optimizer=optimizer,
-                options=options,
-                bounds={"likelihood.noise_covar.raw_noise": (1e-1, None)},
-            )
+            with warnings.catch_warnings(record=True) as ws:
+                mll = fit_gpytorch_model(
+                    mll,
+                    optimizer=optimizer,
+                    options=options,
+                    bounds={"likelihood.noise_covar.raw_noise": (1e-1, None)},
+                )
+                if optimizer == fit_gpytorch_scipy:
+                    self.assertEqual(len(ws), 1)
+                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
+
             model = mll.model
             self.assertGreaterEqual(model.likelihood.raw_noise.abs().item(), 1e-1)
             self.assertLess(model.mean_module.constant.abs().item(), 0.1)
@@ -64,7 +78,11 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
             mll = self._getModel(double=double, cuda=cuda)
             if optimizer is fit_gpytorch_torch:
                 options["disp"] = True
-            mll, iterations = optimizer(mll, options=options, track_iterations=True)
+            with warnings.catch_warnings(record=True) as ws:
+                mll, iterations = optimizer(mll, options=options, track_iterations=True)
+                if optimizer == fit_gpytorch_scipy:
+                    self.assertEqual(len(ws), 1)
+                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
             self.assertEqual(len(iterations), options["maxiter"])
             self.assertIsInstance(iterations[0], OptimizationIteration)
 
@@ -81,13 +99,38 @@ def test_fit_gpytorch_model(self, cuda=False, optimizer=fit_gpytorch_scipy):
                     )
                 ),
             )
-            mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+            with warnings.catch_warnings(record=True) as ws:
+                mll = fit_gpytorch_model(mll, optimizer=optimizer, options=options)
+                if optimizer == fit_gpytorch_scipy:
+                    self.assertEqual(len(ws), 1)
+                    self.assertTrue(MAX_ITER_MSG in str(ws[-1].message))
             self.assertTrue(mll.dummy_param.grad is None)
 
-    def test_fit_gpytorch_model_scipy_cuda(self):
+    def test_fit_gpytorch_model_cuda(self):
         if torch.cuda.is_available():
             self.test_fit_gpytorch_model(cuda=True)
 
+    def test_fit_gpytorch_model_singular(self, cuda=False):
+        options = {"disp": False, "maxiter": 2}
+        device = torch.device("cuda") if cuda else torch.device("cpu")
+        for dtype in (torch.float, torch.double):
+            X_train = torch.rand(2, 2, device=device, dtype=dtype)
+            Y_train = torch.zeros(2, device=device, dtype=dtype)
+            test_likelihood = GaussianLikelihood(
+                noise_constraint=GreaterThan(-1.0, transform=None, initial_value=0.0)
+            )
+            gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
+            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
+            mll.to(device=device, dtype=dtype)
+            with warnings.catch_warnings(record=True) as ws:
+                fit_gpytorch_model(mll, options=options)
+                self.assertEqual(len(ws), 1)
+                self.assertTrue("Fitting failed" in str(ws[0].message))
+
+    def test_fit_gpytorch_model_singular_cuda(self):
+        if torch.cuda.is_available():
+            self.test_fit_gpytorch_model_singular(cuda=True)
+
     def test_fit_gpytorch_model_torch(self, cuda=False):
         self.test_fit_gpytorch_model(cuda=cuda, optimizer=fit_gpytorch_torch)