Support loading a state dict for SaasFullyBayesianSingleTaskGP (#1120)

dme65 · facebook-github-bot · commit 286012dcf530 · 2022-09-08T20:22:47.000-07:00
Summary: X-link: facebook/Ax#1120. Pull Request resolved: #1384. `SaasFullyBayesianSingleTaskGP` currently doesn't support `load_state_dict` because the model's modules are only initialized after fitting, so a state dict can't be loaded into a model that hasn't been fitted. This diff modifies `load_state_dict` to initialize the model with some dummy samples before loading the state dict. Reviewed By: saitcakmak, Balandat. Differential Revision: D39358160. fbshipit-source-id: d923688bd307ae1a0bdb85e015491c50ab8e7203
diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
@@ -33,7 +33,7 @@
 
 import math
 from abc import abstractmethod
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
 
 import pyro
 import torch
@@ -134,7 +134,7 @@ def load_mcmc_samples(
 class SaasPyroModel(PyroModel):
     r"""Implementation of the sparse axis-aligned subspace priors (SAAS) model.
 
-    The SAAS model uses sparsity-inducing priors to identift the most important
+    The SAAS model uses sparsity-inducing priors to identify the most important
     parameters. This model is suitable for high-dimensional BO with potentially
     hundreds of tunable parameters. See [Eriksson2021saasbo]_ for more details.
 
@@ -422,6 +422,41 @@ def load_mcmc_samples(self, mcmc_samples: Dict[str, Tensor]) -> None:
             self.likelihood,
         ) = self.pyro_model.load_mcmc_samples(mcmc_samples=mcmc_samples)
 
+    def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
+        r"""Custom logic for loading the state dict.
+
+        The `mean_module`, `covar_module` and `likelihood` aren't initialized until the
+        model has been fitted, as the number of MCMC samples isn't known until NUTS is
+        called. We therefore initialize them with dummy samples, sized according to the
+        state dict, before loading the state dict. This currently only works for a
+        `SaasPyroModel` and supporting more Pyro models likely requires moving the model
+        construction logic into the Pyro model itself.
+
+        Returns:
+            The result of the parent class's `load_state_dict` call.
+        """
+        if not isinstance(self.pyro_model, SaasPyroModel):
+            raise NotImplementedError("load_state_dict only works for SaasPyroModel")
+        raw_mean = state_dict["mean_module.raw_constant"]
+        num_mcmc_samples = len(raw_mean)
+        dim = self.pyro_model.train_X.shape[-1]
+        tkwargs = {"device": raw_mean.device, "dtype": raw_mean.dtype}
+        # Load some dummy samples
+        mcmc_samples = {
+            "mean": torch.ones(num_mcmc_samples, **tkwargs),
+            "lengthscale": torch.ones(num_mcmc_samples, dim, **tkwargs),
+            "outputscale": torch.ones(num_mcmc_samples, **tkwargs),
+        }
+        if self.pyro_model.train_Yvar is None:
+            mcmc_samples["noise"] = torch.ones(num_mcmc_samples, **tkwargs)
+        (
+            self.mean_module,
+            self.covar_module,
+            self.likelihood,
+        ) = self.pyro_model.load_mcmc_samples(mcmc_samples=mcmc_samples)
+        # Load the actual samples from the state dict
+        return super().load_state_dict(state_dict=state_dict, strict=strict)
+
     def forward(self, X: Tensor) -> MultivariateNormal:
         self._check_if_fitted()
         return super().forward(X.unsqueeze(MCMC_DIM))
diff --git a/test/models/test_fully_bayesian.py b/test/models/test_fully_bayesian.py
@@ -52,6 +52,22 @@
 from linear_operator.operators import to_linear_operator
 
 
+EXPECTED_KEYS = [  # state dict keys expected when infer_noise is False
+    "mean_module.raw_constant",
+    "covar_module.raw_outputscale",
+    "covar_module.base_kernel.raw_lengthscale",
+    "covar_module.base_kernel.raw_lengthscale_constraint.lower_bound",
+    "covar_module.base_kernel.raw_lengthscale_constraint.upper_bound",
+    "covar_module.raw_outputscale_constraint.lower_bound",
+    "covar_module.raw_outputscale_constraint.upper_bound",
+]
+EXPECTED_KEYS_NOISE = EXPECTED_KEYS + [  # keys expected when infer_noise is True
+    "likelihood.noise_covar.raw_noise",
+    "likelihood.noise_covar.raw_noise_constraint.lower_bound",
+    "likelihood.noise_covar.raw_noise_constraint.upper_bound",
+]
+
+
 class CustomPyroModel(PyroModel):
     def sample(self) -> None:
         pass
@@ -310,6 +326,24 @@ def test_fit_model(self):
             self.assertEqual(median_lengthscale.shape, torch.Size([4]))
             self.assertEqual(model.num_mcmc_samples, 3)
 
+            # Test loading via state dict
+            state_dict = model.state_dict()
+            true_keys = EXPECTED_KEYS_NOISE if infer_noise else EXPECTED_KEYS
+            self.assertEqual(set(state_dict.keys()), set(true_keys))
+            _, _, _, model_new = self._get_data_and_model(
+                infer_noise=infer_noise, **tkwargs
+            )
+            self.assertEqual(model_new.state_dict(), {})
+            model_new.load_state_dict(state_dict)
+            self.assertEqual(model.state_dict().keys(), model_new.state_dict().keys())
+            for k in model.state_dict().keys():
+                self.assertTrue(
+                    (model.state_dict()[k] == model_new.state_dict()[k]).all()
+                )
+            preds1, preds2 = model.posterior(test_X), model_new.posterior(test_X)
+            self.assertTrue((preds1.mean == preds2.mean).all())
+            self.assertTrue((preds1.variance == preds2.variance).all())
+
             # Make sure the model shapes are set correctly
             self.assertEqual(model.pyro_model.train_X.shape, torch.Size([n, d]))
             self.assertTrue(torch.allclose(model.pyro_model.train_X, train_X))
@@ -520,6 +554,10 @@ def test_custom_pyro_model(self):
                 train_Yvar=train_Yvar,
                 pyro_model=CustomPyroModel(),
             )
+            with self.assertRaisesRegex(
+                NotImplementedError, "load_state_dict only works for SaasPyroModel"
+            ):
+                model.load_state_dict({})
             self.assertIsInstance(model.pyro_model, CustomPyroModel)
             self.assertTrue(torch.allclose(model.pyro_model.train_X, train_X))
             self.assertTrue(torch.allclose(model.pyro_model.train_Y, train_Y))