
Commit 394609e

Balandat authored and facebook-github-bot committed
ModelList <-> BatchedModel converters (#187)
Summary:
Pull Request resolved: #187

Adds converters between `BatchedMultiOutputGPyTorchModel` and `ModelListGP`, in both directions (where applicable). This is useful e.g. for fitting batched multi-output models with a large number of outputs, where jointly fitting the model can result in inferior model fits (due to the size of the resulting optimization problem). See stacked diff.

This currently does **not** support the following:
- `HeteroskedasticSingleTaskGP`
- custom likelihoods for `SingleTaskGP`

Reviewed By: sdaulton

Differential Revision: D15982128

fbshipit-source-id: d966b8007144e8ca27c83483a2ff4639ebfe2304
1 parent 62c763a commit 394609e
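For context, a minimal usage sketch of the workflow the summary describes (hypothetical data; `fit_gpytorch_model` and `SumMarginalLogLikelihood` are assumed available from `botorch.fit` and `gpytorch.mlls` at this revision):

    import torch
    from botorch.fit import fit_gpytorch_model  # assumed available at this revision
    from botorch.models import ModelListGP, SingleTaskGP
    from botorch.models.converter import batched_to_model_list, model_list_to_batched
    from gpytorch.mlls import SumMarginalLogLikelihood

    # hypothetical training data: one set of inputs, two scalar outputs
    train_X = torch.rand(20, 3)
    gp1 = SingleTaskGP(train_X, train_X.sum(dim=-1))
    gp2 = SingleTaskGP(train_X, train_X[:, 0] - train_X[:, 1])

    # fit the outputs independently (smaller optimization problems) ...
    list_gp = ModelListGP(gp1, gp2)
    fit_gpytorch_model(SumMarginalLogLikelihood(list_gp.likelihood, list_gp))

    # ... then convert to a batched multi-output model, and back if needed
    batch_gp = model_list_to_batched(list_gp)
    list_gp_again = batched_to_model_list(batch_gp)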

File tree

2 files changed, +326 -0 lines changed


botorch/models/converter.py

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
#! /usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

r"""
Utilities for converting between different models.
"""

from copy import deepcopy

import torch
from torch.nn import Module, ModuleList

from ..exceptions import UnsupportedError
from .gp_regression import FixedNoiseGP, HeteroskedasticSingleTaskGP
from .gpytorch import BatchedMultiOutputGPyTorchModel
from .model_list_gp_regression import ModelListGP


def _get_module(module: Module, name: str) -> Module:
    """Recursively get a sub-module from a module.

    Args:
        module: A `torch.nn.Module`.
        name: The name of the submodule to return, in the form of a period-delimited
            string: `sub_module.subsub_module.[...].leaf_module`.

    Returns:
        The requested sub-module.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> noise_prior = _get_module(gp, "likelihood.noise_covar.noise_prior")
    """
    current = module
    if name != "":
        for a in name.split("."):
            current = getattr(current, a)
    return current


def _check_compatibility(models: ModuleList) -> None:
    """Check if the models of a ModelListGP can be converted."""
    # check that all submodules are of the same type
    for modn, mod in models[0].named_modules():
        mcls = mod.__class__
        if not all(isinstance(_get_module(m, modn), mcls) for m in models[1:]):
            raise UnsupportedError(
                "Sub-modules must be of the same type across models."
            )

    # check that each model is a BatchedMultiOutputGPyTorchModel
    if not all(isinstance(m, BatchedMultiOutputGPyTorchModel) for m in models):
        raise UnsupportedError(
            "All models must be of type BatchedMultiOutputGPyTorchModel."
        )

    # TODO: Add support for HeteroskedasticSingleTaskGP
    if any(isinstance(m, HeteroskedasticSingleTaskGP) for m in models):
        raise NotImplementedError(
            "Conversion of HeteroskedasticSingleTaskGP is currently unsupported."
        )

    # check that each model is single-output
    if not all(m._num_outputs == 1 for m in models):
        raise UnsupportedError("All models must be single-output.")

    # if the list has only one model, the cross-model checks below are unnecessary
    if len(models) == 1:
        return

    # check that training inputs are the same
    if not all(
        torch.equal(ti, tj)
        for m in models[1:]
        for ti, tj in zip(models[0].train_inputs, m.train_inputs)
    ):
        raise UnsupportedError("Training inputs must agree for all sub-models.")


def model_list_to_batched(model_list: ModelListGP) -> BatchedMultiOutputGPyTorchModel:
    """Convert a ModelListGP to a BatchedMultiOutputGPyTorchModel.

    Args:
        model_list: The `ModelListGP` to be converted to the appropriate
            `BatchedMultiOutputGPyTorchModel`. All sub-models must be of the same
            type and have the same shape (batch shape and number of training
            inputs).

    Returns:
        The model converted into a `BatchedMultiOutputGPyTorchModel`.

    Example:
        >>> list_gp = ModelListGP(gp1, gp2)
        >>> batch_gp = model_list_to_batched(list_gp)
    """
    models = model_list.models
    _check_compatibility(models)

    # construct inputs
    train_X = deepcopy(models[0].train_inputs[0])
    train_Y = torch.stack([m.train_targets.clone() for m in models], dim=-1)
    kwargs = {"train_X": train_X, "train_Y": train_Y}
    if isinstance(models[0], FixedNoiseGP):
        kwargs["train_Yvar"] = torch.stack(
            [m.likelihood.noise_covar.noise.clone() for m in models], dim=-1
        )

    # construct the batched GP model
    batch_gp = models[0].__class__(**kwargs)

    tensors = {n for n, p in batch_gp.state_dict().items() if len(p.shape) > 0}
    scalars = set(batch_gp.state_dict()) - tensors
    input_batch_dims = len(models[0]._input_batch_shape)

    # ensure scalars agree (TODO: Allow different priors for different outputs)
    for n in scalars:
        v0 = _get_module(models[0], n)
        if not all(torch.equal(_get_module(m, n), v0) for m in models[1:]):
            raise UnsupportedError("All scalars must have the same value.")

    # ensure dimensions of all tensors agree
    for n in tensors:
        shape0 = _get_module(models[0], n).shape
        if not all(_get_module(m, n).shape == shape0 for m in models[1:]):
            raise UnsupportedError("All tensors must have the same shape.")

    # now construct the batched state dict
    scalar_state_dict = {
        s: p.clone() for s, p in models[0].state_dict().items() if s in scalars
    }
    tensor_state_dict = {
        t: torch.stack(
            [m.state_dict()[t].clone() for m in models], dim=input_batch_dims
        )
        for t in tensors
    }
    batch_state_dict = {**scalar_state_dict, **tensor_state_dict}

    # load the state dict into the new model
    batch_gp.load_state_dict(batch_state_dict)

    return batch_gp


def batched_to_model_list(batch_model: BatchedMultiOutputGPyTorchModel) -> ModelListGP:
    """Convert a BatchedMultiOutputGPyTorchModel to a ModelListGP.

    Args:
        batch_model: The `BatchedMultiOutputGPyTorchModel` to be converted to a
            `ModelListGP`.

    Returns:
        The model converted into a `ModelListGP`.

    Example:
        >>> train_X = torch.rand(5, 2)
        >>> train_Y = torch.rand(5, 2)
        >>> batch_gp = SingleTaskGP(train_X, train_Y)
        >>> list_gp = batched_to_model_list(batch_gp)
    """
    # TODO: Add support for HeteroskedasticSingleTaskGP
    if isinstance(batch_model, HeteroskedasticSingleTaskGP):
        raise NotImplementedError(
            "Conversion of HeteroskedasticSingleTaskGP is currently not supported."
        )

    batch_sd = batch_model.state_dict()

    tensors = {n for n, p in batch_sd.items() if len(p.shape) > 0}
    scalars = set(batch_sd) - tensors
    input_bdims = len(batch_model._input_batch_shape)

    models = []

    for i in range(batch_model._num_outputs):
        scalar_sd = {s: batch_sd[s].clone() for s in scalars}
        tensor_sd = {t: batch_sd[t].select(input_bdims, i).clone() for t in tensors}
        sd = {**scalar_sd, **tensor_sd}
        kwargs = {
            "train_X": batch_model.train_inputs[0].select(input_bdims, i).clone(),
            "train_Y": batch_model.train_targets.select(input_bdims, i).clone(),
        }
        if isinstance(batch_model, FixedNoiseGP):
            noise_covar = batch_model.likelihood.noise_covar
            kwargs["train_Yvar"] = noise_covar.noise.select(input_bdims, i).clone()
        model = batch_model.__class__(**kwargs)
        model.load_state_dict(sd)
        models.append(model)

    return ModelListGP(*models)
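
The round trip above is essentially `torch.stack` and `Tensor.select` applied to state-dict entries along the input batch dimension; a minimal standalone sketch (hypothetical parameter names):

    import torch

    # per-model hyperparameter tensors with identical shapes (hypothetical names)
    sd1 = {"covar_module.lengthscale": torch.tensor([0.5, 1.0])}
    sd2 = {"covar_module.lengthscale": torch.tensor([0.7, 0.9])}

    # model_list_to_batched: stack each entry along a new batch dimension 0
    batched = {k: torch.stack([sd1[k], sd2[k]], dim=0) for k in sd1}
    assert batched["covar_module.lengthscale"].shape == torch.Size([2, 2])

    # batched_to_model_list: select output i along that same dimension
    recovered = [
        {k: v.select(0, i).clone() for k, v in batched.items()} for i in range(2)
    ]
    assert torch.equal(
        recovered[0]["covar_module.lengthscale"], sd1["covar_module.lengthscale"]
    )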

test/models/test_converter.py

Lines changed: 137 additions & 0 deletions
@@ -0,0 +1,137 @@
#! /usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import unittest

import torch
from botorch.exceptions import UnsupportedError
from botorch.models import (
    FixedNoiseGP,
    HeteroskedasticSingleTaskGP,
    ModelListGP,
    SingleTaskGP,
)
from botorch.models.converter import batched_to_model_list, model_list_to_batched

from .test_gpytorch import SimpleGPyTorchModel


class TestConverters(unittest.TestCase):
    def test_batched_to_model_list(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # test SingleTaskGP
            train_X = torch.rand(10, 2, device=device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1)
            train_Y2 = train_X[:, 0] - train_X[:, 1]
            train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
            batch_gp = SingleTaskGP(train_X, train_Y)
            list_gp = batched_to_model_list(batch_gp)
            self.assertIsInstance(list_gp, ModelListGP)
            # test FixedNoiseGP
            batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
            list_gp = batched_to_model_list(batch_gp)
            self.assertIsInstance(list_gp, ModelListGP)
            # test HeteroskedasticSingleTaskGP
            batch_gp = HeteroskedasticSingleTaskGP(
                train_X, train_Y, torch.rand_like(train_Y)
            )
            with self.assertRaises(NotImplementedError):
                batched_to_model_list(batch_gp)

    def test_batched_to_model_list_cuda(self):
        if torch.cuda.is_available():
            self.test_batched_to_model_list(cuda=True)

    def test_model_list_to_batched(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # basic test
            train_X = torch.rand(10, 2, device=device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1)
            train_Y2 = train_X[:, 0] - train_X[:, 1]
            gp1 = SingleTaskGP(train_X, train_Y1)
            gp2 = SingleTaskGP(train_X, train_Y2)
            list_gp = ModelListGP(gp1, gp2)
            batch_gp = model_list_to_batched(list_gp)
            self.assertIsInstance(batch_gp, SingleTaskGP)
            # test degenerate (single model)
            batch_gp = model_list_to_batched(ModelListGP(gp1))
            self.assertEqual(batch_gp._num_outputs, 1)
            # test different model classes
            gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test non-batched models
            gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
            gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1_, gp2_))
            # test list of multi-output models
            train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
            gp2 = SingleTaskGP(train_X, train_Y)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test different training inputs
            gp2 = SingleTaskGP(2 * train_X, train_Y2)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # check scalar agreement
            gp2 = SingleTaskGP(train_X, train_Y2)
            gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # check tensor shape agreement
            gp2 = SingleTaskGP(train_X, train_Y2)
            gp2.covar_module.raw_outputscale = torch.nn.Parameter(
                torch.tensor([0.0], device=device, dtype=dtype)
            )
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test HeteroskedasticSingleTaskGP
            gp2 = HeteroskedasticSingleTaskGP(
                train_X, train_Y1, torch.ones_like(train_Y1)
            )
            with self.assertRaises(NotImplementedError):
                model_list_to_batched(ModelListGP(gp2))
            # test FixedNoiseGP
            train_X = torch.rand(10, 2, device=device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1)
            train_Y2 = train_X[:, 0] - train_X[:, 1]
            gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
            gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
            list_gp = ModelListGP(gp1_, gp2_)
            batch_gp = model_list_to_batched(list_gp)
            self.assertIsInstance(batch_gp, FixedNoiseGP)

    def test_model_list_to_batched_cuda(self):
        if torch.cuda.is_available():
            self.test_model_list_to_batched(cuda=True)

    def test_roundtrip(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            train_X = torch.rand(10, 2, device=device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1)
            train_Y2 = train_X[:, 0] - train_X[:, 1]
            train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
            # SingleTaskGP
            batch_gp = SingleTaskGP(train_X, train_Y)
            list_gp = batched_to_model_list(batch_gp)
            batch_gp_recov = model_list_to_batched(list_gp)
            sd_orig = batch_gp.state_dict()
            sd_recov = batch_gp_recov.state_dict()
            self.assertTrue(set(sd_orig) == set(sd_recov))
            self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
            # FixedNoiseGP
            batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
            list_gp = batched_to_model_list(batch_gp)
            batch_gp_recov = model_list_to_batched(list_gp)
            sd_orig = batch_gp.state_dict()
            sd_recov = batch_gp_recov.state_dict()
            self.assertTrue(set(sd_orig) == set(sd_recov))
            self.assertTrue(all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))

    def test_roundtrip_cuda(self):
        if torch.cuda.is_available():
            self.test_roundtrip(cuda=True)
