
Commit a36c2e6

Daniel Jiang authored and facebook-github-bot committed
FixedNoiseMultiFidelityGP (#386)
Summary: Pull Request resolved: #386

Adds a FixedNoiseMultiFidelityGP + unit tests. Changes FixedNoiseGP to allow a covar_module to be passed in.

Reviewed By: Balandat

Differential Revision: D20235817

fbshipit-source-id: 13029b765c5fe41136dae1f7ccdd44f5d50606af
1 parent a6eddcb · commit a36c2e6

File tree

3 files changed: +332 -130 lines changed

botorch/models/gp_regression.py

Lines changed: 20 additions & 14 deletions
@@ -154,6 +154,7 @@ def __init__(
         train_X: Tensor,
         train_Y: Tensor,
         train_Yvar: Tensor,
+        covar_module: Optional[Module] = None,
         outcome_transform: Optional[OutcomeTransform] = None,
     ) -> None:
         r"""A single-task exact GP model using fixed noise levels.
@@ -189,23 +190,28 @@ def __init__(
             self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
         )
         self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
-        self.covar_module = ScaleKernel(
-            base_kernel=MaternKernel(
-                nu=2.5,
-                ard_num_dims=train_X.shape[-1],
+        if covar_module is None:
+            self.covar_module = ScaleKernel(
+                base_kernel=MaternKernel(
+                    nu=2.5,
+                    ard_num_dims=train_X.shape[-1],
+                    batch_shape=self._aug_batch_shape,
+                    lengthscale_prior=GammaPrior(3.0, 6.0),
+                ),
                 batch_shape=self._aug_batch_shape,
-                lengthscale_prior=GammaPrior(3.0, 6.0),
-            ),
-            batch_shape=self._aug_batch_shape,
-            outputscale_prior=GammaPrior(2.0, 0.15),
-        )
+                outputscale_prior=GammaPrior(2.0, 0.15),
+            )
+            self._subset_batch_dict = {
+                "mean_module.constant": -2,
+                "covar_module.raw_outputscale": -1,
+                "covar_module.base_kernel.raw_lengthscale": -3,
+            }
+        else:
+            self.covar_module = covar_module
+            # TODO: Allow subsetting of other covar modules
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
-        self._subset_batch_dict = {
-            "mean_module.constant": -2,
-            "covar_module.raw_outputscale": -1,
-            "covar_module.base_kernel.raw_lengthscale": -3,
-        }
+
         self.to(train_X)
 
     def fantasize(

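Not part of the diff: a minimal sketch of what the new covar_module argument to FixedNoiseGP enables. The RBF kernel choice and the toy data here are illustrative assumptions, not code from this commit.

    import torch
    from botorch.models.gp_regression import FixedNoiseGP
    from gpytorch.kernels import RBFKernel, ScaleKernel

    # Toy fixed-noise regression data.
    train_X = torch.rand(20, 3)
    train_Y = train_X.sum(dim=-1, keepdim=True)
    train_Yvar = torch.full_like(train_Y, 0.01)  # known observation noise

    # With covar_module=None (the default), FixedNoiseGP builds its usual
    # Matern-5/2 ScaleKernel; any GPyTorch kernel can now be passed instead.
    custom_kernel = ScaleKernel(RBFKernel(ard_num_dims=train_X.shape[-1]))
    model = FixedNoiseGP(train_X, train_Y, train_Yvar, covar_module=custom_kernel)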
botorch/models/gp_regression_fidelity.py

Lines changed: 200 additions & 77 deletions
@@ -14,8 +14,9 @@
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Dict, Optional, Tuple
 
+import torch
 from gpytorch.kernels.kernel import ProductKernel
 from gpytorch.kernels.rbf_kernel import RBFKernel
 from gpytorch.kernels.scale_kernel import ScaleKernel
@@ -24,7 +25,7 @@
 from torch import Tensor
 
 from ..exceptions.errors import UnsupportedError
-from .gp_regression import SingleTaskGP
+from .gp_regression import FixedNoiseGP, SingleTaskGP
 from .kernels.downsampling import DownsamplingKernel
 from .kernels.exponential_decay import ExponentialDecayKernel
 from .kernels.linear_truncated_fidelity import LinearTruncatedFidelityKernel
@@ -55,11 +56,15 @@ class SingleTaskMultiFidelityGP(SingleTaskGP):
             5/2. Only used when `linear_truncated=True`.
         likelihood: A likelihood. If omitted, use a standard GaussianLikelihood
             with inferred noise level.
+        outcome_transform: An outcome transform that is applied to the
+            training data during instantiation and to the posterior during
+            inference (that is, the `Posterior` obtained by calling
+            `.posterior` on the model will be on the original scale).
 
     Example:
         >>> train_X = torch.rand(20, 4)
         >>> train_Y = train_X.pow(2).sum(dim=-1, keepdim=True)
-        >>> model = SingleTaskMultiFidelityGP(train_X, train_Y)
+        >>> model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelity=3)
     """
 
     def __init__(
@@ -84,93 +89,211 @@ def __init__(
             raise UnsupportedError(
                 "SingleTaskMultiFidelityGP requires at least one fidelity parameter."
             )
-        if iteration_fidelity is not None and iteration_fidelity < 0:
-            iteration_fidelity = train_X.size(-1) + iteration_fidelity
-        if data_fidelity is not None and data_fidelity < 0:
-            data_fidelity = train_X.size(-1) + data_fidelity
         self._set_dimensions(train_X=train_X, train_Y=train_Y)
-        if linear_truncated:
-            fidelity_dims = [
-                i for i in (iteration_fidelity, data_fidelity) if i is not None
-            ]
-            kernel = LinearTruncatedFidelityKernel(
-                fidelity_dims=fidelity_dims,
-                dimension=train_X.size(-1),
-                nu=nu,
-                batch_shape=self._aug_batch_shape,
-                power_prior=GammaPrior(3.0, 3.0),
-            )
-        else:
-            active_dimsX = [
-                i
-                for i in range(train_X.size(-1))
-                if i not in {iteration_fidelity, data_fidelity}
-            ]
-            kernel = RBFKernel(
-                ard_num_dims=len(active_dimsX),
-                batch_shape=self._aug_batch_shape,
-                lengthscale_prior=GammaPrior(3.0, 6.0),
-                active_dims=active_dimsX,
-            )
-            additional_kernels = []
-            if iteration_fidelity is not None:
-                exp_kernel = ExponentialDecayKernel(
-                    batch_shape=self._aug_batch_shape,
-                    lengthscale_prior=GammaPrior(3.0, 6.0),
-                    offset_prior=GammaPrior(3.0, 6.0),
-                    power_prior=GammaPrior(3.0, 6.0),
-                    active_dims=[iteration_fidelity],
-                )
-                additional_kernels.append(exp_kernel)
-            if data_fidelity is not None:
-                ds_kernel = DownsamplingKernel(
-                    batch_shape=self._aug_batch_shape,
-                    offset_prior=GammaPrior(3.0, 6.0),
-                    power_prior=GammaPrior(3.0, 6.0),
-                    active_dims=[data_fidelity],
-                )
-                additional_kernels.append(ds_kernel)
-            kernel = ProductKernel(kernel, *additional_kernels)
-
-        covar_module = ScaleKernel(
-            kernel,
-            batch_shape=self._aug_batch_shape,
-            outputscale_prior=GammaPrior(2.0, 0.15),
+        covar_module, subset_batch_dict = _setup_multifidelity_covar_module(
+            dim=train_X.size(-1),
+            aug_batch_shape=self._aug_batch_shape,
+            iteration_fidelity=iteration_fidelity,
+            data_fidelity=data_fidelity,
+            linear_truncated=linear_truncated,
+            nu=nu,
         )
         super().__init__(
             train_X=train_X,
             train_Y=train_Y,
+            likelihood=likelihood,
             covar_module=covar_module,
             outcome_transform=outcome_transform,
         )
-        if linear_truncated:
-            subset_batch_dict = {
-                "covar_module.base_kernel.raw_power": -2,
-                "covar_module.base_kernel.covar_module_unbiased.raw_lengthscale": -3,
-                "covar_module.base_kernel.covar_module_biased.raw_lengthscale": -3,
-            }
-        else:
-            subset_batch_dict = {
-                "covar_module.base_kernel.kernels.0.raw_lengthscale": -3,
-                "covar_module.base_kernel.kernels.1.raw_power": -2,
-                "covar_module.base_kernel.kernels.1.raw_offset": -2,
-            }
-            if iteration_fidelity is not None:
-                subset_batch_dict = {
-                    "covar_module.base_kernel.kernels.1.raw_lengthscale": -3,
-                    **subset_batch_dict,
-                }
-            if data_fidelity is not None:
-                subset_batch_dict = {
-                    "covar_module.base_kernel.kernels.2.raw_power": -2,
-                    "covar_module.base_kernel.kernels.2.raw_offset": -2,
-                    **subset_batch_dict,
-                }
         self._subset_batch_dict = {
             "likelihood.noise_covar.raw_noise": -2,
             "mean_module.constant": -2,
             "covar_module.raw_outputscale": -1,
             **subset_batch_dict,
         }
+        self.to(train_X)
+
+
+class FixedNoiseMultiFidelityGP(FixedNoiseGP):
+    r"""A single-task multi-fidelity GP model using fixed noise levels.
+
+    A FixedNoiseGP model analogue of SingleTaskMultiFidelityGP, using a
+    DownsamplingKernel for the data fidelity parameter (if present) and
+    an ExponentialDecayKernel for the iteration fidelity parameter (if present).
+
+    This kernel is described in [Wu2019mf]_.
+
+    Args:
+        train_X: A `batch_shape x n x (d + s)` tensor of training features,
+            where `s` is the dimension of the fidelity parameters (either one
+            or two).
+        train_Y: A `batch_shape x n x m` tensor of training observations.
+        train_Yvar: A `batch_shape x n x m` tensor of observed measurement noise.
+        iteration_fidelity: The column index for the training iteration fidelity
+            parameter (optional).
+        data_fidelity: The column index for the downsampling fidelity parameter
+            (optional).
+        linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead
+            of the default kernel.
+        nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or
+            5/2. Only used when `linear_truncated=True`.
+        outcome_transform: An outcome transform that is applied to the
+            training data during instantiation and to the posterior during
+            inference (that is, the `Posterior` obtained by calling
+            `.posterior` on the model will be on the original scale).
+
+    Example:
+        >>> train_X = torch.rand(20, 4)
+        >>> train_Y = train_X.pow(2).sum(dim=-1, keepdim=True)
+        >>> train_Yvar = torch.full_like(train_Y, 0.01)
+        >>> model = FixedNoiseMultiFidelityGP(
+        >>>     train_X,
+        >>>     train_Y,
+        >>>     train_Yvar,
+        >>>     data_fidelity=3,
+        >>> )
+    """
 
+    def __init__(
+        self,
+        train_X: Tensor,
+        train_Y: Tensor,
+        train_Yvar: Tensor,
+        iteration_fidelity: Optional[int] = None,
+        data_fidelity: Optional[int] = None,
+        linear_truncated: bool = True,
+        nu: float = 2.5,
+        outcome_transform: Optional[OutcomeTransform] = None,
+    ) -> None:
+        if iteration_fidelity is None and data_fidelity is None:
+            raise UnsupportedError(
+                "FixedNoiseMultiFidelityGP requires at least one fidelity parameter."
+            )
+        self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        covar_module, subset_batch_dict = _setup_multifidelity_covar_module(
+            dim=train_X.size(-1),
+            aug_batch_shape=self._aug_batch_shape,
+            iteration_fidelity=iteration_fidelity,
+            data_fidelity=data_fidelity,
+            linear_truncated=linear_truncated,
+            nu=nu,
+        )
+        super().__init__(
+            train_X=train_X,
+            train_Y=train_Y,
+            train_Yvar=train_Yvar,
+            covar_module=covar_module,
+            outcome_transform=outcome_transform,
+        )
+        self._subset_batch_dict = {
+            "likelihood.noise_covar.raw_noise": -2,
+            "mean_module.constant": -2,
+            "covar_module.raw_outputscale": -1,
+            **subset_batch_dict,
+        }
         self.to(train_X)
+
+
+def _setup_multifidelity_covar_module(
+    dim: int,
+    aug_batch_shape: torch.Size,
+    iteration_fidelity: Optional[int],
+    data_fidelity: Optional[int],
+    linear_truncated: bool,
+    nu: float,
+) -> Tuple[ScaleKernel, Dict]:
+    """Helper function to get the covariance module and associated subset_batch_dict
+    for the multifidelity setting.
+
+    Args:
+        dim: The dimensionality of the training data.
+        aug_batch_shape: The output-augmented batch shape as defined in
+            `BatchedMultiOutputGPyTorchModel`.
+        iteration_fidelity: The column index for the training iteration fidelity
+            parameter (optional).
+        data_fidelity: The column index for the downsampling fidelity parameter
+            (optional).
+        linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead
+            of the default kernel.
+        nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or
+            5/2. Only used when `linear_truncated=True`.
+
+    Returns:
+        The covariance module and subset_batch_dict.
+    """
+
+    if iteration_fidelity is not None and iteration_fidelity < 0:
+        iteration_fidelity = dim + iteration_fidelity
+    if data_fidelity is not None and data_fidelity < 0:
+        data_fidelity = dim + data_fidelity
+
+    if linear_truncated:
+        fidelity_dims = [
+            i for i in (iteration_fidelity, data_fidelity) if i is not None
+        ]
+        kernel = LinearTruncatedFidelityKernel(
+            fidelity_dims=fidelity_dims,
+            dimension=dim,
+            nu=nu,
+            batch_shape=aug_batch_shape,
+            power_prior=GammaPrior(3.0, 3.0),
+        )
+    else:
+        active_dimsX = [
+            i for i in range(dim) if i not in {iteration_fidelity, data_fidelity}
+        ]
+        kernel = RBFKernel(
+            ard_num_dims=len(active_dimsX),
+            batch_shape=aug_batch_shape,
+            lengthscale_prior=GammaPrior(3.0, 6.0),
+            active_dims=active_dimsX,
+        )
+        additional_kernels = []
+        if iteration_fidelity is not None:
+            exp_kernel = ExponentialDecayKernel(
+                batch_shape=aug_batch_shape,
+                lengthscale_prior=GammaPrior(3.0, 6.0),
+                offset_prior=GammaPrior(3.0, 6.0),
+                power_prior=GammaPrior(3.0, 6.0),
+                active_dims=[iteration_fidelity],
+            )
+            additional_kernels.append(exp_kernel)
+        if data_fidelity is not None:
+            ds_kernel = DownsamplingKernel(
+                batch_shape=aug_batch_shape,
+                offset_prior=GammaPrior(3.0, 6.0),
+                power_prior=GammaPrior(3.0, 6.0),
+                active_dims=[data_fidelity],
+            )
+            additional_kernels.append(ds_kernel)
+        kernel = ProductKernel(kernel, *additional_kernels)
+
+    covar_module = ScaleKernel(
+        kernel, batch_shape=aug_batch_shape, outputscale_prior=GammaPrior(2.0, 0.15)
+    )
+
+    if linear_truncated:
+        subset_batch_dict = {
+            "covar_module.base_kernel.raw_power": -2,
+            "covar_module.base_kernel.covar_module_unbiased.raw_lengthscale": -3,
+            "covar_module.base_kernel.covar_module_biased.raw_lengthscale": -3,
+        }
+    else:
+        subset_batch_dict = {
+            "covar_module.base_kernel.kernels.0.raw_lengthscale": -3,
+            "covar_module.base_kernel.kernels.1.raw_power": -2,
+            "covar_module.base_kernel.kernels.1.raw_offset": -2,
+        }
+        if iteration_fidelity is not None:
+            subset_batch_dict = {
+                "covar_module.base_kernel.kernels.1.raw_lengthscale": -3,
+                **subset_batch_dict,
+            }
+        if data_fidelity is not None:
+            subset_batch_dict = {
+                "covar_module.base_kernel.kernels.2.raw_power": -2,
+                "covar_module.base_kernel.kernels.2.raw_offset": -2,
+                **subset_batch_dict,
+            }
+
+    return covar_module, subset_batch_dict
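Not part of the diff: a minimal end-to-end sketch of constructing and fitting the new FixedNoiseMultiFidelityGP, assuming the standard BoTorch fitting utilities (fit_gpytorch_model with an ExactMarginalLogLikelihood).

    import torch
    from botorch.fit import fit_gpytorch_model
    from botorch.models.gp_regression_fidelity import FixedNoiseMultiFidelityGP
    from gpytorch.mlls import ExactMarginalLogLikelihood

    # Toy data: column 3 (the last column) is the data fidelity parameter.
    train_X = torch.rand(20, 4)
    train_Y = train_X.pow(2).sum(dim=-1, keepdim=True)
    train_Yvar = torch.full_like(train_Y, 0.01)  # known observation noise

    model = FixedNoiseMultiFidelityGP(train_X, train_Y, train_Yvar, data_fidelity=3)
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)  # fits kernel hyperparameters; the noise is fixed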

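The summary also mentions unit tests, but the test file's diff is not rendered in this view. A rough sketch of the kind of test that would exercise the new model (the names, shapes, and structure here are assumptions, not the committed tests):

    import unittest

    import torch
    from botorch.models.gp_regression_fidelity import FixedNoiseMultiFidelityGP

    class TestFixedNoiseMultiFidelityGP(unittest.TestCase):
        def test_posterior_shape(self):
            train_X = torch.rand(20, 4)
            train_Y = train_X.pow(2).sum(dim=-1, keepdim=True)
            train_Yvar = torch.full_like(train_Y, 0.01)
            model = FixedNoiseMultiFidelityGP(
                train_X, train_Y, train_Yvar, data_fidelity=3
            )
            # Posterior mean for 5 test points and one output should be 5 x 1.
            posterior = model.posterior(torch.rand(5, 4))
            self.assertEqual(posterior.mean.shape, torch.Size([5, 1]))

    if __name__ == "__main__":
        unittest.main()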