
Commit 9a93afb

saitcakmak authored and facebook-github-bot committed
Implements ExpectationPosteriorTransform (#903)
Summary:
Pull Request resolved: #903

Implements `ExpectationPosteriorTransform`, which transforms the `batch x (q * n_w) x m` posterior to a `batch x q x m` posterior of the expectation over the `n_w` points. Unlike the `RiskMeasureMCObjective`, this avoids posterior sampling over `q * n_w` points, which leads to significant speed-ups for large `q * n_w`.

Reviewed By: Balandat

Differential Revision: D29277116

fbshipit-source-id: a6be1c32d0343e6b1c99d2e76facc1c8d5b22d42
1 parent ce4900c commit 9a93afb
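To see how the new transform is meant to be used, here is a minimal usage sketch (not part of this commit; the model, data, and perturbation values are illustrative assumptions, and it assumes `InputPerturbation` expands each candidate into its `n_w` perturbed copies at posterior time, as its docstring describes):

import torch
from botorch.acquisition.objective import ExpectationPosteriorTransform
from botorch.models import SingleTaskGP
from botorch.models.transforms.input import InputPerturbation

# Hypothetical toy data: 10 training points in 2D, one outcome.
train_X = torch.rand(10, 2)
train_Y = train_X.sum(dim=-1, keepdim=True)

# n_w = 4 perturbations; the input transform evaluates each candidate
# at X + delta for every delta in the perturbation set.
perturbation_set = 0.05 * torch.randn(4, 2)
model = SingleTaskGP(
    train_X, train_Y, input_transform=InputPerturbation(perturbation_set)
)

# The joint posterior over q = 3 candidates covers q * n_w = 12 points;
# the transform reduces it to a joint posterior over the 3 expectations.
test_X = torch.rand(3, 2)
tf = ExpectationPosteriorTransform(n_w=4)
expectation_posterior = tf(model.posterior(test_X))
print(expectation_posterior.mean.shape)  # torch.Size([3, 1])

Because the reduction happens on the posterior itself, any downstream MC sampling draws `q`-dimensional samples rather than `q * n_w`-dimensional ones, which is the source of the speed-up claimed above.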

File tree

4 files changed: +263 −2 lines changed

botorch/acquisition/multi_objective/multi_output_risk_measures.py
botorch/acquisition/objective.py
botorch/acquisition/risk_measures.py
test/acquisition/test_objective.py

botorch/acquisition/multi_objective/multi_output_risk_measures.py

Lines changed: 7 additions & 1 deletion

@@ -95,7 +95,13 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
 
 
 class MultiOutputExpectation(MultiOutputRiskMeasureMCObjective):
-    r"""A multi-output MC expectation risk measure."""
+    r"""A multi-output MC expectation risk measure.
+
+    For unconstrained problems, we recommend using the `ExpectationPosteriorTransform`
+    instead. `ExpectationPosteriorTransform` directly transforms the posterior
+    distribution over `q * n_w` to a posterior of `q` expectations, significantly
+    reducing the cost of posterior sampling as a result.
+    """
 
     def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
         r"""Calculate the expectation of the given samples. Expectation is

botorch/acquisition/objective.py

Lines changed: 107 additions & 0 deletions

@@ -16,11 +16,14 @@
 from typing import Callable, List, Optional
 
 import torch
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.model import Model
 from botorch.posteriors.gpytorch import GPyTorchPosterior, scalarize_posterior
 from botorch.posteriors.posterior import Posterior
 from botorch.sampling import IIDNormalSampler, MCSampler
 from botorch.utils import apply_constraints
+from gpytorch.distributions import MultivariateNormal, MultitaskMultivariateNormal
+from gpytorch.lazy import lazify
 from torch import Tensor
 from torch.nn import Module
 

@@ -137,6 +140,110 @@ def __init__(self, weights: Tensor, offset: float = 0.0) -> None:
         super().__init__(weights=weights, offset=offset)
 
 
+class ExpectationPosteriorTransform(PosteriorTransform):
+    r"""Transform the `batch x (q * n_w) x m` posterior into a `batch x q x m`
+    posterior of the expectation. The expectation is calculated over each
+    consecutive `n_w` block of points in the posterior.
+
+    This is intended for use with `InputPerturbation` or `AppendFeatures` for
+    optimizing the expectation over `n_w` points. This should not be used when
+    there are constraints present, since this does not take into account
+    the feasibility of the objectives.
+
+    Note: This is different from `ScalarizedPosteriorTransform` in that
+    this operates over the q-batch dimension.
+    """
+
+    def __init__(self, n_w: int, weights: Optional[Tensor] = None) -> None:
+        r"""A posterior transform calculating the expectation over the q-batch
+        dimension.
+
+        Args:
+            n_w: The number of points in the q-batch of the posterior to compute
+                the expectation over. This corresponds to the size of the
+                `feature_set` of `AppendFeatures` or the size of the `perturbation_set`
+                of `InputPerturbation`.
+            weights: An optional `n_w x m`-dim tensor of weights. Can be used to
+                compute a weighted expectation. Weights are normalized before use.
+        """
+        super().__init__()
+        if weights is not None:
+            if weights.dim() != 2 or weights.shape[0] != n_w:
+                raise ValueError("`weights` must be a tensor of size `n_w x m`.")
+            if torch.any(weights < 0):
+                raise ValueError("`weights` must be non-negative.")
+        else:
+            weights = torch.ones(n_w, 1)
+        # Normalize the weights.
+        weights = weights / weights.sum(dim=0)
+        self.register_buffer("weights", weights)
+        self.n_w = n_w
+
+    def evaluate(self, Y: Tensor) -> Tensor:
+        r"""Evaluate the expectation of a set of outcomes.
+
+        Args:
+            Y: A `batch_shape x (q * n_w) x m`-dim tensor of outcomes.
+
+        Returns:
+            A `batch_shape x q x m`-dim tensor of expectation outcomes.
+        """
+        batch_shape, m = Y.shape[:-2], Y.shape[-1]
+        weighted_Y = Y.view(*batch_shape, -1, self.n_w, m) * self.weights.to(Y)
+        return weighted_Y.sum(dim=-2)
+
+    def forward(self, posterior: GPyTorchPosterior) -> GPyTorchPosterior:
+        r"""Compute the posterior of the expectation.
+
+        Args:
+            posterior: An `m`-outcome joint posterior over `q * n_w` points.
+
+        Returns:
+            An `m`-outcome joint posterior over `q` expectations.
+        """
+        org_mvn = posterior.mvn
+        if getattr(org_mvn, "_interleaved", False):
+            raise UnsupportedError(
+                "`ExpectationPosteriorTransform` does not support "
+                "interleaved posteriors."
+            )
+        # Initialize the weight matrix of shape compatible with the mvn.
+        org_event_shape = org_mvn.event_shape
+        batch_shape = org_mvn.batch_shape
+        q = org_event_shape[0] // self.n_w
+        m = 1 if len(org_event_shape) == 1 else org_event_shape[-1]
+        tkwargs = {"device": org_mvn.loc.device, "dtype": org_mvn.loc.dtype}
+        weights = torch.zeros(q * m, q * self.n_w * m, **tkwargs)
+        # Make sure self.weights has the correct dtype/device and shape.
+        self.weights = self.weights.to(org_mvn.loc).expand(self.n_w, m)
+        # Fill in the non-zero entries of the weight matrix.
+        # We want each row to have non-zero weights for the corresponding
+        # `n_w` sized diagonal. The `m` outcomes are not interleaved.
+        for i in range(q * m):
+            weights[i, self.n_w * i : self.n_w * (i + 1)] = self.weights[:, i // q]
+        # Transform the mean.
+        new_loc = (
+            (weights @ org_mvn.loc.unsqueeze(-1))
+            .view(*batch_shape, m, q)
+            .transpose(-1, -2)
+        )
+        # Transform the covariance matrix.
+        org_cov = (
+            org_mvn.lazy_covariance_matrix
+            if org_mvn.islazy
+            else org_mvn.covariance_matrix
+        )
+        new_cov = weights @ (org_cov @ weights.t())
+        if m == 1:
+            new_mvn = MultivariateNormal(new_loc.squeeze(-1), lazify(new_cov))
+        else:
+            # Using MTMVN since we pass a single loc and covar for all `m` outputs.
+            new_mvn = MultitaskMultivariateNormal(
+                new_loc, lazify(new_cov), interleaved=False
+            )
+        return GPyTorchPosterior(mvn=new_mvn)
+
+
 class MCAcquisitionObjective(Module, ABC):
     r"""Abstract base class for MC-based objectives.
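As a worked illustration of the weight matrix constructed in `forward` above (a standalone sketch, not library code): for `q = 2`, `n_w = 3`, `m = 1` with uniform weights, each row of the `q*m x (q * n_w * m)` matrix averages one consecutive `n_w` block, so `W @ loc` yields the block means and `W @ K @ W^T` the block-averaged covariance:

import torch

# Rebuild the weight matrix from forward() for q = 2, n_w = 3, m = 1.
q, n_w, m = 2, 3, 1
w = torch.full((n_w, m), 1.0 / n_w)  # uniform, already normalized
W = torch.zeros(q * m, q * n_w * m)
for i in range(q * m):
    W[i, n_w * i : n_w * (i + 1)] = w[:, i // q]

# Each row averages one consecutive n_w block of the mean vector.
loc = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(W @ loc)  # tensor([2., 5.])

# The covariance transforms as W K W^T; with K = I this gives a
# 2 x 2 matrix with 1/3 on the diagonal and 0 off the diagonal.
K = torch.eye(6)
print(W @ K @ W.t())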

botorch/acquisition/risk_measures.py

Lines changed: 7 additions & 1 deletion

@@ -228,7 +228,13 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
 
 
 class Expectation(RiskMeasureMCObjective):
-    r"""The expectation risk measure."""
+    r"""The expectation risk measure.
+
+    For unconstrained problems, we recommend using the `ExpectationPosteriorTransform`
+    instead. `ExpectationPosteriorTransform` directly transforms the posterior
+    distribution over `q * n_w` to a posterior of `q` expectations, significantly
+    reducing the cost of posterior sampling as a result.
+    """
 
     def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
         r"""Calculate the expectation corresponding to the given samples.

test/acquisition/test_objective.py

Lines changed: 142 additions & 0 deletions

@@ -12,18 +12,23 @@
 from botorch.acquisition import LearnedObjective
 from botorch.acquisition.objective import (
     ConstrainedMCObjective,
+    ExpectationPosteriorTransform,
     GenericMCObjective,
     IdentityMCObjective,
     LinearMCObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
     ScalarizedPosteriorTransform,
 )
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.deterministic import PosteriorMeanModel
 from botorch.models.pairwise_gp import PairwiseGP
+from botorch.posteriors import GPyTorchPosterior
 from botorch.sampling.samplers import SobolQMCNormalSampler
 from botorch.utils import apply_constraints
 from botorch.utils.testing import _get_test_posterior, BotorchTestCase
+from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
+from gpytorch.lazy import lazify
 from torch import Tensor
 
 

@@ -83,6 +88,143 @@ def test_scalarized_posterior_transform(self):
         self.assertTrue(torch.equal(val, val_expected))
 
 
+class TestExpectationPosteriorTransform(BotorchTestCase):
+    def test_init(self):
+        # Without weights.
+        tf = ExpectationPosteriorTransform(n_w=5)
+        self.assertEqual(tf.n_w, 5)
+        self.assertTrue(torch.allclose(tf.weights, torch.ones(5, 1) * 0.2))
+        # Errors with weights.
+        with self.assertRaisesRegex(ValueError, "a tensor of size"):
+            ExpectationPosteriorTransform(n_w=3, weights=torch.ones(5, 1))
+        with self.assertRaisesRegex(ValueError, "non-negative"):
+            ExpectationPosteriorTransform(n_w=3, weights=-torch.ones(3, 1))
+        # Successful init with weights.
+        weights = torch.tensor([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]])
+        tf = ExpectationPosteriorTransform(n_w=3, weights=weights)
+        self.assertTrue(torch.allclose(tf.weights, weights / torch.tensor([6.0, 12.0])))
+
+    def test_evaluate(self):
+        for dtype in (torch.float, torch.double):
+            tkwargs = {"dtype": dtype, "device": self.device}
+            # Without weights.
+            tf = ExpectationPosteriorTransform(n_w=3)
+            Y = torch.rand(3, 6, 2, **tkwargs)
+            self.assertTrue(
+                torch.allclose(tf.evaluate(Y), Y.view(3, 2, 3, 2).mean(dim=-2))
+            )
+            # With weights - weights intentionally doesn't use tkwargs.
+            weights = torch.tensor([[1.0, 2.0], [2.0, 1.0]])
+            tf = ExpectationPosteriorTransform(n_w=2, weights=weights)
+            expected = (Y.view(3, 3, 2, 2) * weights.to(Y)).sum(dim=-2) / 3.0
+            self.assertTrue(torch.allclose(tf.evaluate(Y), expected))
+
+    def test_expectation_posterior_transform(self):
+        tkwargs = {"dtype": torch.float, "device": self.device}
+        # Without weights, simple expectation, single output, no batch.
+        # q = 2, n_w = 3.
+        org_loc = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], **tkwargs)
+        org_covar = torch.tensor(
+            [
+                [1.0, 0.8, 0.7, 0.3, 0.2, 0.1],
+                [0.8, 1.0, 0.9, 0.25, 0.15, 0.1],
+                [0.7, 0.9, 1.0, 0.2, 0.2, 0.05],
+                [0.3, 0.25, 0.2, 1.0, 0.7, 0.6],
+                [0.2, 0.15, 0.2, 0.7, 1.0, 0.7],
+                [0.1, 0.1, 0.05, 0.6, 0.7, 1.0],
+            ],
+            **tkwargs
+        )
+        org_mvn = MultivariateNormal(org_loc, lazify(org_covar))
+        org_post = GPyTorchPosterior(mvn=org_mvn)
+        tf = ExpectationPosteriorTransform(n_w=3)
+        tf_post = tf(org_post)
+        self.assertIsInstance(tf_post, GPyTorchPosterior)
+        self.assertEqual(tf_post.sample().shape, torch.Size([1, 2, 1]))
+        tf_mvn = tf_post.mvn
+        self.assertIsInstance(tf_mvn, MultivariateNormal)
+        expected_loc = torch.tensor([2.0, 5.0], **tkwargs)
+        # This is the average of each 3 x 3 block.
+        expected_covar = torch.tensor([[0.8667, 0.1722], [0.1722, 0.7778]], **tkwargs)
+        self.assertTrue(torch.allclose(tf_mvn.loc, expected_loc))
+        self.assertTrue(
+            torch.allclose(tf_mvn.covariance_matrix, expected_covar, atol=1e-3)
+        )
+
+        # With weights, 2 outputs, batched.
+        tkwargs = {"dtype": torch.double, "device": self.device}
+        # q = 2, n_w = 2, m = 2, leading to 8 values for loc and 8x8 cov.
+        org_loc = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], **tkwargs)
+        # We have 2 4x4 matrices with 0s as filler. Each block is for one outcome.
+        # Each 2x2 sub block corresponds to `n_w`.
+        org_covar = torch.tensor(
+            [
+                [1.0, 0.8, 0.3, 0.2, 0.0, 0.0, 0.0, 0.0],
+                [0.8, 1.4, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0],
+                [0.3, 0.2, 1.2, 0.5, 0.0, 0.0, 0.0, 0.0],
+                [0.2, 0.1, 0.5, 1.0, 0.0, 0.0, 0.0, 0.0],
+                [0.0, 0.0, 0.0, 0.0, 1.0, 0.7, 0.4, 0.3],
+                [0.0, 0.0, 0.0, 0.0, 0.7, 0.8, 0.3, 0.2],
+                [0.0, 0.0, 0.0, 0.0, 0.4, 0.3, 1.4, 0.5],
+                [0.0, 0.0, 0.0, 0.0, 0.3, 0.2, 0.5, 1.2],
+            ],
+            **tkwargs
+        )
+        # Making it batched by adding two more batches, mostly the same.
+        org_loc = org_loc.repeat(3, 1)
+        org_loc[1] += 100
+        org_loc[2] += 1000
+        org_covar = org_covar.repeat(3, 1, 1)
+        # Construct the transform with weights.
+        weights = torch.tensor([[1.0, 3.0], [2.0, 1.0]])
+        tf = ExpectationPosteriorTransform(n_w=2, weights=weights)
+        # Construct the posterior.
+        org_mvn = MultitaskMultivariateNormal(
+            # The return of mvn.loc and the required input are different.
+            # We constructed it according to the output of mvn.loc,
+            # reshaping here to have the required `b x n x t` shape.
+            org_loc.view(3, 2, 4).transpose(-2, -1),
+            lazify(org_covar),
+            interleaved=True,  # To test the error.
+        )
+        org_post = GPyTorchPosterior(mvn=org_mvn)
+        # Error if interleaved.
+        with self.assertRaisesRegex(UnsupportedError, "interleaved"):
+            tf(org_post)
+        # Construct the non-interleaved posterior.
+        org_mvn = MultitaskMultivariateNormal(
+            org_loc.view(3, 2, 4).transpose(-2, -1),
+            lazify(org_covar),
+            interleaved=False,
+        )
+        org_post = GPyTorchPosterior(mvn=org_mvn)
+        self.assertTrue(torch.equal(org_mvn.loc, org_loc))
+        tf_post = tf(org_post)
+        self.assertIsInstance(tf_post, GPyTorchPosterior)
+        self.assertEqual(tf_post.sample().shape, torch.Size([1, 3, 2, 2]))
+        tf_mvn = tf_post.mvn
+        self.assertIsInstance(tf_mvn, MultitaskMultivariateNormal)
+        expected_loc = torch.tensor([[1.6667, 3.6667, 5.25, 7.25]], **tkwargs).repeat(
+            3, 1
+        )
+        expected_loc[1] += 100
+        expected_loc[2] += 1000
+        # This is the weighted average of each 2 x 2 block.
+        expected_covar = torch.tensor(
+            [
+                [1.0889, 0.1667, 0.0, 0.0],
+                [0.1667, 0.8, 0.0, 0.0],
+                [0.0, 0.0, 0.875, 0.35],
+                [0.0, 0.0, 0.35, 1.05],
+            ],
+            **tkwargs
+        ).repeat(3, 1, 1)
+        self.assertTrue(torch.allclose(tf_mvn.loc, expected_loc, atol=1e-3))
+        self.assertTrue(
+            torch.allclose(tf_mvn.covariance_matrix, expected_covar, atol=1e-3)
+        )
+
+
 class TestMCAcquisitionObjective(BotorchTestCase):
     def test_abstract_raises(self):
         with self.assertRaises(TypeError):
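Beyond these unit tests, an informal cross-check sketch (sample count arbitrary; an identity covariance is assumed purely for brevity): the analytic expectation posterior should agree with a Monte Carlo estimate obtained by block-averaging samples from the original posterior:

import torch
from botorch.acquisition.objective import ExpectationPosteriorTransform
from botorch.posteriors import GPyTorchPosterior
from gpytorch.distributions import MultivariateNormal
from gpytorch.lazy import lazify

# Original posterior over q * n_w = 2 * 3 points, single outcome.
loc = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
post = GPyTorchPosterior(mvn=MultivariateNormal(loc, lazify(torch.eye(6))))
tf = ExpectationPosteriorTransform(n_w=3)
tf_post = tf(post)

# MC estimate: sample the original posterior, average each n_w block.
samples = post.rsample(torch.Size([50000]))  # 50000 x 6 x 1
mc_mean = samples.view(-1, 2, 3, 1).mean(dim=-2).mean(dim=0)
print(tf_post.mean.squeeze(-1))  # tensor([2., 5.])
print(mc_mean.squeeze(-1))  # approximately [2., 5.]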
