
Commit c431c6d

TobyBoyne authored and facebook-github-bot committed
Change how qNEHVI handles pending points (#2985)
Summary:

## Motivation

Currently, qNEHVI proposes repeated experiments in a batch when initial pending points are passed. This PR changes how this class handles pending points: `X_pending` is now always populated, and only appended in the forward pass if those points have not yet been cached. See issue #2983 for further discussion.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

Yes

Pull Request resolved: #2985

Test Plan: I will rewrite the tests in `test/acquisition/multi_objective/test_monte_carlo.py` to ensure that they pass.

Reviewed By: hvarfner

Differential Revision: D80533943

Pulled By: Balandat

fbshipit-source-id: c5f72413f5636bba69ac2088bc35f1bd819f0fc4
1 parent a5d74d9 · commit c431c6d

4 files changed: +70 -65 lines
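To make the change concrete, here is a minimal usage sketch (not part of the commit; the toy model, data, and sizes are hypothetical). After `set_X_pending`, `X_pending` now stays populated even with `cache_pending=True`, and `forward` only appends pending points that have not yet been cached:

import torch
from botorch.acquisition.multi_objective import (
    qLogNoisyExpectedHypervolumeImprovement,
)
from botorch.models import SingleTaskGP

# Hypothetical two-objective toy problem.
train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = torch.rand(8, 2, dtype=torch.double)
model = SingleTaskGP(train_X, train_Y)

acqf = qLogNoisyExpectedHypervolumeImprovement(
    model=model,
    ref_point=[0.0, 0.0],
    X_baseline=train_X,
    cache_pending=True,  # pending points get folded into the cached baseline
)
acqf.set_X_pending(torch.rand(2, 2, dtype=torch.double))

# Before this commit, X_pending was reset to None once the points were
# cached, which could lead to repeated experiments being proposed in a
# batch. After this commit it stays populated:
assert acqf.X_pending is not None

# forward only concatenates pending points that are not yet cached, so the
# cached ones are not double-counted when evaluating a candidate batch.
vals = acqf(torch.rand(4, 3, 2, dtype=torch.double))  # t-batch of 4, q = 3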

botorch/acquisition/multi_objective/logei.py

Lines changed: 3 additions & 25 deletions
@@ -39,8 +39,6 @@
 from botorch.utils.transforms import (
     average_over_ensemble_models,
     concatenate_pending_points,
-    is_ensemble,
-    match_batch_shape,
     t_batch_mode_transform,
 )
 from torch import Tensor
@@ -439,32 +437,12 @@ def __init__(
         self.tau_max = tau_max
         self.fat = fat
 
-    @concatenate_pending_points
     @t_batch_mode_transform()
     @average_over_ensemble_models
     def forward(self, X: Tensor) -> Tensor:
-        X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
-        # NOTE: To ensure that we correctly sample `f(X)` from the joint distribution
-        # `f((X_baseline, X)) ~ P(f | D)`, it is critical to compute the joint posterior
-        # over X *and* X_baseline -- which also contains pending points whenever there
-        # are any -- since the baseline and pending values `f(X_baseline)` are
-        # generally pre-computed and cached before the `forward` call, see the docs of
-        # `cache_pending` for details.
-        # TODO: Improve the efficiency by not re-computing the X_baseline-X_baseline
-        # covariance matrix, but only the covariance of
-        # 1) X and X, and
-        # 2) X and X_baseline.
-        posterior = self.model.posterior(X_full)
-        # Account for possible one-to-many transform and the model batch dimensions in
-        # ensemble models.
-        event_shape_lag = 1 if is_ensemble(self.model) else 2
-        n_w = (
-            posterior._extended_shape()[X_full.dim() - event_shape_lag]
-            // X_full.shape[-2]
-        )
-        q_in = X.shape[-2] * n_w
-        self._set_sampler(q_in=q_in, posterior=posterior)
-        samples = self._get_f_X_samples(posterior=posterior, q_in=q_in)
+        # Get samples from the posterior, and manually concatenate pending points that
+        # have not yet been cached. Shared with qNEHVI.
+        samples, X = self._compute_posterior_samples_and_concat_pending(X)
         # Add previous nehvi from pending points.
         nehvi = self._compute_log_qehvi(samples=samples, X=X)
         if self.incremental_nehvi:
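For context (not part of the diff): the removed `concatenate_pending_points` decorator unconditionally appends all of `X_pending` to the candidate set before `forward` runs, roughly as paraphrased in the sketch below (see `botorch.utils.transforms` for the actual implementation). With `X_pending` now always kept populated, that blanket concatenation would double-count the pending points already folded into `X_baseline` by `cache_pending`, hence the move to a shared helper that appends only uncached points. The same reasoning applies to the identical change in `monte_carlo.py` below.

from functools import wraps

import torch
from botorch.utils.transforms import match_batch_shape


def concatenate_pending_points_sketch(method):
    """Rough paraphrase of the `concatenate_pending_points` decorator."""

    @wraps(method)
    def decorated(acqf, X, **kwargs):
        # Appends *all* pending points to X, with no notion of which of
        # them have already been cached into the acqf's baseline.
        if acqf.X_pending is not None:
            X = torch.cat([X, match_batch_shape(acqf.X_pending, X)], dim=-2)
        return method(acqf, X, **kwargs)

    return decorated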

botorch/acquisition/multi_objective/monte_carlo.py

Lines changed: 3 additions & 25 deletions
@@ -45,8 +45,6 @@
 from botorch.utils.transforms import (
     average_over_ensemble_models,
     concatenate_pending_points,
-    is_ensemble,
-    match_batch_shape,
     t_batch_mode_transform,
 )
 from torch import Tensor
@@ -349,31 +347,11 @@ def __init__(
         )
         self.fat = fat
 
-    @concatenate_pending_points
     @t_batch_mode_transform()
     @average_over_ensemble_models
     def forward(self, X: Tensor) -> Tensor:
-        X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
-        # NOTE: To ensure that we correctly sample `f(X)` from the joint distribution
-        # `f((X_baseline, X)) ~ P(f | D)`, it is critical to compute the joint posterior
-        # over X *and* X_baseline -- which also contains pending points whenever there
-        # are any -- since the baseline and pending values `f(X_baseline)` are
-        # generally pre-computed and cached before the `forward` call, see the docs of
-        # `cache_pending` for details.
-        # TODO: Improve the efficiency by not re-computing the X_baseline-X_baseline
-        # covariance matrix, but only the covariance of
-        # 1) X and X, and
-        # 2) X and X_baseline.
-        posterior = self.model.posterior(X_full)
-        # Account for possible one-to-many transform and the MCMC batch dimension in
-        # `SaasFullyBayesianSingleTaskGP`
-        event_shape_lag = 1 if is_ensemble(self.model) else 2
-        n_w = (
-            posterior._extended_shape()[X_full.dim() - event_shape_lag]
-            // X_full.shape[-2]
-        )
-        q_in = X.shape[-2] * n_w
-        self._set_sampler(q_in=q_in, posterior=posterior)
-        samples = self._get_f_X_samples(posterior=posterior, q_in=q_in)
+        # Get samples from the posterior, and manually concatenate pending points that
+        # have not yet been cached. Shared with qLogNEHVI.
+        samples, X = self._compute_posterior_samples_and_concat_pending(X)
         # Add previous nehvi from pending points.
         return self._compute_qehvi(samples=samples, X=X) + self._prev_nehvi

botorch/utils/multi_objective/hypervolume.py

Lines changed: 55 additions & 8 deletions
@@ -58,8 +58,10 @@
 )
 from botorch.utils.objective import compute_feasibility_indicator
 from botorch.utils.torch import BufferDict
+from botorch.utils.transforms import is_ensemble, match_batch_shape
 from torch import Tensor
 
+
 MIN_Y_RANGE = 1e-7
 
 
@@ -793,7 +795,7 @@ def set_X_pending(self, X_pending: Tensor | None = None) -> None:
                 BotorchWarning,
                 stacklevel=2,
             )
-            X_pending = X_pending.detach().clone()
+            self.X_pending = X_pending.detach().clone()
            if self.cache_pending:
                X_baseline = torch.cat([self._X_baseline, X_pending], dim=-2)
                # Number of new points is the total number of points minus
@@ -812,16 +814,9 @@ def set_X_pending(self, X_pending: Tensor | None = None) -> None:
                        .clamp_min(0.0)
                        .mean()
                    )
-                    # Set to None so that pending points are not concatenated in
-                    # forward.
-                    self.X_pending = None
                    # Set q_in=-1 to so that self.sampler is updated at the next
                    # forward call.
                    self.q_in = -1
-                else:
-                    self.X_pending = X_pending[-num_new_points:]
-        else:
-            self.X_pending = X_pending
 
     @property
     def _hypervolumes(self) -> Tensor:
@@ -836,6 +831,58 @@ def _hypervolumes(self) -> Tensor:
             .view(self._batch_sample_shape)
         )
 
+    def _compute_posterior_samples_and_concat_pending(
+        self, X: Tensor
+    ) -> tuple[Tensor, Tensor]:
+        r"""Get samples from the posterior, and concatenate uncached pending points.
+
+        Args:
+            X: `batch_shape x q x d` X Tensor pased into the `forward` method of an acqf
+
+        Returns:
+            A tuple containing samples of the latent function from the posterior, and
+            the `batch_shape x (q + num_uncached_pending) x d` X tensor including any
+            pending observations that have not been cached.
+        """
+        # Manually concatenate pending points only if:
+        # - pending points are not cached, or
+        # - number of pending points is less than max_iep
+        if self.X_pending is not None:
+            num_pending = self.X_pending.shape[-2]
+            num_X_baseline = self._X_baseline.shape[-2]
+            num_X_baseline_and_cached_pending = self.X_baseline.shape[-2]
+            num_uncached_pending = (
+                (num_pending + num_X_baseline - num_X_baseline_and_cached_pending)
+                if self.cache_pending
+                else num_pending
+            )
+            X_pending_uncached = self.X_pending[
+                ..., num_pending - num_uncached_pending :, :
+            ]
+            X = torch.cat([X, match_batch_shape(X_pending_uncached, X)], dim=-2)
+        X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2)
+        # NOTE: To ensure that we correctly sample `f(X)` from the joint distribution
+        # `f((X_baseline, X)) ~ P(f | D)`, it is critical to compute the joint posterior
+        # over X *and* X_baseline -- which also contains pending points whenever there
+        # are any -- since the baseline and pending values `f(X_baseline)` are
+        # generally pre-computed and cached before the `forward` call, see the docs of
+        # `cache_pending` for details.
+        # TODO: Improve the efficiency by not re-computing the X_baseline-X_baseline
+        # covariance matrix, but only the covariance of
+        # 1) X and X, and
+        # 2) X and X_baseline.
+        posterior = self.model.posterior(X_full)
+        # Account for possible one-to-many transform and the MCMC batch dimension in
+        # `SaasFullyBayesianSingleTaskGP`
+        event_shape_lag = 1 if is_ensemble(self.model) else 2
+        n_w = (
+            posterior._extended_shape()[X_full.dim() - event_shape_lag]
+            // X_full.shape[-2]
+        )
+        q_in = X.shape[-2] * n_w
+        self._set_sampler(q_in=q_in, posterior=posterior)
+        return self._get_f_X_samples(posterior=posterior, q_in=q_in), X
+
 
 def get_hypervolume_maximizing_subset(
     n: int, Y: Tensor, ref_point: Tensor
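A worked example of the uncached-pending arithmetic in the new helper (the sizes below are made up): suppose 10 baseline points and 5 pending points, 3 of which have already been absorbed into `self.X_baseline` by `cache_pending`.

# Hypothetical sizes, mirroring the helper's bookkeeping above:
num_pending = 5                         # self.X_pending.shape[-2]
num_X_baseline = 10                     # self._X_baseline.shape[-2], no pending
num_X_baseline_and_cached_pending = 13  # self.X_baseline.shape[-2], 3 cached

num_uncached_pending = (
    num_pending + num_X_baseline - num_X_baseline_and_cached_pending
)
assert num_uncached_pending == 2
# Only the last 2 pending points (self.X_pending[..., 3:, :]) still need to
# be concatenated onto the candidate set in forward; the first 3 already
# live in the cached baseline and would otherwise be counted twice.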

test/acquisition/multi_objective/test_monte_carlo.py

Lines changed: 9 additions & 7 deletions
@@ -1054,7 +1054,7 @@ def _test_qnehvi_with_CBD(
             acqf.set_X_pending(X_pending)
             if not incremental_nehvi:
                 self.assertAllClose(expected_val, acqf._prev_nehvi)
-            self.assertIsNone(acqf.X_pending)
+            self.assertTrue(torch.all(acqf.X_pending == X_pending))
             # check that X_baseline has been updated
             self.assertTrue(torch.equal(acqf.X_baseline[:-1], acqf._X_baseline))
             self.assertTrue(torch.equal(acqf.X_baseline[-1:], X_pending))
@@ -1112,7 +1112,7 @@ def _test_qnehvi_with_CBD(
             )
             mm._posterior._samples = mm._posterior._samples.squeeze(0)
             acqf.set_X_pending(X_pending2)
-            self.assertIsNone(acqf.X_pending)
+            self.assertTrue(torch.all(acqf.X_pending == X_pending2))
             # check that X_baseline has been updated
             self.assertTrue(torch.equal(acqf.X_baseline[:-2], acqf._X_baseline))
             self.assertTrue(torch.equal(acqf.X_baseline[-2:], X_pending2))
@@ -1129,7 +1129,9 @@ def _test_qnehvi_with_CBD(
                 acqf.set_X_pending(
                     torch.cat([X_pending2, X_pending2], dim=0).requires_grad_(True)
                 )
-                self.assertIsNone(acqf.X_pending)
+                self.assertTrue(
+                    torch.all(acqf.X_pending == torch.cat([X_pending2, X_pending2], dim=0))
+                )
                 self.assertEqual(sum(issubclass(w.category, BotorchWarning) for w in ws), 1)
 
             # test max iep
@@ -1161,10 +1163,10 @@ def _test_qnehvi_with_CBD(
                     new_Y2,
                 ]
             )
-            # check that after second pending point is added, X_pending is set to None
-            # and the pending points are included in the box decompositions
+            # check that after second pending point is added, X_pending still includes
+            # pending points, and the pending points are included in the box decompositions
             acqf.set_X_pending(X_pending2)
-            self.assertIsNone(acqf.X_pending)
+            self.assertTrue(torch.all(acqf.X_pending == X_pending2))
             acqf_pareto_Y = acqf.partitioning.pareto_Y[0]
             self.assertTrue(torch.equal(acqf_pareto_Y[:-2], expected_pareto_Y))
             self.assertTrue(torch.equal(acqf_pareto_Y[-2:], expected_new_Y2))
@@ -1294,7 +1296,7 @@ def test_constrained_q_noisy_expected_hypervolume_improvement(self) -> None:
     def test_constrained_q_log_noisy_expected_hypervolume_improvement(self) -> None:
         for dtype, fat in product(
             (torch.float, torch.double),
-            (True, False),
+            (False, True),
         ):
             with self.subTest(dtype=dtype, fat=fat):
                 self._test_constrained_q_noisy_expected_hypervolume_improvement(
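Continuing the toy setup from the first sketch (the `model` and `train_X` there are hypothetical), the `max_iep` scenario exercised by the "test max iep" hunk looks roughly like this:

from botorch.acquisition.multi_objective import (
    qNoisyExpectedHypervolumeImprovement,
)

acqf = qNoisyExpectedHypervolumeImprovement(
    model=model,        # two-output SingleTaskGP from the earlier sketch
    ref_point=[0.0, 0.0],
    X_baseline=train_X,
    cache_pending=True,
    max_iep=1,          # cache once more than one point is pending
)
acqf.set_X_pending(torch.rand(2, 2, dtype=torch.double))

# The two pending points are folded into the cached box decompositions
# (acqf.partitioning), and, per this commit, X_pending remains populated
# rather than being reset to None.
assert acqf.X_pending is not None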
