
Commit 98503e4

roussel-ryan authored and facebook-github-bot committed
Bugfix for Proximal acquisition function wrapper for negative base acquisition functions (#1447)
Summary:

## Motivation

This PR fixes a major issue when using the ```ProximalAcquisitionFunction``` wrapper with base acquisition functions that are not strictly positive. The fix applies a Softplus transform to the base acquisition function values (using an optional beta = 1.0 argument) before multiplying by the proximal weighting.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

Yes

Pull Request resolved: #1447

Test Plan: Tests have been updated with the correct (softplus-transformed) values.

Reviewed By: Balandat

Differential Revision: D40238091

Pulled By: saitcakmak

fbshipit-source-id: 7529114c77bd9a3634d2ccc1aeeb333452122b80
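For context, a minimal usage sketch of the fixed wrapper (the data and names are illustrative; `UpperConfidenceBound` stands in for any base acquisition function that can return negative values). Without `beta`, the same call would raise the new `RuntimeError` whenever the base acquisition values dip below zero.

```python
import torch
from botorch.models import SingleTaskGP
from botorch.acquisition import UpperConfidenceBound
from botorch.acquisition.proximal import ProximalAcquisitionFunction

# Illustrative training data for a 3-d problem; any single-batch model works here.
train_X = torch.rand(10, 3, dtype=torch.double)
train_Y = torch.randn(10, 1, dtype=torch.double)
model = SingleTaskGP(train_X, train_Y)

# UCB can be negative when the posterior mean is negative, so the proximal
# wrapper needs the SoftPlus transform enabled via its `beta` argument.
ucb = UpperConfidenceBound(model, beta=0.1)
proximal_ucb = ProximalAcquisitionFunction(
    ucb,
    proximal_weights=torch.ones(3, dtype=torch.double),
    beta=1.0,  # SoftPlus applied to base values before proximal weighting
)

test_X = torch.rand(5, 1, 3, dtype=torch.double)
values = proximal_ucb(test_X)  # shape torch.Size([5]); all values non-negative
```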
1 parent ab50b85 commit 98503e4

File tree

2 files changed: +78 −10 lines changed


botorch/acquisition/proximal.py

Lines changed: 29 additions & 6 deletions
@@ -27,15 +27,19 @@

 class ProximalAcquisitionFunction(AcquisitionFunction):
     """A wrapper around AcquisitionFunctions to add proximal weighting of the
-    acquisition function. Acquisition function is weighted via a squared exponential
-    centered at the last training point, with varying lengthscales corresponding to
-    `proximal_weights`. Can only be used with acquisition functions based on single
-    batch models.
+    acquisition function. The acquisition function is
+    weighted via a squared exponential centered at the last training point,
+    with varying lengthscales corresponding to `proximal_weights`. Can only be used
+    with acquisition functions based on single batch models. Acquisition functions
+    must be positive or `beta` must be specified to apply a SoftPlus transform before
+    proximal weighting.

     Small values of `proximal_weights` corresponds to strong biasing towards recently
     observed points, which smoothes optimization with a small potential decrese in
     convergence rate.

+
+
     Example:
         >>> model = SingleTaskGP(train_X, train_Y)
         >>> EI = ExpectedImprovement(model, best_f=0.0)
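As a standalone sketch of the squared-exponential weighting described in this docstring (illustrative tensors; the computation mirrors the `forward` hunk further below):

```python
import torch

# Illustrative candidate point, last training point, and per-dimension lengthscales.
x = torch.tensor([0.2, 0.4, 0.6])
last_X = torch.tensor([0.1, 0.5, 0.5])
proximal_weights = torch.ones(3)

# Squared exponential centered at the last training point; smaller entries in
# `proximal_weights` penalize distance along that dimension more strongly.
M = torch.linalg.norm((x - last_X) / proximal_weights, dim=-1) ** 2
weight = torch.exp(-0.5 * M)  # in (0, 1]; equals 1 exactly at x == last_X
```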
@@ -48,7 +52,8 @@ def __init__(
         self,
         acq_function: AcquisitionFunction,
         proximal_weights: Tensor,
-        transformed_weighting: bool = True,
+        transformed_weighting: Optional[bool] = True,
+        beta: Optional[float] = None,
     ) -> None:
         r"""Derived Acquisition Function weighted by proximity to recently
         observed point.
@@ -62,6 +67,8 @@ def __init__(
                 the transformed input space given by
                 `acq_function.model.input_transform` (if available), otherwise
                 proximal weights are applied in real input space.
+            beta: If not None, apply a softplus transform to the base acquisition
+                function, allows negative base acquisition function values.
         """
         Module.__init__(self)

@@ -79,6 +86,9 @@ def __init__(
         self.register_buffer(
             "transformed_weighting", torch.tensor(transformed_weighting)
         )
+
+        self.register_buffer("beta", None if beta is None else torch.tensor(beta))
+
         _validate_model(model, proximal_weights)

     @t_batch_mode_transform(expected_q=1, assert_output_shape=False)
@@ -127,7 +137,20 @@ def forward(self, X: Tensor) -> Tensor:

         M = torch.linalg.norm(diff / self.proximal_weights, dim=-1) ** 2
         proximal_acq_weight = torch.exp(-0.5 * M)
-        return self.acq_func(X) * proximal_acq_weight.flatten()
+
+        base_acqf = self.acq_func(X)
+        if self.beta is None:
+            if torch.any(base_acqf < 0):
+                raise RuntimeError(
+                    "Cannot use proximal biasing for negative "
+                    "acquisition function values, set a value for beta to "
+                    "fix this with a softplus transform"
+                )
+
+        else:
+            base_acqf = torch.nn.functional.softplus(base_acqf, beta=self.beta)
+
+        return base_acqf * proximal_acq_weight.flatten()


 def _validate_model(model: Model, proximal_weights: Tensor) -> None:
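Why the SoftPlus transform is needed, as a small numeric sketch (values are illustrative): with a negative base value, multiplying by a proximal weight in (0, 1] makes the value larger, so a worse point far from the last observation can outscore a better nearby one; mapping through SoftPlus first keeps all values positive so the weighting behaves as intended.

```python
import torch

# Two candidates: the first is near the last observation (weight 1.0) and has the
# better base acquisition value; the second is far away (weight 0.1) and is worse.
acq_values = torch.tensor([-0.5, -2.0])
prox_weights = torch.tensor([1.0, 0.1])

# Without the transform the far, worse candidate wins (-0.2 > -0.5),
# inverting the intended proximal bias.
print(acq_values * prox_weights)  # tensor([-0.5000, -0.2000])

# After SoftPlus (beta=1.0) all values are positive and the near,
# better candidate wins again (~0.47 > ~0.01).
soft = torch.nn.functional.softplus(acq_values, beta=1.0)
print(soft * prox_weights)  # tensor([0.4741, 0.0127])
```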

test/acquisition/test_proximal.py

Lines changed: 49 additions & 4 deletions
@@ -36,6 +36,11 @@ def forward(self, X):
         pass


+class NegativeAcquisitionFunction(AcquisitionFunction):
+    def forward(self, X):
+        return torch.ones(*X.shape[:-1]) * -1.0
+
+
 class TestProximalAcquisitionFunction(BotorchTestCase):
     def test_proximal(self):
         for dtype in (torch.float, torch.double):
@@ -68,6 +73,7 @@ def test_proximal(self):
                        transformed_weighting=transformed_weighting,
                    )

+                   # softplus transformed value of the acquisition function
                    ei = EI(test_X)

                    # modify last_X/test_X depending on transformed_weighting
@@ -84,7 +90,34 @@ def test_proximal(self):

                    ei_prox = EI_prox(test_X)
                    self.assertTrue(torch.allclose(ei_prox, ei * test_prox_weight))
-                   self.assertTrue(ei_prox.shape == torch.Size([1]))
+                   self.assertEqual(ei_prox.shape, torch.Size([1]))
+
+                   # test with beta specified
+                   EI_prox_beta = ProximalAcquisitionFunction(
+                       EI,
+                       proximal_weights=proximal_weights,
+                       transformed_weighting=transformed_weighting,
+                       beta=1.0,
+                   )
+
+                   # SoftPlus transformed value of the acquisition function
+                   ei = torch.nn.functional.softplus(EI(test_X), beta=1.0)
+
+                   # modify last_X/test_X depending on transformed_weighting
+                   proximal_test_X = test_X.clone()
+                   if transformed_weighting:
+                       if input_transform is not None:
+                           last_X = input_transform(train_X[-1])
+                           proximal_test_X = input_transform(test_X)
+
+                   mv_normal = MultivariateNormal(last_X, torch.diag(proximal_weights))
+                   test_prox_weight = torch.exp(
+                       mv_normal.log_prob(proximal_test_X) - mv_normal.log_prob(last_X)
+                   )
+
+                   ei_prox_beta = EI_prox_beta(test_X)
+                   self.assertTrue(torch.allclose(ei_prox_beta, ei * test_prox_weight))
+                   self.assertEqual(ei_prox_beta.shape, torch.Size([1]))

                    # test t-batch with broadcasting
                    test_X = torch.rand(4, 1, 3, device=self.device, dtype=dtype)
@@ -104,7 +137,7 @@ def test_proximal(self):
                    self.assertTrue(
                        torch.allclose(ei_prox, ei * test_prox_weight.flatten())
                    )
-                   self.assertTrue(ei_prox.shape == torch.Size([4]))
+                   self.assertEqual(ei_prox.shape, torch.Size([4]))

                    # test q-based MC acquisition function
                    qEI = qExpectedImprovement(model, best_f=0.0)
@@ -133,6 +166,18 @@ def test_proximal(self):
                    )
                    self.assertEqual(qei_prox.shape, torch.Size([4]))

+                   # test acquisition function with
+                   # negative values w/o SoftPlus transform specified
+                   negative_acqf = NegativeAcquisitionFunction(model)
+                   bad_neg_prox = ProximalAcquisitionFunction(
+                       negative_acqf, proximal_weights=proximal_weights
+                   )
+
+                   with self.assertRaisesRegex(
+                       RuntimeError, "Cannot use proximal biasing for negative"
+                   ):
+                       bad_neg_prox(test_X)
+
                    # test gradient
                    test_X = torch.rand(
                        1, 3, device=self.device, dtype=dtype, requires_grad=True
@@ -228,7 +273,7 @@ def test_proximal_model_list(self):
            ei_prox = EI_prox(test_X)

            self.assertTrue(torch.allclose(ei_prox, ei * test_prox_weight))
-           self.assertTrue(ei_prox.shape == torch.Size([1]))
+           self.assertEqual(ei_prox.shape, torch.Size([1]))

            # test MC acquisition function
            qEI = qExpectedImprovement(model, best_f=0.0, objective=mc_linear_objective)
@@ -245,7 +290,7 @@ def test_proximal_model_list(self):

            qei_prox = qEI_prox(test_X)
            self.assertTrue(torch.allclose(qei_prox, qei * test_prox_weight.flatten()))
-           self.assertTrue(qei_prox.shape == torch.Size([4]))
+           self.assertEqual(qei_prox.shape, torch.Size([4]))

            # test gradient
            test_X = torch.rand(
