Commit 443bcc5

saitcakmak authored and meta-codesync[bot] committed
Improve initialization with continuous relaxation in optimize_acqf_mixed_alternating (meta-pytorch#3041)
Summary:

Pull Request resolved: meta-pytorch#3041

`post_processing_func` is often used to apply Ax transforms to ensure that the generated points are aligned with the Ax search space. The transforms expect the values they are called with to be members of the transformed search space. Since the initialization step already includes rounding operations, and the post-processing function is applied before returning from `optimize_acqf_mixed_alternating`, we can skip it at initialization. Not using `post_processing_func` during initialization avoids the errors that it may raise (due to the points not being in the specified search space) and eliminates the excessive fallback to random initialization that we see in Ax.

Reviewed By: dme65

Differential Revision: D83979491

fbshipit-source-id: 783d634ff24c159d1a46777e534c372da60974b4
1 parent 3ec2ab0 · commit 443bcc5
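
To make the failure mode concrete, here is a minimal sketch (not from the commit; `INTEGER_DIMS` and `strict_post_proc_func` are illustrative names) of a post-processing function that, like an Ax transform, assumes its inputs already lie in the discrete search space:

# Minimal sketch: a post-processing function that, like an Ax transform,
# expects its inputs to already be members of the discrete search space.
# INTEGER_DIMS and strict_post_proc_func are illustrative names.
import torch
from torch import Tensor

INTEGER_DIMS = [0, 3, 4]  # hypothetical integer dimensions


def strict_post_proc_func(X: Tensor) -> Tensor:
    integral = X[..., INTEGER_DIMS]
    if (integral.round() != integral).any():
        # An Ax transform would similarly reject unrounded inputs.
        raise ValueError("Expected discrete values")
    return X

# Before this change, the initialization step could call such a function on
# continuously relaxed (unrounded) points, triggering the error above and
# forcing a fallback to random initialization. Skipping it at initialization
# is safe because initialization already rounds, and the function is still
# applied to the final candidates before they are returned.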

File tree

2 files changed: +57 −1 lines changed

botorch/optim/optimize_mixed.py

Lines changed: 1 addition & 0 deletions
@@ -551,6 +551,7 @@ def generate_starting_points(
             "batch_limit": options.get("batch_limit", MAX_BATCH_SIZE),
             "init_batch_limit": options.get("init_batch_limit", MAX_BATCH_SIZE),
         },
+        post_processing_func=None,
     )
     x_init_candts, _ = _optimize_acqf(opt_inputs=updated_opt_inputs)
     x_init_candts = x_init_candts.squeeze(-2).detach()
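
The one-line change above hinges on overriding a single field of the input dataclass when building `updated_opt_inputs`. A self-contained sketch of that pattern, assuming the surrounding code uses `dataclasses.replace` (the `OptInputs` class below is a stand-in, not the real `OptimizeAcqfInputs`):

# Sketch of copying a dataclass while overriding one field; OptInputs is a
# stand-in for the real OptimizeAcqfInputs dataclass.
import dataclasses
from collections.abc import Callable


@dataclasses.dataclass(frozen=True)
class OptInputs:
    num_restarts: int
    post_processing_func: Callable | None = None


opt_inputs = OptInputs(num_restarts=4, post_processing_func=lambda X: X.round())

# Copy every field, but drop the post-processing function for the
# initialization call; the original inputs (with the function) are untouched
# and are still used when producing the final candidates.
updated_opt_inputs = dataclasses.replace(opt_inputs, post_processing_func=None)

assert updated_opt_inputs.post_processing_func is None
assert opt_inputs.post_processing_func is not None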

test/optim/test_optimize_mixed.py

Lines changed: 56 additions & 1 deletion
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import random
+import warnings
 from dataclasses import fields
 from itertools import product
 from typing import Any, Callable
@@ -58,6 +59,7 @@ def _make_opt_inputs(
     fixed_features: dict[int, float] | None = None,
     return_best_only: bool = True,
     sequential: bool = True,
+    post_processing_func: Callable[[Tensor], Tensor] | None = None,
 ) -> OptimizeAcqfInputs:
     r"""Helper to construct `OptimizeAcqfInputs` from limited inputs."""
     return OptimizeAcqfInputs(
@@ -71,7 +73,7 @@ def _make_opt_inputs(
         equality_constraints=equality_constraints,
         nonlinear_inequality_constraints=nonlinear_inequality_constraints,
         fixed_features=fixed_features or {},
-        post_processing_func=None,
+        post_processing_func=post_processing_func,
         batch_initial_conditions=None,
         return_best_only=return_best_only,
         gen_candidates=gen_candidates_scipy,
@@ -1458,3 +1460,56 @@ def test_optimize_acqf_mixed_continuous_relaxation(self) -> None:
         self.assertAllClose(
             discrete_call_args["opt_inputs"].post_processing_func(X), X_expected
         )
+
+    def test_initialization_w_continuous_relaxation(self) -> None:
+        # Testing with integer variables.
+        train_X, _, _, cont_dims = self._get_data()
+        # Update the data to introduce integer dimensions.
+        cat_dims = {0: [0, 1]}
+        discrete_dims = {3: list(range(41)), 4: list(range(16))}
+        all_integer_dims: list[int] = [0, 3, 4]
+
+        def org_post_proc_func(X: Tensor) -> Tensor:
+            # Just error out if things are not rounded already.
+            # This stands in for any Ax transform that expects discrete values.
+            if (X[..., all_integer_dims].round() != X[..., all_integer_dims]).any():
+                raise ValueError("Expected discrete values")
+            return X
+
+        # The key test is that this call doesn't error out.
+        with mock.patch(
+            "botorch.optim.optimize_mixed._optimize_acqf", wraps=_optimize_acqf
+        ) as mock_opt, warnings.catch_warnings(record=True) as ws:
+            X, _ = generate_starting_points(
+                opt_inputs=_make_opt_inputs(
+                    acq_function=qLogNoisyExpectedImprovement(
+                        model=QuadraticDeterministicModel(
+                            root=torch.zeros(
+                                train_X.shape[-1],
+                                device=self.device,
+                                dtype=torch.double,
+                            )
+                        ),
+                        X_baseline=train_X,
+                        cache_root=False,
+                    ),
+                    bounds=torch.tensor(
+                        [[0.0, 0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 40.0, 15.0]],
+                        dtype=torch.double,
+                        device=self.device,
+                    ),
+                    post_processing_func=org_post_proc_func,
+                    num_restarts=4,
+                ),
+                discrete_dims=discrete_dims,
+                cat_dims=cat_dims,
+                cont_dims=torch.tensor(cont_dims, device=self.device),
+            )
+        # Check that it was called with no post processing func.
+        mock_opt.assert_called_once()
+        self.assertIsNone(mock_opt.call_args.kwargs["opt_inputs"].post_processing_func)
+        # Check that optimization failure warning is not raised.
+        self.assertFalse(any(issubclass(w.category, OptimizationWarning) for w in ws))
+        # Check that generated points are rounded.
+        self.assertEqual(X.shape, torch.Size([4, train_X.shape[-1]]))
+        self.assertAllClose(X[..., all_integer_dims], X[..., all_integer_dims].round())
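
Two standard-library mechanisms do the heavy lifting in this test: `mock.patch(..., wraps=...)` lets the real `_optimize_acqf` run while recording how it was called, and `warnings.catch_warnings(record=True)` collects any warnings for inspection after the block. A self-contained sketch of the same pattern, using `math.sqrt` purely as a stand-in target:

# Self-contained sketch of the wrap-and-record pattern; math.sqrt is just a
# stand-in target, unrelated to botorch.
import math
import warnings
from unittest import mock

with mock.patch(
    "math.sqrt", wraps=math.sqrt
) as mock_sqrt, warnings.catch_warnings(record=True) as ws:
    result = math.sqrt(4.0)  # the real sqrt still runs via wraps

mock_sqrt.assert_called_once_with(4.0)  # the call and its args were recorded
assert result == 2.0
assert not ws  # no warnings were emitted inside the block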
