Fix boundary handling in sample_polytope (#2353)

saitcakmak · facebook-github-bot · commit 5f582084e998 · 2024-05-28T18:46:41.000-07:00
Summary: Pull Request resolved: #2353

`sample_polytope` would set both `alpha_min = 0` and `alpha_max = 0` when `x` was at the boundary of the polytope, so the sampler got stuck and kept returning the same point.

Fixes #2351

Reviewed By: Balandat

Differential Revision: D57883949

fbshipit-source-id: 48433e94739f60c38cbd028c8dd48b919ebff9c3
diff --git a/botorch/utils/sampling.py b/botorch/utils/sampling.py
@@ -239,35 +239,59 @@ def sample_polytope(
     Returns:
         (n, d) dim Tensor containing the resulting samples.
     """
+    # Check that starting point satisfies the constraints.
+    if not ((slack := A @ x0 - b) <= 0).all():
+        raise ValueError(
+            f"Starting point does not satisfy the constraints. Inputs: {A=},"
+            f"{b=}, {x0=}, A@x0-b={slack}."
+        )
+    # Remove rows where all elements of A are 0. This avoids nan and infs later.
+    # A may have zero rows in it when this is called from PolytopeSampler
+    # with equality constraints (which are absorbed into A & b).
+    non_zero_rows = torch.any(A != 0, dim=-1)
+    A = A[non_zero_rows]
+    b = b[non_zero_rows]
+
     n_tot = n + n0
     seed = seed if seed is not None else torch.randint(0, 1000000, (1,)).item()
     with manual_seed(seed=seed):
         rands = torch.rand(n_tot, dtype=A.dtype, device=A.device)
 
-    # pre-sample samples from hypersphere
-    d = x0.size(0)
-    # uniform samples from unit ball in d dims
-    # increment seed by +1 to avoid correlation with step size, see #2156 for details
+    # Sample uniformly from unit hypersphere in d dims.
+    # Increment seed by +1 to avoid correlation with step size, see #2156 for details.
     Rs = sample_hypersphere(
-        d=d, n=n_tot, dtype=A.dtype, device=A.device, seed=seed + 1
+        d=x0.shape[0], n=n_tot, dtype=A.dtype, device=A.device, seed=seed + 1
     ).unsqueeze(-1)
 
-    # compute matprods in batch
+    # Use batch operations for matrix multiplication.
     ARs = (A @ Rs).squeeze(-1)
     out = torch.empty(n, A.size(-1), dtype=A.dtype, device=A.device)
     x = x0.clone()
+    large_constant = torch.finfo().max
     for i, (ar, r, rnd) in enumerate(zip(ARs, Rs, rands)):
-        # given x, the next point in the chain is x+alpha*r
-        # it also satisfies A(x+alpha*r)<=b which implies A*alpha*r<=b-Ax
+        # Given x, the next point in the chain is x+alpha*r.
+        # It must satisfy A(x+alpha*r)<=b, which implies A*alpha*r<=b-Ax,
         # so alpha<=(b-Ax)/ar for ar>0, and alpha>=(b-Ax)/ar for ar<0.
-        # b - A @ x is always >= 0, clamping for numerical tolerances
+        # If x is at the boundary, b - Ax = 0. If ar > 0, then we must
+        # have alpha <= 0. If ar < 0, we must have alpha >= 0.
+        # ar == 0 is an unlikely event that provides no signal.
+        # b - A @ x is always >= 0, clamping for numerical tolerances.
         w = (b - A @ x).squeeze().clamp(min=0.0) / ar
-        pos = w >= 0
-        alpha_max = w[pos].min()
-        # important to include equality here in cases x is at the boundary
-        # of the polytope
-        neg = w <= 0
-        alpha_min = w[neg].max()
+        # Find upper bound for alpha. If there are no constraints on
+        # the upper bound of alpha, set it to a large value.
+        pos = w > 0
+        alpha_max = w[pos].min().item() if pos.any() else large_constant
+        # Find lower bound for alpha.
+        neg = w < 0
+        alpha_min = w[neg].max().item() if neg.any() else -large_constant
+        # Handle the boundary case.
+        if (w_eq_0 := (w == 0)).any():
+            # If ar > 0 at the boundary, alpha <= 0.
+            if w_eq_0.logical_and(ar > 0).any():
+                alpha_max = min(alpha_max, 0.0)
+            # If ar < 0 at the boundary, alpha >= 0.
+            if w_eq_0.logical_and(ar < 0).any():
+                alpha_min = max(alpha_min, 0.0)
         # alpha~Unif[alpha_min, alpha_max]
         alpha = alpha_min + rnd * (alpha_max - alpha_min)
         x = x + alpha * r
diff --git a/test/utils/test_sampling.py b/test/utils/test_sampling.py
@@ -8,6 +8,7 @@
 
 import itertools
 import warnings
+from abc import ABC
 from typing import Any, Dict, Type
 from unittest import mock
 
@@ -29,6 +30,7 @@
     optimize_posterior_samples,
     PolytopeSampler,
     sample_hypersphere,
+    sample_polytope,
     sample_simplex,
     sparse_to_dense_constraints,
 )
@@ -304,8 +306,36 @@ def test_get_polytope_samples(self):
                 ).draw(15, seed=0)[::3]
             self.assertTrue(torch.equal(samps, expected_samps))
 
+    def test_sample_polytope_infeasible(self) -> None:
+        with self.assertRaisesRegex(ValueError, "Starting point does not satisfy"):
+            sample_polytope(
+                A=torch.tensor([[0.0, 0.0]]),
+                b=torch.tensor([[-1.0]]),
+                x0=torch.tensor([[0.0], [0.0]]),
+            )
+
+    def test_sample_polytope_boundary(self) -> None:
+        # Check that sample_polytope does not get stuck at the boundary.
+        # This replicates https://github.com/pytorch/botorch/issues/2351.
+        samples = sample_polytope(
+            A=torch.tensor(
+                [
+                    [-1.0, -1.0],
+                    [0.0, 0.0],
+                    [-1.0, 0.0],
+                    [0.0, -1.0],
+                    [0.0, 0.0],
+                    [1.0, 0.0],
+                    [0.0, 1.0],
+                ]
+            ),
+            b=torch.tensor([[1.0], [1.0], [1.0], [1.0], [0.0], [0.0], [0.0]]),
+            x0=torch.tensor([[0.0], [0.0]]),
+        )
+        self.assertFalse((samples == 0).all())
+
 
-class PolytopeSamplerTestBase:
+class PolytopeSamplerTestBase(ABC):
     sampler_class: Type[PolytopeSampler]
     sampler_kwargs: Dict[str, Any] = {}