
Commit 2d87f90

ItsMrLin authored and facebook-github-bot committed
Allowing custom dimensionality and improved gradient stability in ModifiedFixedSingleSampleModel (#1732)
Summary:
Pull Request resolved: #1732

- Allowing explicitly specifying the dimensionality instead of inferring it from the base model.
- Adding jitter before the sqrt call so that when variance is close to zero, the gradient does not explode.

Reviewed By: Ryan-Rhys

Differential Revision: D43899380

fbshipit-source-id: 51d7ad75810320626f24887e13453b57453164da
1 parent 4a76513 commit 2d87f90

2 files changed: +39 -3 lines changed


botorch/models/deterministic.py

Lines changed: 24 additions & 3 deletions
@@ -178,18 +178,39 @@ class FixedSingleSampleModel(DeterministicModel):
     We assume the outcomes are uncorrelated here.
     """
 
-    def __init__(self, model: Model, w: Optional[Tensor] = None) -> None:
+    def __init__(
+        self,
+        model: Model,
+        w: Optional[Tensor] = None,
+        dim: Optional[int] = None,
+        jitter: Optional[float] = 1e-8,
+        dtype: Optional[torch.dtype] = None,
+        device: Optional[torch.device] = None,
+    ) -> None:
         r"""
         Args:
             model: The base model.
             w: A 1-d tensor with length model.num_outputs.
                 If None, draw it from a standard normal distribution.
+            dim: Dimensionality of w.
+                If None and w is not provided, draw w samples of size model.num_outputs.
+            jitter: Jitter value to be added for numerical stability, 1e-8 by default.
+            dtype: The dtype for w, if specified.
+            device: The device for w, if specified.
         """
         super().__init__()
         self.model = model
         self._num_outputs = model.num_outputs
-        self.w = torch.randn(model.num_outputs)
+        self.jitter = jitter
+        if w is None:
+            self.w = (
+                torch.randn(model.num_outputs, dtype=dtype, device=device)
+                if dim is None
+                else torch.randn(dim, dtype=dtype, device=device)
+            )
+        else:
+            self.w = w
 
     def forward(self, X: Tensor) -> Tensor:
         post = self.model.posterior(X)
-        return post.mean + post.variance.sqrt() * self.w.to(X)
+        return post.mean + torch.sqrt(post.variance + self.jitter) * self.w.to(X)
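Why the jitter goes inside the sqrt: the derivative of sqrt(x) is 1 / (2 * sqrt(x)), which diverges as the posterior variance approaches zero, whereas sqrt(x + jitter) keeps the gradient bounded by 1 / (2 * sqrt(jitter)). A minimal standalone sketch (not part of this commit) demonstrating the effect with PyTorch autograd:

    import torch

    # Variance very close to zero, as can happen at or near a GP's training points.
    variance = torch.tensor([1e-20], requires_grad=True)

    # Without jitter: gradient is 1 / (2 * sqrt(1e-20)) = 5e9, and inf at exactly zero.
    torch.sqrt(variance).sum().backward()
    print(variance.grad)  # tensor([5.0000e+09])

    variance.grad = None

    # With jitter: gradient is capped near 1 / (2 * sqrt(1e-8)) = 5e3.
    torch.sqrt(variance + 1e-8).sum().backward()
    print(variance.grad)  # tensor([5000.])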

test/models/test_deterministic.py

Lines changed: 15 additions & 0 deletions
@@ -166,6 +166,7 @@ def test_FixedSingleSampleModel(self):
         model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
         fss_model = FixedSingleSampleModel(model=model)
 
+        # test without specifying w and dim
         test_X = torch.rand(2, 3)
         w = fss_model.w
         post = model.posterior(test_X)
@@ -175,6 +176,20 @@ def test_FixedSingleSampleModel(self):
 
         self.assertTrue(hasattr(fss_model, "num_outputs"))
 
+        # test specifying w explicitly
+        w = torch.randn(4)
+        fss_model = FixedSingleSampleModel(model=model, w=w)
+        self.assertTrue(fss_model.w.shape == w.shape)
+        # test dim when w is also provided
+        dim = 5
+        fss_model = FixedSingleSampleModel(model=model, w=w, dim=dim)
+        # dim should be ignored in favor of the provided w
+        self.assertTrue(fss_model.w.shape == w.shape)
+        # test dim when no w is provided
+        fss_model = FixedSingleSampleModel(model=model, dim=dim)
+        # w should be drawn with the specified dimensionality
+        self.assertTrue(fss_model.w.shape == torch.Size([dim]))
+
         # check w dtype conversion
         train_X_double = torch.rand(2, 3, dtype=torch.double)
         train_Y_double = torch.rand(2, 2, dtype=torch.double)
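A hedged usage sketch following the test above, showing the new constructor arguments (dim is only honored when w is not passed, matching the logic in the diff; shapes here are illustrative):

    import torch
    from botorch.models import SingleTaskGP
    from botorch.models.deterministic import FixedSingleSampleModel

    train_X = torch.rand(10, 3, dtype=torch.double)
    train_Y = torch.rand(10, 2, dtype=torch.double)
    model = SingleTaskGP(train_X=train_X, train_Y=train_Y)

    # Draw w with an explicit dimensionality; dim should match the output
    # dimension the sample is broadcast against (2 here).
    fss_model = FixedSingleSampleModel(model=model, dim=2, dtype=torch.double)

    # Evaluates mean + sqrt(variance + jitter) * w at the test points.
    test_X = torch.rand(5, 3, dtype=torch.double)
    values = fss_model(test_X)  # shape: 5 x 2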
