
Commit e3d7f77

James Wilson authored and facebook-github-bot committed
Fix batch_shape handling in Normalize and InputStandardize (#1360)
Summary:
Pull Request resolved: #1360

Follow up to #1078.

Reviewed By: saitcakmak

Differential Revision: D38881646

fbshipit-source-id: 50f127ccd7699e760058609e7ed0904568864ab7
1 parent 993ff39 · commit e3d7f77

File tree

2 files changed (+54, -10):
  botorch/models/transforms/input.py
  test/models/transforms/test_input.py


botorch/models/transforms/input.py

Lines changed: 38 additions & 8 deletions
@@ -341,7 +341,7 @@ def __init__(
                 take all dimensions of the inputs into account.
             bounds: If provided, use these bounds to normalize the inputs. If
                 omitted, learn the bounds in train mode.
-            batch_shape: The batch shape of the inputs (asssuming input tensors
+            batch_shape: The batch shape of the inputs (assuming input tensors
                 of shape `batch_shape x n x d`). If provided, perform individual
                 normalization per batch, otherwise uses a single normalization.
             transform_on_train: A boolean indicating whether to apply the
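
For context on the `batch_shape x n x d` convention in the docstring above, a minimal usage sketch (the concrete shapes here are illustrative assumptions, not part of the diff): with a non-trivial batch_shape, separate bounds are learned per batch.

    import torch
    from botorch.models.transforms.input import Normalize

    nlz = Normalize(d=2, batch_shape=torch.Size([3]))  # illustrative shapes
    X = torch.rand(3, 5, 2)  # batch_shape x n x d
    X_nlzd = nlz(X)  # in train mode, bounds are learned per batch
    print(nlz.mins.shape)  # torch.Size([3, 1, 2]): one (1 x d) min per batch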
@@ -410,10 +410,27 @@ def _transform(self, X: Tensor) -> Tensor:
                     f"Wrong input dimension. Received {X.size(-1)}, "
                     f"expected {self.mins.size(-1)}."
                 )
-            self.mins = X.min(dim=-2, keepdim=True)[0]
-            ranges = X.max(dim=-2, keepdim=True)[0] - self.mins
-            ranges[torch.where(ranges <= self.min_range)] = self.min_range
-            self.ranges = ranges
+
+            n = len(self.batch_shape) + 2
+            if X.ndim < n:
+                raise ValueError(
+                    f"`X` must have at least {n} dimensions, {n - 2} batch and 2 innate"
+                    f" , but has {X.ndim}."
+                )
+
+            # Move extra batch and innate batch (i.e. marginal) dimensions to the right
+            batch_ndim = min(len(self.batch_shape), X.ndim - 2)  # batch rank of `X`
+            _X = X.permute(
+                *range(X.ndim - batch_ndim - 2, X.ndim - 2),  # module batch dims
+                X.ndim - 1,  # input dim
+                *range(X.ndim - batch_ndim - 2),  # other dims, to be reduced over
+                X.ndim - 2,  # marginal dim
+            ).reshape(*self.batch_shape, 1, X.shape[-1], -1)
+
+            # Extract minimums and ranges
+            self.mins = _X.min(dim=-1).values  # batch_shape x (1, d)
+            self.ranges = (_X.max(dim=-1).values - self.mins).clip(min=self.min_range)
+
         if hasattr(self, "indices"):
             X_new = X.clone()
             X_new[..., self.indices] = (
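
To unpack the permute/reshape bookkeeping added above, a hedged standalone sketch (the example tensor and its shapes are assumptions for illustration): any extra leading batch dims plus the marginal dim `n` get flattened into one trailing dim, so the min/max reductions pool all points that share a module batch index.

    import torch

    batch_shape = torch.Size([3])  # module batch shape
    X = torch.randn(2, 3, 5, 4)  # extra batch (2) x batch_shape (3) x n (5) x d (4)

    batch_ndim = min(len(batch_shape), X.ndim - 2)  # = 1
    _X = X.permute(
        *range(X.ndim - batch_ndim - 2, X.ndim - 2),  # module batch dims -> front
        X.ndim - 1,  # input dim d
        *range(X.ndim - batch_ndim - 2),  # extra dims, to be reduced over
        X.ndim - 2,  # marginal dim n
    ).reshape(*batch_shape, 1, X.shape[-1], -1)  # shape: 3 x 1 x 4 x 10

    mins = _X.min(dim=-1).values
    print(mins.shape)  # torch.Size([3, 1, 4]), i.e. batch_shape x 1 x d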
@@ -551,10 +568,23 @@ def _transform(self, X: Tensor) -> Tensor:
                     f"Wrong input. dimension. Received {X.size(-1)}, "
                     f"expected {self.means.size(-1)}"
                 )
-            self.means = X.mean(dim=-2, keepdim=True)
-            self.stds = X.std(dim=-2, keepdim=True)

-            self.stds = torch.clamp(self.stds, min=self.min_std)
+            n = len(self.batch_shape) + 2
+            if X.ndim < n:
+                raise ValueError(
+                    f"`X` must have at least {n} dimensions, {n - 2} batch and 2 innate"
+                    f" , but has {X.ndim}."
+                )
+
+            # Aggregate means and standard deviations over extra batch and marginal dims
+            batch_ndim = min(len(self.batch_shape), X.ndim - 2)  # batch rank of `X`
+            reduce_dims = (*range(X.ndim - batch_ndim - 2), X.ndim - 2)
+            self.stds, self.means = (
+                values.unsqueeze(-2)
+                for values in torch.std_mean(X, dim=reduce_dims, unbiased=True)
+            )
+            self.stds.clamp_(min=self.min_std)
+
         if hasattr(self, "indices"):
             X_new = X.clone()
             X_new[..., self.indices] = (
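
The InputStandardize branch reaches the same shape without a permute, since torch.std_mean accepts a tuple of reduction dims directly. A minimal sketch under the same illustrative shapes as above:

    import torch

    batch_shape = torch.Size([3])
    X = torch.randn(2, 3, 5, 4)  # extra batch (2) x batch_shape (3) x n (5) x d (4)

    batch_ndim = min(len(batch_shape), X.ndim - 2)  # = 1
    reduce_dims = (*range(X.ndim - batch_ndim - 2), X.ndim - 2)  # = (0, 2)
    stds, means = (
        values.unsqueeze(-2)  # restore the reduced marginal dim as size 1
        for values in torch.std_mean(X, dim=reduce_dims, unbiased=True)
    )
    print(means.shape, stds.shape)  # torch.Size([3, 1, 4]) for both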

test/models/transforms/test_input.py

Lines changed: 16 additions & 2 deletions
@@ -187,15 +187,22 @@ def test_normalize(self):
             X = torch.cat((torch.randn(4, 1), torch.zeros(4, 1)), dim=-1)
             X = X.to(self.device)
             self.assertEqual(torch.isfinite(nlz(X)).sum(), X.numel())
+            with self.assertRaisesRegex(ValueError, r"must have at least \d+ dim"):
+                nlz(torch.randn(X.shape[-1], dtype=dtype))

             # basic usage
             for batch_shape in (torch.Size(), torch.Size([3])):
                 # learned bounds
                 nlz = Normalize(d=2, batch_shape=batch_shape)
                 X = torch.randn(*batch_shape, 4, 2, device=self.device, dtype=dtype)
-                X_nlzd = nlz(X)
+                for _X in (torch.stack((X, X)), X):  # check batch_shape is obeyed
+                    X_nlzd = nlz(_X)
+                    self.assertEqual(nlz.mins.shape, batch_shape + (1, X.shape[-1]))
+                    self.assertEqual(nlz.ranges.shape, batch_shape + (1, X.shape[-1]))
+
                 self.assertEqual(X_nlzd.min().item(), 0.0)
                 self.assertEqual(X_nlzd.max().item(), 1.0)
+
                 nlz.eval()
                 X_unnlzd = nlz.untransform(X_nlzd)
                 self.assertTrue(torch.allclose(X, X_unnlzd, atol=1e-4, rtol=1e-4))
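
Restating what the new assertions check, as a hedged usage sketch: an extra leading batch dim (here created with torch.stack) is accepted and reduced over, and the learned statistics keep shape batch_shape x 1 x d.

    import torch
    from botorch.models.transforms.input import Normalize

    nlz = Normalize(d=2, batch_shape=torch.Size([3]))
    X = torch.randn(3, 4, 2)
    nlz(torch.stack((X, X)))  # extra leading batch dim of size 2 is pooled
    assert nlz.mins.shape == torch.Size([3, 1, 2])
    assert nlz.ranges.shape == torch.Size([3, 1, 2])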
@@ -356,15 +363,22 @@ def test_standardize(self):
             X = torch.cat((torch.randn(4, 1), torch.zeros(4, 1)), dim=-1)
             X = X.to(self.device, dtype=dtype)
             self.assertEqual(torch.isfinite(stdz(X)).sum(), X.numel())
+            with self.assertRaisesRegex(ValueError, r"must have at least \d+ dim"):
+                stdz(torch.randn(X.shape[-1], dtype=dtype))

             # basic usage
             for batch_shape in (torch.Size(), torch.Size([3])):
                 stdz = InputStandardize(d=2, batch_shape=batch_shape)
                 torch.manual_seed(42)
                 X = torch.randn(*batch_shape, 4, 2, device=self.device, dtype=dtype)
-                X_stdz = stdz(X)
+                for _X in (torch.stack((X, X)), X):  # check batch_shape is obeyed
+                    X_stdz = stdz(_X)
+                    self.assertEqual(stdz.means.shape, batch_shape + (1, X.shape[-1]))
+                    self.assertEqual(stdz.stds.shape, batch_shape + (1, X.shape[-1]))
+
                 self.assertTrue(torch.all(X_stdz.mean(dim=-2).abs() < 1e-4))
                 self.assertTrue(torch.all((X_stdz.std(dim=-2) - 1.0).abs() < 1e-4))
+
                 stdz.eval()
                 X_unstdz = stdz.untransform(X_stdz)
                 self.assertTrue(torch.allclose(X, X_unstdz, atol=1e-4, rtol=1e-4))
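
The analogous sketch for InputStandardize, including the new too-few-dimensions error path (illustrative shapes; the message text comes from the diff above):

    import torch
    from botorch.models.transforms.input import InputStandardize

    stdz = InputStandardize(d=2, batch_shape=torch.Size([3]))
    X = torch.randn(3, 4, 2)
    stdz(torch.stack((X, X)))  # stats pooled over the extra batch dim
    assert stdz.means.shape == torch.Size([3, 1, 2])

    try:
        stdz(torch.randn(2))  # 1-dim input, but at least 3 dims are required
    except ValueError as err:
        print(err)  # `X` must have at least 3 dimensions, 1 batch and 2 innate ...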
