
Commit 26e96d4

Jihao Andreas Lin authored and meta-codesync[bot] committed
Implement construct_inputs class method for Ax integration (meta-pytorch#3037)
Summary:
Pull Request resolved: meta-pytorch#3037

LatentKroneckerGP requires `train_X`, `train_T`, and `train_Y` as input data, where `train_X` and `train_T` define the Cartesian product space and `train_Y` are the corresponding observations (with potentially missing values). Ax provides the data as samples from the product space, and we need to separate it into the individual factors.

For example, let X = [a, b, c] and T = [0, 1]; then the full product space is {(a, 0), (a, 1), (b, 0), (b, 1), (c, 0), (c, 1)}. Ax would provide us with observations like

x1 = (a, 0), y1 = 1
x2 = (a, 1), y2 = 2
x3 = (b, 0), y3 = 3
x4 = (c, 1), y4 = 4

and we need to transform them into X = [a, b, c], T = [0, 1], and Y = [[1, 2], [3, nan], [nan, 4]] (note that the y values for (b, 1) and (c, 0) are missing).

Reviewed By: saitcakmak

Differential Revision: D83781022

fbshipit-source-id: 6a0d153fd8f776a4a33acf1f0581d76a0ba31148
1 parent 2cc41dc commit 26e96d4
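
To make the transformation concrete, here is a minimal standalone sketch of the factorization described in the summary (plain PyTorch, with a, b, c encoded as 0.0, 1.0, 2.0; an illustration of the idea, not the code from this commit):

```python
import torch

# Product-space samples from the summary: (a, 0), (a, 1), (b, 0), (c, 1),
# with a, b, c encoded as 0.0, 1.0, 2.0. (b, 1) and (c, 0) are unobserved.
xt = torch.tensor([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [2.0, 1.0]])
y = torch.tensor([1.0, 2.0, 3.0, 4.0])

# Factorize the samples into the unique factors X and T.
X, x_idx = xt[:, :1].unique(sorted=True, return_inverse=True, dim=0)
T, t_idx = xt[:, 1:].unique(sorted=True, return_inverse=True, dim=0)

# Scatter the observations into the full Cartesian product, NaN elsewhere.
Y = torch.full((X.shape[0] * T.shape[0],), torch.nan)
Y[x_idx * T.shape[0] + t_idx] = y
Y = Y.reshape(X.shape[0], T.shape[0])
print(Y)  # tensor([[1., 2.], [3., nan], [nan, 4.]])
```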

2 files changed: +135 −11 lines changed

botorch/models/latent_kronecker_gp.py

Lines changed: 64 additions & 0 deletions
```diff
@@ -24,17 +24,20 @@
 """
 
 import contextlib
+import warnings
 from typing import Any
 
 import torch
 from botorch.acquisition.objective import PosteriorTransform
 from botorch.exceptions.errors import BotorchTensorDimensionError
+from botorch.exceptions.warnings import InputDataWarning
 from botorch.models.gpytorch import GPyTorchModel
 from botorch.models.model import FantasizeMixin, Model
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform, Standardize
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from botorch.posteriors.latent_kronecker import LatentKroneckerGPPosterior
+from botorch.utils.datasets import SupervisedDataset
 from botorch.utils.types import _DefaultType, DEFAULT
 from gpytorch.distributions import MultivariateNormal
 from gpytorch.kernels import MaternKernel, ScaleKernel
```
```diff
@@ -427,3 +430,64 @@ def condition_on_observations(
         raise NotImplementedError(
             f"Conditioning currently not supported for {self.__class__.__name__}"
         )
+
+    @classmethod
+    def construct_inputs(cls, training_data: SupervisedDataset) -> dict[str, Any]:
+        """
+        Constructs the input tensors for LatentKroneckerGP from a SupervisedDataset.
+
+        This method processes the provided training data to extract and organize the
+        features and targets into the required format for the LatentKroneckerGP model.
+        It factorizes inputs from the product space into the factors X and T.
+        The matching output Y values are assembled by mapping observed values to their
+        corresponding positions and filling missing values with NaN.
+
+        Args:
+            training_data: A SupervisedDataset containing training inputs and outputs.
+
+        Returns:
+            A dictionary with keys `train_X`, `train_T`, and `train_Y`, where:
+            - `train_X`: The unique feature values (excluding the T dimension).
+            - `train_T`: The unique feature values of the T dimension.
+            - `train_Y`: The outputs aligned with the Cartesian product of
+                `train_X` and `train_T`, with missing values filled as NaN.
+        """
+        model_inputs = super().construct_inputs(training_data=training_data)
+
+        if "train_Yvar" in model_inputs:
+            warnings.warn(
+                "Ignoring Yvar values in provided training data, because "
+                "they are currently not supported by LatentKroneckerGP.",
+                InputDataWarning,
+                stacklevel=2,
+            )
+
+        t_idx = training_data.feature_names.index("step")
+        x_idx = [i for i in range(len(training_data.feature_names)) if i != t_idx]
+
+        # Factorize product space into factors X and T by finding unique values
+        train_X, x_idx = model_inputs["train_X"][..., x_idx].unique(
+            sorted=True, return_inverse=True, dim=-2
+        )
+        train_T, t_idx = model_inputs["train_X"][..., [t_idx]].unique(
+            sorted=True, return_inverse=True, dim=-2
+        )
+
+        # Initialize train_Y with NaN for the full Cartesian product
+        batch_shape = train_X.shape[:-2]
+        n_x = train_X.shape[-2]
+        n_t = train_T.shape[-2]
+        train_Y = torch.full(
+            (*batch_shape, n_x * n_t, 1),
+            torch.nan,
+            dtype=model_inputs["train_Y"].dtype,
+            device=model_inputs["train_Y"].device,
+        )
+
+        # Convert 2D indices to 1D indices
+        y_idx = x_idx * n_t + t_idx
+        # Map original observations to their positions in the Cartesian product
+        train_Y[..., y_idx, :] = model_inputs["train_Y"]
+        train_Y = train_Y.reshape(*batch_shape, n_x, n_t)
+
+        return {"train_X": train_X, "train_T": train_T, "train_Y": train_Y}
```
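
For context, here is a hypothetical end-to-end usage sketch of the new class method. The dataset values mirror the toy example above; the feature named "step" is what `construct_inputs` looks up as the T dimension. Passing the returned dictionary to the constructor follows BoTorch's usual `construct_inputs` convention and is an assumption, not part of this diff:

```python
import torch
from botorch.models.latent_kronecker_gp import LatentKroneckerGP
from botorch.utils.datasets import SupervisedDataset

# Four observed points of the 3 x 2 product space; "step" is the T dimension.
X = torch.tensor([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [2.0, 1.0]])
Y = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
dataset = SupervisedDataset(
    X=X, Y=Y, feature_names=["x_0", "step"], outcome_names=["y"]
)

model_inputs = LatentKroneckerGP.construct_inputs(dataset)
# model_inputs["train_Y"] has shape (3, 2), with NaNs at (b, 1) and (c, 0).
model = LatentKroneckerGP(**model_inputs)  # assumed constructor usage
```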

test/models/test_latent_kronecker_gp.py

Lines changed: 71 additions & 11 deletions
```diff
@@ -10,10 +10,11 @@
 import torch
 from botorch.acquisition.objective import ScalarizedPosteriorTransform
 from botorch.exceptions.errors import BotorchTensorDimensionError
-from botorch.exceptions.warnings import OptimizationWarning
+from botorch.exceptions.warnings import InputDataWarning, OptimizationWarning
 from botorch.fit import fit_gpytorch_mll
 from botorch.models.latent_kronecker_gp import LatentKroneckerGP
 from botorch.models.transforms import Normalize, Standardize
+from botorch.utils.datasets import SupervisedDataset
 from botorch.utils.testing import BotorchTestCase, get_random_data
 from botorch.utils.types import DEFAULT
 from gpytorch.kernels import MaternKernel, RBFKernel, ScaleKernel
@@ -38,7 +39,7 @@ def _get_data_with_missing_entries(
     mask[torch.randperm(n_train * t)[: n_train * t // 2]] = False
     train_Y[..., ~mask.reshape(n_train, t)] = torch.nan
 
-    return train_X, train_T, train_Y
+    return train_X, train_T, train_Y, mask
 
 
 class TestLatentKroneckerGP(BotorchTestCase):
@@ -71,7 +72,7 @@ def test_default_init(self):
             intf = None
             octf = None
 
-            train_X, train_T, train_Y = _get_data_with_missing_entries(
+            train_X, train_T, train_Y, mask = _get_data_with_missing_entries(
                 n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
             )
 
```

```diff
@@ -85,8 +86,7 @@ def test_default_init(self):
             model.to(**tkwargs)
 
             # test init
-            mask_valid = torch.isfinite(train_Y.reshape(-1, n_train, t)[0]).flatten()
-            train_Y_flat = train_Y.reshape(*batch_shape, -1)[..., mask_valid]
+            train_Y_flat = train_Y.reshape(*batch_shape, -1)[..., mask]
             if use_transforms:
                 self.assertIsInstance(model.input_transform, Normalize)
                 self.assertIsInstance(model.outcome_transform, Standardize)
```
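
The removed `mask_valid` computation and the returned `mask` select the same entries; here is a small self-contained check of that equivalence (hypothetical standalone code mimicking the test helper's masking, not taken from the diff):

```python
import torch

n_train, t = 4, 3
train_Y = torch.randn(n_train, t)
mask = torch.ones(n_train * t, dtype=torch.bool)
mask[torch.randperm(n_train * t)[: n_train * t // 2]] = False
train_Y[~mask.reshape(n_train, t)] = torch.nan  # knock out unobserved entries

# Selecting with the returned mask yields exactly the finite entries,
# so the tests no longer need to recompute them via torch.isfinite.
flat = train_Y.reshape(-1)
assert torch.equal(flat[mask], flat[torch.isfinite(flat)])
```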
```diff
@@ -124,7 +124,7 @@ def test_custom_init(self):
         ):
             tkwargs = {"device": self.device, "dtype": dtype}
 
-            train_X, train_T, train_Y = _get_data_with_missing_entries(
+            train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
                 n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
             )
 
```

```diff
@@ -230,7 +230,7 @@ def test_gp_train(self):
             intf = None
             octf = None
 
-            train_X, train_T, train_Y = _get_data_with_missing_entries(
+            train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
                 n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
             )
 
```

```diff
@@ -271,7 +271,7 @@ def _test_gp_eval_shapes(
             intf = None
             octf = None
 
-            train_X, train_T, train_Y = _get_data_with_missing_entries(
+            train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
                 n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
             )
 
```

```diff
@@ -441,7 +441,7 @@ def test_gp_eval_values(self):
             intf = None
             octf = None
 
-            train_X, train_T, train_Y = _get_data_with_missing_entries(
+            train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
                 n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
             )
 
```

```diff
@@ -507,7 +507,7 @@ def test_iterative_methods(self):
         batch_shape = torch.Size([])
         tkwargs = {"device": self.device, "dtype": torch.double}
 
-        train_X, train_T, train_Y = _get_data_with_missing_entries(
+        train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
             n_train=10, d=1, t=1, batch_shape=batch_shape, tkwargs=tkwargs
         )
 
```

```diff
@@ -525,7 +525,7 @@ def test_not_implemented(self):
         batch_shape = torch.Size([])
         tkwargs = {"device": self.device, "dtype": torch.double}
 
-        train_X, train_T, train_Y = _get_data_with_missing_entries(
+        train_X, train_T, train_Y, _ = _get_data_with_missing_entries(
            n_train=10, d=1, t=1, batch_shape=batch_shape, tkwargs=tkwargs
         )
 
```

```diff
@@ -558,3 +558,63 @@
         err_msg = f"Only GaussianLikelihood currently supported for {cls_name}"
         with self.assertRaisesRegex(NotImplementedError, err_msg):
             model.posterior(train_X)
+
+    def test_construct_inputs(self) -> None:
+        # This test relies on the fact that the random (missing) data generation
+        # does not remove all occurrences of a particular X or T value. Therefore,
+        # we fix the random seed and set n_train and t to slightly larger values.
+
+        torch.manual_seed(12345)
+        for batch_shape, n_train, d, t, dtype in itertools.product(
+            (  # batch_shape
+                torch.Size([]),
+                torch.Size([1]),
+                torch.Size([2]),
+                torch.Size([2, 3]),
+            ),
+            (15,),  # n_train
+            (1, 2),  # d
+            (10,),  # t
+            (torch.float, torch.double),  # dtype
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+
+            train_X, train_T, train_Y, mask = _get_data_with_missing_entries(
+                n_train=n_train, d=d, t=t, batch_shape=batch_shape, tkwargs=tkwargs
+            )
+
+            train_X_supervised = torch.cat(
+                [
+                    train_X.repeat_interleave(t, dim=-2),
+                    train_T.repeat(*([1] * len(batch_shape)), n_train, 1),
+                ],
+                dim=-1,
+            )
+            train_Y_supervised = train_Y.reshape(*batch_shape, n_train * t, 1)
+
+            # randomly permute data to test robustness to non-contiguous data
+            idx = torch.randperm(n_train * t, device=self.device)
+            train_X_supervised = train_X_supervised[..., idx, :][..., mask[idx], :]
+            train_Y_supervised = train_Y_supervised[..., idx, :][..., mask[idx], :]
+
+            dataset = SupervisedDataset(
+                X=train_X_supervised,
+                Y=train_Y_supervised,
+                Yvar=train_Y_supervised,  # just to check warning
+                feature_names=[f"x_{i}" for i in range(d)] + ["step"],
+                outcome_names=["y"],
+            )
+
+            w_msg = "Ignoring Yvar values in provided training data, because "
+            w_msg += "they are currently not supported by LatentKroneckerGP."
+            with self.assertWarnsRegex(InputDataWarning, w_msg):
+                model_inputs = LatentKroneckerGP.construct_inputs(dataset)
+
+            # this test generates train_X and train_T in sorted order
+            # the data is randomly permuted before passing to construct_inputs
+            # construct_inputs sorts the data, so we expect the results to be equal
+            self.assertAllClose(model_inputs["train_X"], train_X, atol=0.0)
+            self.assertAllClose(model_inputs["train_T"], train_T, atol=0.0)
+            self.assertAllClose(
+                model_inputs["train_Y"], train_Y, atol=0.0, equal_nan=True
+            )
```
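
As an aside, the `repeat_interleave` / `repeat` pairing in the new test is a common way to enumerate a Cartesian product row by row; here is a tiny sketch of the pattern in isolation (illustrative values, not taken from the diff):

```python
import torch

X = torch.tensor([[0.0], [1.0], [2.0]])  # n = 3 "x" points
T = torch.tensor([[0.0], [1.0]])         # t = 2 "step" values

# Repeat each row of X t times, tile T n times, then concatenate columns:
# rows come out as (0,0), (0,1), (1,0), (1,1), (2,0), (2,1).
prod = torch.cat([X.repeat_interleave(2, dim=-2), T.repeat(3, 1)], dim=-1)
print(prod.shape)  # torch.Size([6, 2])
```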
