Preserving train inputs and targets through transforms (#3044)

Carl Hvarfner · meta-codesync[bot] · commit b0d492d349f5 · 2025-10-17T18:43:15.000-07:00
Summary: Pull Request resolved: #3044 This PR preserves botorch transforms (specifically outcome_transforms, like Standardize) through state_dict loading. The fix also ensures that the train_targets of a leave-one-out model with outcome transforms will, in the default case, be the same as those of the base model, minus the point left out. __Longer explanation:__ Transforms, and specifically learnable outcome transforms like Standardize, will currently: a. Learn their parameters at initialization of the GP b. Transform the train_Ys to the normalized space Then, when we load a state dict, we will: a. Impose the new standardization parameters on already standardized data b. Potentially leave the transforms re-learnable, nullifying the change made by the state dict This has undesired consequences for cross-validation, as all cross-validated models will effectively have different training data. In essence, _we don't simply leave one point out, but instead we leave one out and re-standardize_. When we have outliers in the data, this will lead to substantially different predictions when the outlier is left out, since the outlier will substantially impact the outcome transform parameters. Notebook explaining the effect with some plots: N8342965 Reviewed By: Balandat, saitcakmak Differential Revision: D84571407 fbshipit-source-id: dafffe980d6a853733f9235ac84f2ab424b84f55
diff --git a/botorch/models/gpytorch.py b/botorch/models/gpytorch.py
@@ -17,7 +17,7 @@
 import warnings
 from abc import ABC
 from copy import deepcopy
-from typing import Any, TYPE_CHECKING
+from typing import Any, Mapping, TYPE_CHECKING
 
 import torch
 from botorch.acquisition.objective import PosteriorTransform
@@ -29,15 +29,18 @@
 from botorch.exceptions.warnings import (
     _get_single_precision_warning,
     BotorchTensorDimensionWarning,
+    BotorchWarning,
     InputDataWarning,
 )
 from botorch.models.model import Model, ModelList
 from botorch.models.utils import (
     _make_X_full,
     add_output_dim,
+    extract_targets_and_noise_single_output,
     gpt_posterior_settings,
     mod_batch_shape,
     multioutput_to_batch_mode_transform,
+    restore_targets_and_noise_single_output,
 )
 from botorch.models.utils.assorted import fantasize as fantasize_flag
 from botorch.posteriors.fully_bayesian import GaussianMixturePosterior
@@ -283,6 +286,103 @@ def condition_on_observations(
             ).detach()
         return fantasy_model
 
+    def _extract_targets_and_noise(self) -> tuple[Tensor, Tensor | None]:
+        r"""Extract targets and noise variance in the correct shape.
+
+        Returns a tuple of (Y, Yvar) where Y and Yvar have shape [batch_shape] x n x m,
+        with batch_shape included only if the training data initially contained it.
+        Yvar is `None` unless the likelihood is a `FixedNoiseGaussianLikelihood`.
+        """
+        if self.num_outputs > 1:
+            Y = self.train_targets.transpose(-1, -2)  # swap the last two dims
+            Yvar = None
+            if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
+                Yvar = self.likelihood.noise_covar.noise.transpose(-1, -2)
+        else:
+            Y, Yvar = extract_targets_and_noise_single_output(self)
+        return Y, Yvar
+
+    def _restore_targets_and_noise(
+        self, Y: Tensor, Yvar: Tensor | None, strict: bool
+    ) -> None:
+        r"""Write targets (and fixed noise, if applicable) back onto the model.
+
+        Args:
+            Y: Targets tensor in shape [batch_shape] x n x m.
+            Yvar: Noise variance shaped like Y; only used with a fixed-noise likelihood.
+            strict: Whether to strictly enforce shape constraints in `set_train_data`.
+        """
+        if self.num_outputs > 1:
+            Y = Y.transpose(-1, -2)  # swap the last two dims
+            if Yvar is not None and isinstance(
+                self.likelihood, FixedNoiseGaussianLikelihood
+            ):
+                Yvar = Yvar.transpose(-1, -2)
+                self.likelihood.noise_covar.noise = Yvar
+            self.set_train_data(targets=Y, strict=strict)
+        else:
+            restore_targets_and_noise_single_output(self, Y, Yvar, strict)
+
+    def load_state_dict(
+        self,
+        state_dict: Mapping[str, Any],
+        strict: bool = True,
+        keep_transforms: bool = True,
+    ) -> None:
+        r"""Load the model state, re-expressing the targets under loaded transforms.
+
+        Args:
+            state_dict: A dict containing the state of the model.
+            strict: Whether to strictly enforce that `state_dict` keys match the model.
+            keep_transforms: A boolean indicating whether to keep the input and outcome
+                transforms. Doing so is useful when loading a model that was trained on
+                a full set of data, and is later loaded with a subset of the data.
+        """
+        if not keep_transforms:
+            super().load_state_dict(state_dict, strict)
+            return
+
+        should_outcome_transform = (
+            hasattr(self, "train_targets")
+            and getattr(self, "outcome_transform", None) is not None
+        )
+
+        if should_outcome_transform:
+            with torch.no_grad():
+                # Recover the raw targets using the transform state before loading.
+                untransformed_Y, untransformed_Yvar = self._extract_targets_and_noise()
+                X = self.train_inputs[0]
+                try:
+                    untransformed_Y, untransformed_Yvar = (
+                        self.outcome_transform.untransform(
+                            Y=untransformed_Y,
+                            Yvar=untransformed_Yvar,
+                            X=X,
+                        )
+                    )
+                except NotImplementedError:
+                    warnings.warn(
+                        "Outcome transform does not support untransforming. "
+                        "Cannot load the state dict with transforms preserved. "
+                        "Setting keep_transforms=False.",
+                        BotorchWarning,
+                        stacklevel=3,
+                    )
+                    super().load_state_dict(state_dict, strict)
+                    return
+
+        super().load_state_dict(state_dict, strict)
+
+        if getattr(self, "input_transform", None) is not None:
+            self.input_transform.eval()
+
+        if should_outcome_transform:
+            self.outcome_transform.eval()
+            retransformed_Y, retransformed_Yvar = self.outcome_transform(
+                Y=untransformed_Y, Yvar=untransformed_Yvar, X=X
+            )
+            self._restore_targets_and_noise(retransformed_Y, retransformed_Yvar, strict)
+
 
 # pyre-fixme[13]: uninitialized attributes _num_outputs, _input_batch_shape,
 # _aug_batch_shape
@@ -659,6 +759,13 @@ def batch_shape(self) -> torch.Size:
                 raise NotImplementedError(msg + " that are not broadcastble.")
         return next(iter(batch_shapes))
 
+    def load_state_dict(
+        self, state_dict: Mapping[str, Any], strict: bool = True
+    ) -> None:
+        r"""Load the model state via `ModelList.load_state_dict`."""
+        # `ModelList` is named explicitly instead of going through `super()`.
+        return ModelList.load_state_dict(self, state_dict=state_dict, strict=strict)
+
     # pyre-fixme[14]: Inconsistent override in return types
     def posterior(
         self,
@@ -803,6 +910,27 @@ class MultiTaskGPyTorchModel(GPyTorchModel, ABC):
     "long-format" multi-task GP in the style of `MultiTaskGP`.
     """
 
+    def _extract_targets_and_noise(self) -> tuple[Tensor, Tensor | None]:
+        r"""Extract targets and noise variance for multi-task models.
+
+        Always delegates to `extract_targets_and_noise_single_output`, which adds a
+        trailing dim of size one, so Y and Yvar have shape [batch_shape] x n x 1.
+        Yvar is `None` unless the likelihood is a `FixedNoiseGaussianLikelihood`.
+        """
+        return extract_targets_and_noise_single_output(self)
+
+    def _restore_targets_and_noise(
+        self, Y: Tensor, Yvar: Tensor | None, strict: bool
+    ) -> None:
+        r"""Restore targets and noise variance for multi-task models.
+
+        Args:
+            Y: Targets tensor in shape [batch_shape] x n x 1 (trailing dim is squeezed).
+            Yvar: Optional noise variance tensor in shape [batch_shape] x n x 1.
+            strict: Passed on to `restore_targets_and_noise_single_output`.
+        """
+        restore_targets_and_noise_single_output(self, Y, Yvar, strict)
+
     def _apply_noise(
         self,
         X: Tensor,
diff --git a/botorch/models/model.py b/botorch/models/model.py
@@ -33,7 +33,6 @@
 from botorch.sampling.list_sampler import ListSampler
 from botorch.utils.containers import BotorchContainer
 from botorch.utils.datasets import SupervisedDataset
-from botorch.utils.transforms import is_fully_bayesian
 from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
 from torch import Tensor
 from torch.nn import Module, ModuleDict, ModuleList
@@ -578,18 +577,19 @@ def transform_inputs(self, X: Tensor) -> list[Tensor]:
         return transformed_X_list
 
     def load_state_dict(
-        self, state_dict: Mapping[str, Any], strict: bool = True
+        self,
+        state_dict: Mapping[str, Any],
+        strict: bool = True,
+        keep_transforms: bool = True,
     ) -> None:
         """Initialize the fully Bayesian models before loading the state dict."""
         for i, m in enumerate(self.models):
-            if is_fully_bayesian(m):
-                filtered_dict = {
-                    k.replace(f"models.{i}.", ""): v
-                    for k, v in state_dict.items()
-                    if k.startswith(f"models.{i}.")
-                }
-                m.load_state_dict(filtered_dict)
-        super().load_state_dict(state_dict=state_dict, strict=strict)
+            filtered_dict = {
+                k.removeprefix(f"models.{i}."): v
+                for k, v in state_dict.items()
+                if k.startswith(f"models.{i}.")
+            }
+            m.load_state_dict(filtered_dict, strict=strict)
 
     def fantasize(
         self,
diff --git a/botorch/models/utils/__init__.py b/botorch/models/utils/__init__.py
@@ -12,10 +12,12 @@
     check_standardization,
     consolidate_duplicates,
     detect_duplicates,
+    extract_targets_and_noise_single_output,
     fantasize,
     gpt_posterior_settings,
     mod_batch_shape,
     multioutput_to_batch_mode_transform,
+    restore_targets_and_noise_single_output,
     validate_input_scaling,
 )
 
@@ -33,4 +35,6 @@
     "validate_input_scaling",
     "detect_duplicates",
     "consolidate_duplicates",
+    "extract_targets_and_noise_single_output",
+    "restore_targets_and_noise_single_output",
 ]
diff --git a/botorch/models/utils/assorted.py b/botorch/models/utils/assorted.py
@@ -17,6 +17,7 @@
 from botorch.exceptions import InputDataError, InputDataWarning
 from botorch.settings import _Flag
 from gpytorch import settings as gpt_settings
+from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
 from gpytorch.module import Module
 from torch import Tensor
 
@@ -460,3 +461,37 @@ def get_task_value_remapping(
         )
         mapper[observed_task_values] = task_range.to(dtype=dtype)
     return mapper
+
+
+def extract_targets_and_noise_single_output(model) -> tuple[Tensor, Tensor | None]:
+    r"""Extract targets and noise variance for single-output models (m=1).
+
+    Args:
+        model: A model exposing `train_targets` and `likelihood`.
+
+    Returns:
+        (Y, Yvar), each [batch_shape] x n x 1; Yvar is `None` without fixed noise.
+    """
+    Y = model.train_targets.unsqueeze(-1)  # add a trailing output dim
+    Yvar = None
+    if isinstance(model.likelihood, FixedNoiseGaussianLikelihood):
+        Yvar = model.likelihood.noise_covar.noise.unsqueeze(-1)
+    return Y, Yvar
+
+
+def restore_targets_and_noise_single_output(
+    model, Y: Tensor, Yvar: Tensor | None, strict: bool
+) -> None:
+    r"""Restore targets and noise variance for single-output models (m=1).
+
+    Args:
+        model: A model exposing `likelihood` and `set_train_data`.
+        Y: Targets tensor in shape [batch_shape] x n x 1.
+        Yvar: Optional noise variance ([batch_shape] x n x 1); fixed-noise models only.
+        strict: Forwarded to `model.set_train_data`.
+    """
+    Y = Y.squeeze(-1)  # drop the trailing output dim
+    if Yvar is not None and isinstance(model.likelihood, FixedNoiseGaussianLikelihood):
+        Yvar = Yvar.squeeze(-1)
+        model.likelihood.noise_covar.noise = Yvar
+    model.set_train_data(targets=Y, strict=strict)
diff --git a/test/models/test_gpytorch.py b/test/models/test_gpytorch.py