meta-pytorch
diff --git a/‎botorch_community/models/prior_fitted_network.py‎
Lines changed: 72 additions & 86 deletions b/‎botorch_community/models/prior_fitted_network.py‎
Lines changed: 72 additions & 86 deletions
diff --git a/‎botorch_community/posteriors/riemann.py‎
Lines changed: 73 additions & 31 deletions b/‎botorch_community/posteriors/riemann.py‎
Lines changed: 73 additions & 31 deletions
@@ -18,15 +18,16 @@
 import torch
 from botorch.acquisition.objective import PosteriorTransform
 from botorch.exceptions.errors import UnsupportedError
-
 from botorch.logging import logger
 from botorch.models.model import Model
 from botorch.models.transforms.input import InputTransform
+from botorch.utils.transforms import match_batch_shape
 from botorch_community.models.utils.prior_fitted_network import (
     download_model,
     ModelPaths,
 )
 from botorch_community.posteriors.riemann import BoundedRiemannPosterior
+from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
 from pfns.train import MainConfig  # @manual=//pytorch/PFNs:PFNs
 from torch import Tensor
 from torch.nn import Module
@@ -58,7 +59,7 @@ def __init__(
 
         Args:
             train_X: A `n x d` tensor of training features.
-            train_Y: A `n x m` tensor of training observations.
+            train_Y: A `n x 1` tensor of training observations.
             model: A pre-trained PFN model with the following
                 forward(train_X, train_Y, X) -> logit predictions of shape
                 `n x b x c` where c is the number of discrete buckets
@@ -95,40 +96,35 @@ def __init__(
         if train_Yvar is not None:
             logger.debug("train_Yvar provided but ignored for PFNModel.")
 
-        if not (1 <= train_Y.dim() <= 3):
-            raise UnsupportedError("train_Y must be 1- to 3-dimensional.")
+        if train_Y.dim() != 2:
+            raise UnsupportedError("train_Y must be 2-dimensional.")
 
-        if not (2 <= train_X.dim() <= 3):
-            raise UnsupportedError("train_X must be 2- to 3-dimensional.")
+        if train_X.dim() != 2:
+            raise UnsupportedError("train_X must be 2-dimensional.")
 
-        if train_Y.dim() == train_X.dim():
-            if train_Y.shape[-1] > 1:
-                raise UnsupportedError("Only 1 target allowed for PFNModel.")
-            train_Y = train_Y.squeeze(-1)
+        if train_Y.shape[-1] > 1:
+            raise UnsupportedError("Only 1 target allowed for PFNModel.")
 
-        if (len(train_X.shape) != len(train_Y.shape) + 1) or (
-            train_Y.shape != train_X.shape[:-1]
-        ):
+        if train_X.shape[0] != train_Y.shape[0]:
             raise UnsupportedError(
-                "train_X and train_Y must have the same shape except "
-                "for the last dimension."
+                "train_X and train_Y must have the same number of rows."
             )
 
-        if len(train_X.shape) == 2:
-            # adding batch dimension
-            train_X = train_X.unsqueeze(0)
-            train_Y = train_Y.unsqueeze(0)
-
         with torch.no_grad():
             self.transformed_X = self.transform_inputs(
                 X=train_X, input_transform=input_transform
             )
 
-        self.train_X = train_X  # shape: `b x n x d`
-        self.train_Y = train_Y  # shape: `b x n`
-        self.pfn = model.to(train_X.device)
+        self.train_X = train_X  # shape: (n, d)
+        self.train_Y = train_Y  # shape: (n, 1)
+        # Downstream botorch tooling expects a likelihood to be specified,
+        # so here we use a FixedNoiseGaussianLikelihood that is unused.
+        if train_Yvar is None:
+            train_Yvar = torch.zeros_like(train_Y)
+        self.likelihood = FixedNoiseGaussianLikelihood(noise=train_Yvar)
+        self.pfn = model.to(device=train_X.device)
         self.batch_first = batch_first
-        self.constant_model_kwargs = constant_model_kwargs
+        self.constant_model_kwargs = constant_model_kwargs or {}
         if input_transform is not None:
             self.input_transform = input_transform
 
@@ -146,23 +142,19 @@ def posterior(
             any `model.forward` or `model.likelihood` calls.
 
         Args:
-            X: A `b'? x b? x q x d`-dim Tensor, where `d` is the dimension of the
-                feature space, `q` is the number of points considered jointly,
-                and `b` is the batch dimension.
-                We only allow `q=1` for PFNModel, so q can also be omitted, i.e.
-                `b x d`-dim Tensor.
-            **Currently not supported for PFNModel**.
+            X: A `b? x q? x d`-dim Tensor, where `d` is the dimension of the
+                feature space.
             output_indices: **Currently not supported for PFNModel.**
             observation_noise: **Currently not supported for PFNModel**.
             posterior_transform: **Currently not supported for PFNModel**.
 
         Returns:
-            A `BoundedRiemannPosterior` object, representing a batch of `b` joint
-            distributions over `q` points and `m` outputs each.
+            A `BoundedRiemannPosterior`, representing a batch of `b? x q?`
+            distributions.
         """
         self.pfn.eval()
         if output_indices is not None:
-            raise RuntimeError(
+            raise UnsupportedError(
                 "output_indices is not None. PFNModel should not "
                 "be a multi-output model."
             )
@@ -173,60 +165,54 @@ def posterior(
         if posterior_transform is not None:
             raise UnsupportedError("posterior_transform is not supported for PFNModel.")
 
-        if not (1 <= len(X.shape) <= 4):
-            raise UnsupportedError("X must be 1- to 4-dimensional.")
-
-        # X has shape b'? x b? x q? x d
-
-        orig_X_shape = X.shape
-        q_in_orig_X_shape = len(X.shape) > 2
-
-        if len(X.shape) == 1:
-            X = X.unsqueeze(0).unsqueeze(0).unsqueeze(0)  # shape `b'=1 x b=1 x q=1 x d`
-        elif len(X.shape) == 2:
-            X = X.unsqueeze(1).unsqueeze(1)  # shape `b' x b=1 x q=1 x d`
-        elif len(X.shape) == 3:
-            if self.train_X.shape[0] == 1:
-                X = X.unsqueeze(1)  # shape `b' x b=1 x q x d`
-            else:
-                X = X.unsqueeze(0)  # shape `b'=1 x b x q x d`
-
-        # X has shape `b' x b x q x d`
-
-        if X.shape[2] != 1:
-            raise UnsupportedError("Only q=1 is supported for PFNModel.")
-
-        # X has shape `b' x b x q=1 x d`
-        X = self.transform_inputs(X)
-        train_X = self.transformed_X  # shape `b x n x d`
-        train_Y = self.train_Y  # shape `b x n`
-        folded_X = X.transpose(0, 2).squeeze(0)  # shape `b x b' x d
-
-        constant_model_kwargs = self.constant_model_kwargs or {}
-
-        if self.batch_first:
-            logits = self.pfn(
-                train_X.float(),
-                train_X.float(),
-                folded_X.float(),
-                **constant_model_kwargs,
-            ).transpose(0, 1)
-        else:
-            logits = self.pfn(
-                train_X.float().transpose(0, 1),
-                train_Y.float().transpose(0, 1),
-                folded_X.float().transpose(0, 1),
-                **constant_model_kwargs,
-            )
-
-        # logits shape `b' x b x logits_dim`
+        orig_X_shape = X.shape  # X has shape b? x q? x d
+        X = self.prepare_X(X)  # shape (b, q, d)
+        train_X = match_batch_shape(self.transformed_X, X)  # shape (b, n, d)
+        train_Y = match_batch_shape(self.train_Y, X)  # shape (b, n, 1)
 
-        logits = logits.view(
+        probabilities = self.pfn_predict(
+            X=X, train_X=train_X, train_Y=train_Y
+        )  # (b, q, num_buckets)
+        probabilities = probabilities.view(
             *orig_X_shape[:-1], -1
-        )  # orig shape w/o q but logits_dim at end: `b'? x b? x q? x logits_dim`
-        if q_in_orig_X_shape:
-            logits = logits.squeeze(-2)  # shape `b'? x b? x logits_dim`
+        )  # (b?, q?, num_buckets)
 
-        probabilities = logits.softmax(dim=-1)
+        # Get posterior with the right dtype
+        borders = self.pfn.criterion.borders.to(X.dtype)
+        return BoundedRiemannPosterior(
+            borders=borders,
+            probabilities=probabilities,
+        )
 
-        return BoundedRiemannPosterior(self.pfn.criterion.borders, probabilities)
+    def prepare_X(self, X: Tensor) -> Tensor:
+        """Pad `X` to three dimensions and apply the input transform.
+
+        Args:
+            X: A tensor with at most three dimensions (`b? x q? x d` per the
+                caller's shape comment).
+
+        Returns:
+            The output of `self.transform_inputs` applied to the 3-d version
+            of `X`.
+
+        Raises:
+            UnsupportedError: If `X` has more than three dimensions.
+        """
+        if len(X.shape) > 3:
+            raise UnsupportedError(f"X must be at most 3-d, got {X.shape}.")
+        # Prepend singleton dimensions until X is 3-dimensional.
+        while len(X.shape) < 3:
+            X = X.unsqueeze(0)
+
+        X = self.transform_inputs(X)  # shape (b, q, d)
+        return X
+
+    def pfn_predict(self, X: Tensor, train_X: Tensor, train_Y: Tensor) -> Tensor:
+        """Evaluate the PFN and return per-bucket probabilities.
+
+        Args:
+            X: Test inputs of shape (b, q, d).
+            train_X: Training inputs of shape (b, n, d).
+            train_Y: Training targets of shape (b, n, 1).
+
+        Returns:
+            The softmax over the last dimension of the PFN logits, cast to the
+            dtype of `X`; shape (b, q, num_buckets) per the inline comments.
+        """
+        if not self.batch_first:
+            # Swap the first two dims so the batch dimension comes second.
+            X = X.transpose(0, 1)  # shape (q, b, d)
+            train_X = train_X.transpose(0, 1)  # shape (n, b, d)
+            train_Y = train_Y.transpose(0, 1)  # shape (n, b, 1)
+
+        # The PFN is always called with float32 inputs.
+        logits = self.pfn(
+            train_X.float(),
+            train_Y.float(),
+            X.float(),
+            **self.constant_model_kwargs,
+        )
+        if not self.batch_first:
+            # Undo the swap so the batch dimension is first again.
+            logits = logits.transpose(0, 1)  # shape (b, q, num_buckets)
+        # Cast the float32 model output back to the dtype of the inputs.
+        logits = logits.to(X.dtype)
+
+        probabilities = logits.softmax(dim=-1)  # shape (b, q, num_buckets)
+        return probabilities
@@ -14,13 +14,16 @@
 
 import torch
 from botorch.posteriors.posterior import Posterior
+from botorch.sampling.get_sampler import _get_sampler_mvn, GetSampler
+from botorch.sampling.normal import NormalMCSampler
 from torch import Tensor
 
 
 class BoundedRiemannPosterior(Posterior):
+    batch_range = (0, -1)
+
     """
-    Notes: Bounded posterior for now, will work on unbounded posteriors.
-    This is also only over 1 test point, not batches.
+    A single variate bounded Riemann posterior.
     """
 
     def __init__(self, borders, probabilities):
@@ -31,9 +34,9 @@ def __init__(self, borders, probabilities):
         borders, with each bucket having an associated probability.
 
         Args:
-            borders: A tensor of shape `(n_buckets + 1,)` defining the boundaries of
+            borders: A tensor of shape `(num_buckets + 1,)` defining the boundaries of
                 the buckets. Must be monotonically increasing.
-            probabilities: A tensor of shape `(..., n_buckets,)` defining the
+            probabilities: A tensor of shape `(b?, q?, num_buckets)` defining the
                 probability mass in each bucket. Must sum to 1 in the last dim.
         """
 
@@ -79,18 +82,40 @@ def rsample(
             `self._extended_shape(sample_shape=sample_shape)`.
         """
         sample_shape = sample_shape if sample_shape is not None else torch.Size([1])
-        z = torch.rand(sample_shape)
-        return self.rsample_from_base_samples(sample_shape, z)
+        base_samples = torch.randn(
+            sample_shape + self.probabilities.shape[:-1],
+            device=self.probabilities.device,
+        )
+        return self.rsample_from_base_samples(
+            sample_shape=sample_shape, base_samples=base_samples
+        )
 
     def rsample_from_base_samples(
-        self, sample_shape: torch.Size, base_samples: Tensor
+        self,
+        sample_shape: torch.Size,
+        base_samples: Tensor,
     ) -> Tensor:
+        """Draw posterior samples from standard normal base samples.
+
+        The base samples are N(0, I) because this posterior is registered
+        with a normal sampler (see the `GetSampler` registration below).
+        Registering it with a uniform sampler instead would make the
+        conversion to uniform RVs unnecessary.
+
+        Args:
+            sample_shape: The sample shape; must equal the leading dimensions
+                of `base_samples`.
+            base_samples: N(0, I) samples of shape (nsamp, b?, q).
+
+        Returns:
+            `self.icdf` evaluated at the standard normal CDF of
+            `base_samples`; shape (nsamp, b?, q, 1).
+
+        Raises:
+            ValueError: If `sample_shape` disagrees with the leading
+                dimensions of `base_samples`.
+        """
         if base_samples.shape[: len(sample_shape)] != sample_shape:
-            raise RuntimeError(
+            raise ValueError(
                 "`sample_shape` disagrees with shape of `base_samples`. "
                 f"Got {sample_shape=} and {base_samples.shape=}."
             )
-        return self.icdf(base_samples)
+        # Convert base samples from N(0, I) to Uniform(0, 1) via the normal CDF.
+        U = torch.distributions.Normal(0, 1).cdf(base_samples)
+        # Convert U to Riemann samples.
+        Z = self.icdf(U)  # (nsamp, b?, q, 1)
+        return Z
+
+    @property
+    def base_sample_shape(self) -> torch.Size:
+        r"""The shape of the base samples required to draw from the posterior.
+
+        This is the shape of `probabilities` without its last (bucket)
+        dimension.
+        """
+        return self.probabilities.shape[:-1]
 
     @property
     def device(self) -> torch.device:
@@ -137,45 +162,62 @@ def confidence_region(
                 Use .954 for 2 sigma of a normal distribution.
         """
         side_probs = (1.0 - confidence_level) / 2
-        return self.icdf(side_probs), self.icdf(1.0 - side_probs)
+        lower = self.icdf(side_probs).squeeze()
+        upper = self.icdf(1.0 - side_probs).squeeze()
+        return lower, upper
 
-    def icdf(self, value: Union[Tensor, float]) -> Tensor:
+    def icdf(
+        self,
+        value: Union[float, Tensor],
+    ) -> Tensor:
         r"""Inverse cdf (with gradients).
         Use value to get the index of the bucket that contains the value
         and then interpolate between the left and right borders of the bucket
 
         Args:
             value: The value at which to evaluate the inverse CDF.
+                Either a float, or a tensor of shape (b', b?, q), where
+                probabilities has shape (b?, q, num_buckets).
 
         Returns:
             The inverse CDF of the posterior at the given value(s).
-            The shape of the return is the shape of value, with the batch
-            shape of the probs (all dims up to the final dim) appended
-            with a final trailing dimension of 1, for the dim of the dist.
+            The return has shape (b', b?, q, 1), i.e. the shape of `value`
+            with a trailing singleton dimension.
         """
+        if not torch.is_tensor(value):
+            # Promote the scalar to a (b'=1, b?, q) tensor.
+            value = torch.tensor(value, device=self.device, dtype=self.dtype)
+            value = value.expand(*self.probabilities.shape[:-1]).unsqueeze(0)
+        # Move the leading b' dim last so it lines up with the bucket dim.
+        value = value.movedim(0, -1)  # (b?, q, b')
 
-        # final shape is (batch_shape, -1)
-        value = torch.as_tensor(
-            value, device=self.borders.device, dtype=self.borders.dtype
-        )
-        value_shape = value.shape
-        # shape of cumprobs is (batch_shape, n_buckets)
-        value = value.broadcast_to(size=(*self.cumprobs.shape[:-1], *value_shape))
-        value = value.reshape(*self.cumprobs.shape[:-1], -1)
-
-        # get first index where cumprobs > value
-        index = torch.searchsorted(self.cumprobs, value)
+        # NOTE(review): a tensor `value` is not cast to the dtype of cumprobs;
+        # confirm callers always pass a matching dtype.
+        # NOTE(review): if a value exceeds the last entry of cumprobs the
+        # returned index equals num_buckets and the gathers below would be
+        # out of range; confirm this cannot happen.
+        index = torch.searchsorted(self.cumprobs, value)  # (b?, q, b')
 
-        left_border = self.borders[index]
+        left_border = self.borders[index]  # (b?, q, b')
         right_border = self.borders[index + 1]
 
         bucket_width = right_border - left_border
         right_cum_probs = torch.gather(self.cumprobs, -1, index)
         prob_width = torch.gather(self.probabilities, -1, index)
 
         bucket_proportion_remaining = (right_cum_probs - value) / prob_width
-        result = left_border + (1 - bucket_proportion_remaining) * bucket_width
-
-        # reshape to (value_shape, batch_shape, 1)
-        result = result.transpose(0, -1)
-        return result.reshape(*value_shape, *self.cumprobs.shape[:-1], 1)
+        # Linear interpolation inside the bucket, measured from its right border.
+        result = (
+            right_border - bucket_proportion_remaining * bucket_width
+        )  # (b?, q, b')
+
+        # Move b' back to the front and add the trailing dim: (b', b?, q, 1).
+        result = result.movedim(-1, 0).unsqueeze(-1)
+        return result
+
+
+@GetSampler.register(BoundedRiemannPosterior)
+def _get_sampler_riemann(
+    posterior: BoundedRiemannPosterior,
+    sample_shape: torch.Size,
+    *,
+    seed: int | None = None,
+) -> NormalMCSampler:
+    """Return the sampler to use for a `BoundedRiemannPosterior`.
+
+    Registered with `GetSampler` for `BoundedRiemannPosterior`; all arguments
+    are forwarded unchanged to `_get_sampler_mvn`.
+
+    Args:
+        posterior: The posterior to sample from.
+        sample_shape: The sample shape forwarded to the sampler.
+        seed: Optional seed forwarded to the sampler.
+
+    Returns:
+        The sampler produced by `_get_sampler_mvn`.
+    """
+    return _get_sampler_mvn(
+        posterior=posterior,
+        sample_shape=sample_shape,
+        seed=seed,
+    )