
Commit c984666

saitcakmak authored and facebook-github-bot committed
Support multi-output models in MES using PosteriorTransform (#904)
Summary:
Pull Request resolved: #904

Reviewed By: Balandat

Differential Revision: D30022574

fbshipit-source-id: 6292eea8500c3013fd29efefd736352231316891
1 parent 6965426 commit c984666
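
To illustrate the change, here is a minimal usage sketch. The training data, scalarization weights, and candidate set below are hypothetical; `ScalarizedPosteriorTransform` collapses the two model outputs into a single-output posterior, which is what the new `posterior_transform` argument expects.

import torch
from botorch.acquisition.max_value_entropy_search import qMaxValueEntropy
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

# Hypothetical training data: 2-dim inputs, two outcomes.
train_X = torch.rand(20, 2)
train_Y = torch.rand(20, 2)
model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

# Collapse the two outputs into a single-output posterior via scalarization.
pt = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, 0.5]))

# Before this commit, qMaxValueEntropy raised NotImplementedError for any
# model with num_outputs != 1; with a PosteriorTransform it now works.
candidate_set = torch.rand(1000, 2)
qMES = qMaxValueEntropy(model, candidate_set, posterior_transform=pt)
mes_value = qMES(torch.rand(1, 1, 2))  # evaluate at a random 1-point batch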

File tree

5 files changed (+181, -34 lines)


botorch/acquisition/active_learning.py

Lines changed: 3 additions & 1 deletion
@@ -66,7 +66,9 @@ def __init__(
             sampler: The sampler used for drawing fantasy samples. In the basic setting
                 of a standard GP (default) this is a dummy, since the variance of the
                 model after conditioning does not actually depend on the sampled values.
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             X_pending: A `n' x d`-dim Tensor of `n'` design points that have
                 points that have been submitted for function evaluation but
                 have not yet been evaluated.
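
A hypothetical sketch of the documented behavior, assuming a two-output model and illustrative scalarization weights and integration points; the transform reduces the multi-output posterior to a single output before the variance is integrated:

import torch
from botorch.acquisition.active_learning import qNegIntegratedPosteriorVariance
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(20, 2), torch.rand(20, 2))  # two outputs
pt = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, 0.5]))
mc_points = torch.rand(128, 2)  # hypothetical MC integration points
qNIPV = qNegIntegratedPosteriorVariance(
    model=model, mc_points=mc_points, posterior_transform=pt
)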

botorch/acquisition/analytic.py

Lines changed: 18 additions & 6 deletions
@@ -41,7 +41,9 @@ def __init__(

         Args:
             model: A fitted single-outcome model.
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
         """
         super().__init__(model=model)
         posterior_transform = self._deprecate_acqf_objective(
@@ -99,7 +101,9 @@ def __init__(
             model: A fitted single-outcome model.
             best_f: Either a scalar or a `b`-dim Tensor (batch mode) representing
                 the best function value observed so far (assumed noiseless).
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem.
         """
         super().__init__(model=model, posterior_transform=posterior_transform, **kwargs)
@@ -164,7 +168,9 @@ def __init__(
         Args:
             model: A fitted single-outcome GP model (must be in batch mode if
                 candidate sets X will be)
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem. Note
                 that if `maximize=False`, the posterior mean is negated. As a
                 consequence `optimize_acqf(PosteriorMean(gp, maximize=False))`
@@ -225,7 +231,9 @@ def __init__(
             model: A fitted single-outcome model.
             best_f: Either a scalar or a `b`-dim Tensor (batch mode) representing
                 the best function value observed so far (assumed noiseless).
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem.
         """
         super().__init__(model=model, posterior_transform=posterior_transform, **kwargs)
@@ -293,7 +301,9 @@ def __init__(
                 candidate sets X will be)
             beta: Either a scalar or a one-dim tensor with `b` elements (batch mode)
                 representing the trade-off parameter between mean and covariance
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem.
         """
         super().__init__(model=model, posterior_transform=posterior_transform, **kwargs)
@@ -636,7 +646,9 @@ def __init__(
         Args:
             model: A fitted single-outcome model.
             weights: A tensor of shape `q` for scalarization.
-            posterior_transform: A PosteriorTransform. Required for multi-output models.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
         """
         super().__init__(model=model, posterior_transform=posterior_transform, **kwargs)
         self.register_buffer("weights", weights.unsqueeze(dim=0))
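
The same pattern applies across the analytic acquisition functions touched above. A hypothetical sketch, again assuming a two-output model and illustrative weights; since the scalarization is linear, the transformed posterior remains Gaussian, which is what these closed-form acquisition functions require:

import torch
from botorch.acquisition.analytic import ExpectedImprovement, UpperConfidenceBound
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(20, 2), torch.rand(20, 2))  # two outputs
pt = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, 0.5]))
EI = ExpectedImprovement(model=model, best_f=0.5, posterior_transform=pt)
UCB = UpperConfidenceBound(model=model, beta=0.2, posterior_transform=pt)
ei_at_x = EI(torch.rand(1, 1, 2))  # evaluate at a random 1-point batch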

botorch/acquisition/max_value_entropy_search.py

Lines changed: 78 additions & 19 deletions
@@ -39,6 +39,8 @@
 import torch
 from botorch.acquisition.acquisition import AcquisitionFunction
 from botorch.acquisition.cost_aware import CostAwareUtility, InverseCostWeightedUtility
+from botorch.acquisition.objective import PosteriorTransform
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.cost import AffineFidelityCostModel
 from botorch.models.model import Model
 from botorch.models.utils import check_no_nans
@@ -68,6 +70,7 @@ def __init__(
         self,
         model: Model,
         num_mv_samples: int,
+        posterior_transform: Optional[PosteriorTransform] = None,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
     ) -> None:
@@ -76,17 +79,18 @@ def __init__(
         Args:
             model: A fitted single-outcome model.
             num_mv_samples: Number of max value samples.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
                 submitted for function evaluation but have not yet been evaluated.
         """
         super().__init__(model=model)

-        # Multi-output GP models are not currently supported
-        if model.num_outputs != 1:
-            raise NotImplementedError(
-                "Multi-output models are not yet supported by "
-                f"`{self.__class__.__name__}`."
+        if posterior_transform is None and model.num_outputs != 1:
+            raise UnsupportedError(
+                "Must specify a posterior transform when using a multi-output model."
             )

         # Batched GP models are not currently supported
@@ -96,10 +100,11 @@ def __init__(
             batch_shape = torch.Size()
         if len(batch_shape) > 0:
             raise NotImplementedError(
-                "Batched GP models (e.g. fantasized models) are not yet "
+                "Batched GP models (e.g., fantasized models) are not yet "
                 f"supported by `{self.__class__.__name__}`."
             )
         self.num_mv_samples = num_mv_samples
+        self.posterior_transform = posterior_transform
         self.maximize = maximize
         self.weight = 1.0 if maximize else -1.0
         self.set_X_pending(X_pending)
@@ -116,7 +121,11 @@ def forward(self, X: Tensor) -> Tensor:
             A `batch_shape`-dim Tensor of MVE values at the given design points `X`.
         """
         # Compute the posterior, posterior mean, variance and std
-        posterior = self.model.posterior(X.unsqueeze(-3), observation_noise=False)
+        posterior = self.model.posterior(
+            X.unsqueeze(-3),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
+        )
         # batch_shape x num_fantasies x (m) x 1
         mean = self.weight * posterior.mean.squeeze(-1).squeeze(-1)
         variance = posterior.variance.clamp_min(CLAMP_LB).view_as(mean)
@@ -193,6 +202,7 @@ def __init__(
         model: Model,
         candidate_set: Tensor,
         num_mv_samples: int = 10,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -206,6 +216,9 @@ def __init__(
                 discretize the design space. Max values are sampled from the
                 (joint) model posterior over these points.
             num_mv_samples: Number of max value samples.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -231,6 +244,7 @@ def __init__(
         super().__init__(
             model=model,
             num_mv_samples=num_mv_samples,
+            posterior_transform=posterior_transform,
             maximize=maximize,
             X_pending=X_pending,
         )
@@ -275,6 +289,7 @@ def _sample_max_values(
                 model=self.model,
                 candidate_set=candidate_set,
                 num_samples=self.num_mv_samples,
+                posterior_transform=self.posterior_transform,
                 maximize=self.maximize,
             )

@@ -303,6 +318,7 @@ def __init__(
         num_fantasies: int = 16,
         num_mv_samples: int = 10,
         num_y_samples: int = 128,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -321,6 +337,9 @@ def __init__(
                 complexity, wall time and memory). Ignored if `X_pending` is `None`.
             num_mv_samples: Number of max value samples.
             num_y_samples: Number of posterior samples at specific design point `X`.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -332,6 +351,7 @@ def __init__(
             model=model,
             candidate_set=candidate_set,
             num_mv_samples=num_mv_samples,
+            posterior_transform=posterior_transform,
             use_gumbel=use_gumbel,
             maximize=maximize,
             X_pending=X_pending,
@@ -397,7 +417,11 @@ def _compute_information_gain(
             given design points `X` (`num_fantasies=1` for non-fantasized models).
         """
         # compute the std_m, variance_m with noisy observation
-        posterior_m = self.model.posterior(X.unsqueeze(-3), observation_noise=True)
+        posterior_m = self.model.posterior(
+            X.unsqueeze(-3),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
+        )
         # batch_shape x num_fantasies x (m) x (1 + num_trace_observations)
         mean_m = self.weight * posterior_m.mean.squeeze(-1)
         # batch_shape x num_fantasies x (m) x (1 + num_trace_observations)
@@ -489,7 +513,7 @@ class qLowerBoundMaxValueEntropy(DiscreteMaxValueBase):
     the mutual information between max values and a batch of candidate points `X`.
     See [Moss2021gibbon]_ for a detailed discussion.

-    The model must be single-outcome.
+    The model must be single-outcome, unless using a PosteriorTransform.
     q > 1 is supported through greedy batch filling.

     Example:
@@ -527,7 +551,9 @@ def _compute_information_gain(
         # doing posterior computations twice

         # compute the mean_m, variance_m with noisy observation
-        posterior_m = self.model.posterior(X, observation_noise=True)
+        posterior_m = self.model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )
         mean_m = self.weight * posterior_m.mean.squeeze(-1)
         # batch_shape x 1
         variance_m = posterior_m.variance.clamp_min(CLAMP_LB).squeeze(-1)
@@ -584,17 +610,29 @@ def _compute_information_gain(
         # it provides only a translation of the acqusition function surface
         # and can thus be ignored.

+        if self.posterior_transform is not None:
+            raise UnsupportedError(
+                "qLowerBoundMaxValueEntropy does not support PosteriorTransforms "
+                "when X_pending is not None."
+            )
+
         X_batches = torch.cat(
             [X, self.X_pending.unsqueeze(0).repeat(X.shape[0], 1, 1)], 1
         )
         # batch_shape x (1 + m) x d
+        # NOTE: This is the blocker for supporting posterior transforms.
+        # We would have to process this MVN, applying whatever operations
+        # are typically applied for the corresponding posterior, then applying
+        # the posterior transform onto the resulting object.
         V = self.model(X_batches)
         # Evaluate terms required for A
         A = V.lazy_covariance_matrix[:, 0, 1:].unsqueeze(1)
         # batch_shape x 1 x m
         # Evaluate terms required for B
         B = self.model.posterior(
-            self.X_pending, observation_noise=True
+            self.X_pending,
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         ).mvn.covariance_matrix.unsqueeze(0)
         # 1 x m x m

@@ -616,8 +654,8 @@ class qMultiFidelityMaxValueEntropy(qMaxValueEntropy):
     for a detailed discussion of the basic ideas on multi-fidelity MES
     (note that this implementation is somewhat different).

-    The model must be single-outcome. The batch case `q > 1` is supported
-    through cyclic optimization and fantasies.
+    The model must be single-outcome, unless using a PosteriorTransform.
+    The batch case `q > 1` is supported through cyclic optimization and fantasies.

     Example:
         >>> model = SingleTaskGP(train_X, train_Y)
@@ -634,6 +672,7 @@ def __init__(
         num_fantasies: int = 16,
         num_mv_samples: int = 10,
         num_y_samples: int = 128,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -657,6 +696,9 @@ def __init__(
             is not `None`.
             num_mv_samples: Number of max value samples.
             num_y_samples: Number of posterior samples at specific design point `X`.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -678,9 +720,10 @@ def __init__(
             num_fantasies=num_fantasies,
             num_mv_samples=num_mv_samples,
             num_y_samples=num_y_samples,
-            X_pending=X_pending,
+            posterior_transform=posterior_transform,
             use_gumbel=use_gumbel,
             maximize=maximize,
+            X_pending=X_pending,
         )

         if cost_aware_utility is None:
@@ -731,7 +774,9 @@ def forward(self, X: Tensor) -> Tensor:

         # Compute the posterior, posterior mean, variance without noise
         # `_m` and `_M` in the var names means the current and the max fidelity.
-        posterior = self.model.posterior(X_all, observation_noise=False)
+        posterior = self.model.posterior(
+            X_all, observation_noise=False, posterior_transform=self.posterior_transform
+        )
         mean_M = self.weight * posterior.mean[..., -1, 0]  # batch_shape x num_fantasies
         variance_M = posterior.variance[..., -1, 0].clamp_min(CLAMP_LB)
         # get the covariance between the low fidelities and max fidelity
@@ -751,7 +796,11 @@ def forward(self, X: Tensor) -> Tensor:


 def _sample_max_value_Thompson(
-    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
+    model: Model,
+    candidate_set: Tensor,
+    num_samples: int,
+    posterior_transform: Optional[PosteriorTransform] = None,
+    maximize: bool = True,
 ) -> Tensor:
     """Samples the max values by discrete Thompson sampling.

@@ -762,12 +811,15 @@ def _sample_max_value_Thompson(
         candidate_set: A `n x d` Tensor including `n` candidate points to
             discretize the design space.
         num_samples: Number of max value samples.
+        posterior_transform: A PosteriorTransform. If using a multi-output model,
+            a PosteriorTransform that transforms the multi-output posterior into a
+            single-output posterior is required.
         maximize: If True, consider the problem a maximization problem.

     Returns:
         A `num_samples x num_fantasies` Tensor of posterior max value samples.
     """
-    posterior = model.posterior(candidate_set)
+    posterior = model.posterior(candidate_set, posterior_transform=posterior_transform)
     weight = 1.0 if maximize else -1.0
     samples = weight * posterior.rsample(torch.Size([num_samples])).squeeze(-1)
     # samples is num_samples x (num_fantasies) x n
@@ -779,7 +831,11 @@ def _sample_max_value_Thompson(


 def _sample_max_value_Gumbel(
-    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
+    model: Model,
+    candidate_set: Tensor,
+    num_samples: int,
+    posterior_transform: Optional[PosteriorTransform] = None,
+    maximize: bool = True,
 ) -> Tensor:
     """Samples the max values by Gumbel approximation.

@@ -790,13 +846,16 @@ def _sample_max_value_Gumbel(
         candidate_set: A `n x d` Tensor including `n` candidate points to
             discretize the design space.
         num_samples: Number of max value samples.
+        posterior_transform: A PosteriorTransform. If using a multi-output model,
+            a PosteriorTransform that transforms the multi-output posterior into a
+            single-output posterior is required.
         maximize: If True, consider the problem a maximization problem.

     Returns:
         A `num_samples x num_fantasies` Tensor of posterior max value samples.
     """
     # define the approximate CDF for the max value under the independence assumption
-    posterior = model.posterior(candidate_set)
+    posterior = model.posterior(candidate_set, posterior_transform=posterior_transform)
     weight = 1.0 if maximize else -1.0
     mu = weight * posterior.mean
     sigma = posterior.variance.clamp_min(1e-8).sqrt()
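
The two sampling helpers at the end of the diff gain the same argument. A hypothetical sketch of the Gumbel path, with illustrative model, weights, and candidate set (`_sample_max_value_Gumbel` is a private helper, imported here only for illustration); note that per the new check above, qLowerBoundMaxValueEntropy raises UnsupportedError if a posterior transform is combined with `X_pending`:

import torch
from botorch.acquisition.max_value_entropy_search import _sample_max_value_Gumbel
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(20, 2), torch.rand(20, 2))  # two outputs
pt = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, 0.5]))
candidate_set = torch.rand(100, 2)
# Approximate max-value samples drawn from the scalarized (single-output)
# posterior over the discretized candidate set.
max_values = _sample_max_value_Gumbel(
    model=model,
    candidate_set=candidate_set,
    num_samples=10,
    posterior_transform=pt,
)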

botorch/acquisition/multi_objective/max_value_entropy_search.py

Lines changed: 2 additions & 0 deletions
@@ -113,6 +113,8 @@ def __init__(
             self._sample_max_values()
         else:
             self.set_X_pending(X_pending)
+        # This avoids attribute errors in qMaxValueEntropy code.
+        self.posterior_transform = None

     def set_X_pending(self, X_pending: Optional[Tensor] = None) -> None:
         r"""Set pending points.
