 import torch
 from botorch.acquisition.acquisition import AcquisitionFunction
 from botorch.acquisition.cost_aware import CostAwareUtility, InverseCostWeightedUtility
+from botorch.acquisition.objective import PosteriorTransform
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.cost import AffineFidelityCostModel
 from botorch.models.model import Model
 from botorch.models.utils import check_no_nans
@@ -68,6 +70,7 @@ def __init__(
         self,
         model: Model,
         num_mv_samples: int,
+        posterior_transform: Optional[PosteriorTransform] = None,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
     ) -> None:
@@ -76,17 +79,18 @@ def __init__(
         Args:
             model: A fitted single-outcome model.
             num_mv_samples: Number of max value samples.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
                 submitted for function evaluation but have not yet been evaluated.
         """
         super().__init__(model=model)

-        # Multi-output GP models are not currently supported
-        if model.num_outputs != 1:
-            raise NotImplementedError(
-                "Multi-output models are not yet supported by "
-                f"`{self.__class__.__name__}`."
+        if posterior_transform is None and model.num_outputs != 1:
+            raise UnsupportedError(
+                "Must specify a posterior transform when using a multi-output model."
             )

         # Batched GP models are not currently supported
@@ -96,10 +100,11 @@ def __init__(
             batch_shape = torch.Size()
         if len(batch_shape) > 0:
             raise NotImplementedError(
-                "Batched GP models (e.g. fantasized models) are not yet "
+                "Batched GP models (e.g., fantasized models) are not yet "
                 f"supported by `{self.__class__.__name__}`."
             )
         self.num_mv_samples = num_mv_samples
+        self.posterior_transform = posterior_transform
         self.maximize = maximize
         self.weight = 1.0 if maximize else -1.0
         self.set_X_pending(X_pending)
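With this change, constructing one of these acquisition functions from a multi-output model without a transform raises `UnsupportedError`. A minimal sketch of the intended usage (the model, data, and `ScalarizedPosteriorTransform` weights are illustrative, not part of this diff):

```python
import torch
from botorch.acquisition.max_value_entropy_search import qMaxValueEntropy
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 2)
train_Y = torch.rand(8, 2)  # two outcomes -> multi-output model
model = SingleTaskGP(train_X, train_Y)
candidate_set = torch.rand(100, 2)

# qMaxValueEntropy(model, candidate_set)  # would raise UnsupportedError
pt = ScalarizedPosteriorTransform(weights=torch.tensor([1.0, -0.5]))
MES = qMaxValueEntropy(model, candidate_set, posterior_transform=pt)
```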
@@ -116,7 +121,11 @@ def forward(self, X: Tensor) -> Tensor:
             A `batch_shape`-dim Tensor of MVE values at the given design points `X`.
         """
         # Compute the posterior, posterior mean, variance and std
-        posterior = self.model.posterior(X.unsqueeze(-3), observation_noise=False)
+        posterior = self.model.posterior(
+            X.unsqueeze(-3),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
+        )
         # batch_shape x num_fantasies x (m) x 1
         mean = self.weight * posterior.mean.squeeze(-1).squeeze(-1)
         variance = posterior.variance.clamp_min(CLAMP_LB).view_as(mean)
@@ -193,6 +202,7 @@ def __init__(
         model: Model,
         candidate_set: Tensor,
         num_mv_samples: int = 10,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -206,6 +216,9 @@ def __init__(
                 discretize the design space. Max values are sampled from the
                 (joint) model posterior over these points.
             num_mv_samples: Number of max value samples.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -231,6 +244,7 @@ def __init__(
         super().__init__(
             model=model,
             num_mv_samples=num_mv_samples,
+            posterior_transform=posterior_transform,
             maximize=maximize,
             X_pending=X_pending,
         )
@@ -275,6 +289,7 @@ def _sample_max_values(
                 model=self.model,
                 candidate_set=candidate_set,
                 num_samples=self.num_mv_samples,
+                posterior_transform=self.posterior_transform,
                 maximize=self.maximize,
             )

@@ -303,6 +318,7 @@ def __init__(
         num_fantasies: int = 16,
         num_mv_samples: int = 10,
         num_y_samples: int = 128,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -321,6 +337,9 @@ def __init__(
                 complexity, wall time and memory). Ignored if `X_pending` is `None`.
             num_mv_samples: Number of max value samples.
             num_y_samples: Number of posterior samples at specific design point `X`.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -332,6 +351,7 @@ def __init__(
             model=model,
             candidate_set=candidate_set,
             num_mv_samples=num_mv_samples,
+            posterior_transform=posterior_transform,
             use_gumbel=use_gumbel,
             maximize=maximize,
             X_pending=X_pending,
@@ -397,7 +417,11 @@ def _compute_information_gain(
             given design points `X` (`num_fantasies=1` for non-fantasized models).
         """
         # compute the std_m, variance_m with noisy observation
-        posterior_m = self.model.posterior(X.unsqueeze(-3), observation_noise=True)
+        posterior_m = self.model.posterior(
+            X.unsqueeze(-3),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
+        )
         # batch_shape x num_fantasies x (m) x (1 + num_trace_observations)
         mean_m = self.weight * posterior_m.mean.squeeze(-1)
         # batch_shape x num_fantasies x (m) x (1 + num_trace_observations)
@@ -489,7 +513,7 @@ class qLowerBoundMaxValueEntropy(DiscreteMaxValueBase):
     the mutual information between max values and a batch of candidate points `X`.
     See [Moss2021gibbon]_ for a detailed discussion.

-    The model must be single-outcome.
+    The model must be single-outcome, unless using a PosteriorTransform.
     q > 1 is supported through greedy batch filling.

     Example:
@@ -527,7 +551,9 @@ def _compute_information_gain(
         # doing posterior computations twice

         # compute the mean_m, variance_m with noisy observation
-        posterior_m = self.model.posterior(X, observation_noise=True)
+        posterior_m = self.model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )
         mean_m = self.weight * posterior_m.mean.squeeze(-1)
         # batch_shape x 1
         variance_m = posterior_m.variance.clamp_min(CLAMP_LB).squeeze(-1)
@@ -584,17 +610,29 @@ def _compute_information_gain(
         # it provides only a translation of the acquisition function surface
         # and can thus be ignored.

+        if self.posterior_transform is not None:
+            raise UnsupportedError(
+                "qLowerBoundMaxValueEntropy does not support PosteriorTransforms "
+                "when X_pending is not None."
+            )
+
         X_batches = torch.cat(
             [X, self.X_pending.unsqueeze(0).repeat(X.shape[0], 1, 1)], 1
         )
         # batch_shape x (1 + m) x d
+        # NOTE: This is the blocker for supporting posterior transforms.
+        # We would have to process this MVN, applying whatever operations
+        # are typically applied for the corresponding posterior, then applying
+        # the posterior transform onto the resulting object.
         V = self.model(X_batches)
         # Evaluate terms required for A
         A = V.lazy_covariance_matrix[:, 0, 1:].unsqueeze(1)
         # batch_shape x 1 x m
         # Evaluate terms required for B
         B = self.model.posterior(
-            self.X_pending, observation_noise=True
+            self.X_pending,
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         ).mvn.covariance_matrix.unsqueeze(0)
         # 1 x m x m

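As the new check above documents, GIBBON accepts a transform only when `X_pending` is `None`. A hedged sketch reusing the assumed `model`, `candidate_set`, and `pt` from the earlier example:

```python
from botorch.acquisition.max_value_entropy_search import qLowerBoundMaxValueEntropy

qGIBBON = qLowerBoundMaxValueEntropy(model, candidate_set, posterior_transform=pt)
# Evaluate on a `b x 1 x d` batch of candidates; returns `b` acquisition values.
acq_values = qGIBBON(candidate_set[:5].unsqueeze(-2))

# With pending points, the transform is rejected at evaluation time:
# qGIBBON.set_X_pending(candidate_set[:2])
# qGIBBON(candidate_set[:5].unsqueeze(-2))  # raises UnsupportedError
```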
@@ -616,8 +654,8 @@ class qMultiFidelityMaxValueEntropy(qMaxValueEntropy):
     for a detailed discussion of the basic ideas on multi-fidelity MES
     (note that this implementation is somewhat different).

-    The model must be single-outcome. The batch case `q > 1` is supported
-    through cyclic optimization and fantasies.
+    The model must be single-outcome, unless using a PosteriorTransform.
+    The batch case `q > 1` is supported through cyclic optimization and fantasies.

     Example:
         >>> model = SingleTaskGP(train_X, train_Y)
@@ -634,6 +672,7 @@ def __init__(
         num_fantasies: int = 16,
         num_mv_samples: int = 10,
         num_y_samples: int = 128,
+        posterior_transform: Optional[PosteriorTransform] = None,
         use_gumbel: bool = True,
         maximize: bool = True,
         X_pending: Optional[Tensor] = None,
@@ -657,6 +696,9 @@ def __init__(
             is not `None`.
             num_mv_samples: Number of max value samples.
             num_y_samples: Number of posterior samples at specific design point `X`.
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
             use_gumbel: If True, use Gumbel approximation to sample the max values.
             maximize: If True, consider the problem a maximization problem.
             X_pending: A `m x d`-dim Tensor of `m` design points that have been
@@ -678,9 +720,10 @@ def __init__(
             num_fantasies=num_fantasies,
             num_mv_samples=num_mv_samples,
             num_y_samples=num_y_samples,
-            X_pending=X_pending,
+            posterior_transform=posterior_transform,
             use_gumbel=use_gumbel,
             maximize=maximize,
+            X_pending=X_pending,
         )

         if cost_aware_utility is None:
@@ -731,7 +774,9 @@ def forward(self, X: Tensor) -> Tensor:

         # Compute the posterior, posterior mean, variance without noise
         # `_m` and `_M` in the var names means the current and the max fidelity.
-        posterior = self.model.posterior(X_all, observation_noise=False)
+        posterior = self.model.posterior(
+            X_all, observation_noise=False, posterior_transform=self.posterior_transform
+        )
         mean_M = self.weight * posterior.mean[..., -1, 0]  # batch_shape x num_fantasies
         variance_M = posterior.variance[..., -1, 0].clamp_min(CLAMP_LB)
         # get the covariance between the low fidelities and max fidelity
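For completeness, a hedged sketch of the multi-fidelity variant with the new argument, paired with the cost-aware utilities imported at the top of this file. The cost weights, fixed cost, and the assumption that the last input column is the fidelity are illustrative; `model`, `candidate_set`, and `pt` are assumed from the earlier sketches:

```python
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.acquisition.max_value_entropy_search import qMultiFidelityMaxValueEntropy
from botorch.models.cost import AffineFidelityCostModel

# Assume evaluation cost is affine in the fidelity (last input column).
cost_model = AffineFidelityCostModel(fidelity_weights={-1: 1.0}, fixed_cost=5.0)
cost_aware_utility = InverseCostWeightedUtility(cost_model=cost_model)

qMF_MES = qMultiFidelityMaxValueEntropy(
    model,
    candidate_set,
    posterior_transform=pt,
    cost_aware_utility=cost_aware_utility,
)
```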
@@ -751,7 +796,11 @@ def forward(self, X: Tensor) -> Tensor:


 def _sample_max_value_Thompson(
-    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
+    model: Model,
+    candidate_set: Tensor,
+    num_samples: int,
+    posterior_transform: Optional[PosteriorTransform] = None,
+    maximize: bool = True,
 ) -> Tensor:
     """Samples the max values by discrete Thompson sampling.

@@ -762,12 +811,15 @@ def _sample_max_value_Thompson(
         candidate_set: A `n x d` Tensor including `n` candidate points to
             discretize the design space.
         num_samples: Number of max value samples.
+        posterior_transform: A PosteriorTransform. If using a multi-output model,
+            a PosteriorTransform that transforms the multi-output posterior into a
+            single-output posterior is required.
         maximize: If True, consider the problem a maximization problem.

     Returns:
         A `num_samples x num_fantasies` Tensor of posterior max value samples.
     """
-    posterior = model.posterior(candidate_set)
+    posterior = model.posterior(candidate_set, posterior_transform=posterior_transform)
     weight = 1.0 if maximize else -1.0
     samples = weight * posterior.rsample(torch.Size([num_samples])).squeeze(-1)
     # samples is num_samples x (num_fantasies) x n
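A quick hedged illustration of calling this (private) helper directly with a scalarizing transform, reusing the assumed `model`, `candidate_set`, and `pt` from above:

```python
max_values = _sample_max_value_Thompson(
    model=model,
    candidate_set=candidate_set,
    num_samples=10,
    posterior_transform=pt,
)
# max_values: a `10 x (num_fantasies)` Tensor of sampled maxima of the
# scalarized outcome over the discretized candidate set.
```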
@@ -779,7 +831,11 @@ def _sample_max_value_Thompson(


 def _sample_max_value_Gumbel(
-    model: Model, candidate_set: Tensor, num_samples: int, maximize: bool = True
+    model: Model,
+    candidate_set: Tensor,
+    num_samples: int,
+    posterior_transform: Optional[PosteriorTransform] = None,
+    maximize: bool = True,
 ) -> Tensor:
     """Samples the max values by Gumbel approximation.

@@ -790,13 +846,16 @@ def _sample_max_value_Gumbel(
         candidate_set: A `n x d` Tensor including `n` candidate points to
             discretize the design space.
         num_samples: Number of max value samples.
+        posterior_transform: A PosteriorTransform. If using a multi-output model,
+            a PosteriorTransform that transforms the multi-output posterior into a
+            single-output posterior is required.
         maximize: If True, consider the problem a maximization problem.

     Returns:
         A `num_samples x num_fantasies` Tensor of posterior max value samples.
     """
     # define the approximate CDF for the max value under the independence assumption
-    posterior = model.posterior(candidate_set)
+    posterior = model.posterior(candidate_set, posterior_transform=posterior_transform)
     weight = 1.0 if maximize else -1.0
     mu = weight * posterior.mean
     sigma = posterior.variance.clamp_min(1e-8).sqrt()
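For intuition, the independence assumption referenced in the comment factorizes the max-value CDF over the candidate points into a product of univariate normal CDFs; the code following this hunk fits a Gumbel distribution to that approximate CDF and samples max values from it. A hedged sketch of the approximation (not the exact code in the file; assumes `mu` and `sigma` are 1-d tensors of per-candidate moments):

```python
import torch
from torch.distributions import Normal

def approx_max_cdf(y: torch.Tensor, mu: torch.Tensor, sigma: torch.Tensor) -> torch.Tensor:
    # Under independence: P(max_i f(x_i) <= y) ~= prod_i Phi((y - mu_i) / sigma_i)
    return Normal(0.0, 1.0).cdf((y - mu) / sigma).prod(dim=-1)
```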