Skip to content

Commit 8ecc903

Browse files
jduerholt and facebook-github-bot
authored and committed
make eta configurable (#1526)
Summary: <!-- Thank you for sending the PR! We appreciate you spending the time to make BoTorch better. Help us understand your motivation by explaining why you decided to make this change. You can learn more about contributing to BoTorch here: https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md --> ## Motivation I recently looked into the output constraint implementation in botorch and found that it behaves like our custom `Objective` implementation for handling constraints, namely by multiplying by sigmoids. Currently, the only difference is that we often work with a different `eta` value per constraint. I think this would also be a nice feature for `botorch`. This PR is still a work in progress, as the `apply_constraints` method is used in many places throughout the codebase, and my question is whether one wants to keep backwards compatibility. In the current PR, I kept backwards compatibility and made `eta` of type `Union[float, torch.Tensor]`. If one does this, one always has to check whether just a float was provided and, if so, transform the float into a tensor of the same length as the list of constraint callables. Another option would be to make `eta` optional with default `None` and then just generate a tensor filled with the old default of `1e-3`. Which solution would you prefer? Depending on your suggestion, I would finalize the PR and implement support for a different `eta` per constraint throughout the whole codebase. ### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md)? Yes. Pull Request resolved: #1526 Test Plan: Unit tests. Reviewed By: Balandat Differential Revision: D41600602 Pulled By: saitcakmak fbshipit-source-id: c04e68f6f0bb2264938f2dfb360a378cc661654a
1 parent 0fb00ef commit 8ecc903

File tree

7 files changed

+278
-22
lines changed

7 files changed

+278
-22
lines changed

botorch/acquisition/multi_objective/monte_carlo.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def __init__(
8585
sampler: Optional[MCSampler] = None,
8686
objective: Optional[MCMultiOutputObjective] = None,
8787
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
88+
eta: Optional[Union[Tensor, float]] = 1e-3,
8889
X_pending: Optional[Tensor] = None,
8990
) -> None:
9091
r"""Constructor for the MCAcquisitionFunction base class.
@@ -102,6 +103,12 @@ def __init__(
102103
`sample_shape x batch-shape x q x m` to a Tensor of dimension
103104
`sample_shape x batch-shape x q`, where negative values imply
104105
feasibility.
106+
eta: The temperature parameter for the sigmoid function used for the
107+
differentiable approximation of the constraints. In case of a float the
108+
same eta is used for every constraint in constraints. In case of a
109+
tensor the length of the tensor must match the number of provided
110+
constraints. The i-th constraint is then estimated with the i-th
111+
eta value.
105112
X_pending: A `m x d`-dim Tensor of `m` design points that have
106113
points that have been submitted for function evaluation
107114
but have not yet been evaluated.
@@ -128,6 +135,10 @@ def __init__(
128135
)
129136
self.add_module("objective", objective)
130137
self.constraints = constraints
138+
if constraints:
139+
if type(eta) != Tensor:
140+
eta = torch.full((len(constraints),), eta)
141+
self.register_buffer("eta", eta)
131142
self.X_pending = None
132143
if X_pending is not None:
133144
self.set_X_pending(X_pending)
@@ -153,7 +164,7 @@ def __init__(
153164
objective: Optional[MCMultiOutputObjective] = None,
154165
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
155166
X_pending: Optional[Tensor] = None,
156-
eta: float = 1e-3,
167+
eta: Optional[Union[Tensor, float]] = 1e-3,
157168
) -> None:
158169
r"""q-Expected Hypervolume Improvement supporting m>=2 outcomes.
159170
@@ -189,7 +200,11 @@ def __init__(
189200
been evaluated. Concatenated into `X` upon forward call. Copied and set
190201
to have no gradient.
191202
eta: The temperature parameter for the sigmoid function used for the
192-
differentiable approximation of the constraints.
203+
differentiable approximation of the constraints. In case of a float the
204+
same eta is used for every constraint in constraints. In case of a
205+
tensor the length of the tensor must match the number of provided
206+
constraints. The i-th constraint is then estimated with the i-th
207+
eta value.
193208
"""
194209
if len(ref_point) != partitioning.num_outcomes:
195210
raise ValueError(
@@ -207,9 +222,9 @@ def __init__(
207222
sampler=sampler,
208223
objective=objective,
209224
constraints=constraints,
225+
eta=eta,
210226
X_pending=X_pending,
211227
)
212-
self.eta = eta
213228
self.register_buffer("ref_point", ref_point)
214229
cell_bounds = partitioning.get_hypercell_bounds()
215230
self.register_buffer("cell_lower_bounds", cell_bounds[0])
@@ -357,7 +372,7 @@ def __init__(
357372
objective: Optional[MCMultiOutputObjective] = None,
358373
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
359374
X_pending: Optional[Tensor] = None,
360-
eta: float = 1e-3,
375+
eta: Optional[Union[Tensor, float]] = 1e-3,
361376
prune_baseline: bool = False,
362377
alpha: float = 0.0,
363378
cache_pending: bool = True,
@@ -400,7 +415,11 @@ def __init__(
400415
have points that have been submitted for function evaluation, but
401416
have not yet been evaluated.
402417
eta: The temperature parameter for the sigmoid function used for the
403-
differentiable approximation of the constraints.
418+
differentiable approximation of the constraints. In case of a float the
419+
same eta is used for every constraint in constraints. In case of a
420+
tensor the length of the tensor must match the number of provided
421+
constraints. The i-th constraint is then estimated with the i-th
422+
eta value.
404423
prune_baseline: If True, remove points in `X_baseline` that are
405424
highly unlikely to be the pareto optimal and better than the
406425
reference point. This can significantly improve computation time and
@@ -431,6 +450,7 @@ def __init__(
431450
sampler=sampler,
432451
objective=objective,
433452
constraints=constraints,
453+
eta=eta,
434454
)
435455
self._setup(model=model, cache_root=cache_root)
436456

@@ -450,7 +470,6 @@ def __init__(
450470
)
451471
self.register_buffer("ref_point", ref_point)
452472
self.alpha = alpha
453-
self.eta = eta
454473
self.q_in = -1
455474
self.q_out = -1
456475
self.q_subset_indices = BufferDict()

botorch/acquisition/multi_objective/multi_fidelity.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ def __init__(
4646
sampler: Optional[MCSampler] = None,
4747
objective: Optional[MCMultiOutputObjective] = None,
4848
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
49+
eta: Optional[Union[Tensor, float]] = 1e-3,
4950
X_pending: Optional[Tensor] = None,
5051
cost_call: Callable[Tensor, Tensor] = None,
51-
eta: float = 1e-3,
5252
**kwargs: Any,
5353
) -> None:
5454
r"""MOMF acquisition function supporting m>=2 outcomes.
@@ -98,7 +98,11 @@ def __init__(
9898
`batch_shape x q x m`. Defaults to an AffineCostModel with
9999
`C(s) = 1 + s`.
100100
eta: The temperature parameter for the sigmoid function used for the
101-
differentiable approximation of the constraints.
101+
differentiable approximation of the constraints. In case of a float the
102+
same eta is used for every constraint in constraints. In case of a
103+
tensor the length of the tensor must match the number of provided
104+
constraints. The i-th constraint is then estimated with the i-th
105+
eta value.
102106
"""
103107

104108
if len(ref_point) != partitioning.num_outcomes:
@@ -119,6 +123,7 @@ def __init__(
119123
sampler=sampler,
120124
objective=objective,
121125
constraints=constraints,
126+
eta=eta,
122127
X_pending=X_pending,
123128
)
124129

botorch/acquisition/objective.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ def __init__(
453453
objective: Callable[[Tensor, Optional[Tensor]], Tensor],
454454
constraints: List[Callable[[Tensor], Tensor]],
455455
infeasible_cost: Union[Tensor, float] = 0.0,
456-
eta: float = 1e-3,
456+
eta: Union[Tensor, float] = 1e-3,
457457
) -> None:
458458
r"""
459459
Args:
@@ -468,11 +468,17 @@ def __init__(
468468
infeasible_cost: The cost of a design if all associated samples are
469469
infeasible.
470470
eta: The temperature parameter of the sigmoid function approximating
471-
the constraint.
471+
the constraint. Can be either a float or a 1-dim tensor. In case
472+
of a float the same eta is used for every constraint in
473+
constraints. In case of a tensor the length of the tensor must
474+
match the number of provided constraints. The i-th constraint is
475+
then estimated with the i-th eta value.
472476
"""
473477
super().__init__(objective=objective)
474478
self.constraints = constraints
475-
self.register_buffer("eta", torch.as_tensor(eta))
479+
if type(eta) != Tensor:
480+
eta = torch.full((len(constraints),), eta)
481+
self.register_buffer("eta", eta)
476482
self.register_buffer("infeasible_cost", torch.as_tensor(infeasible_cost))
477483

478484
def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:

botorch/utils/objective.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from __future__ import annotations
1212

13-
from typing import Callable, List, Optional
13+
from typing import Callable, List, Optional, Union
1414

1515
import torch
1616
from torch import Tensor
@@ -64,7 +64,7 @@ def apply_constraints_nonnegative_soft(
6464
obj: Tensor,
6565
constraints: List[Callable[[Tensor], Tensor]],
6666
samples: Tensor,
67-
eta: float,
67+
eta: Union[Tensor, float],
6868
) -> Tensor:
6969
r"""Applies constraints to a non-negative objective.
7070
@@ -78,14 +78,24 @@ def apply_constraints_nonnegative_soft(
7878
This callable must support broadcasting. Only relevant for multi-
7979
output models (`m` > 1).
8080
samples: A `n_samples x b x q x m` Tensor of samples drawn from the posterior.
81-
eta: The temperature parameter for the sigmoid function.
81+
eta: The temperature parameter for the sigmoid function. Can be either a float
82+
or a 1-dim tensor. In case of a float the same eta is used for every
83+
constraint in constraints. In case of a tensor the length of the tensor
84+
must match the number of provided constraints. The i-th constraint is
85+
then estimated with the i-th eta value.
8286
8387
Returns:
8488
A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives.
8589
"""
90+
if type(eta) != Tensor:
91+
eta = torch.full((len(constraints),), eta)
92+
if len(eta) != len(constraints):
93+
raise ValueError(
94+
"Number of provided constraints and number of provided etas do not match."
95+
)
8696
obj = obj.clamp_min(0) # Enforce non-negativity with constraints
87-
for constraint in constraints:
88-
constraint_eval = soft_eval_constraint(constraint(samples), eta=eta)
97+
for constraint, e in zip(constraints, eta):
98+
constraint_eval = soft_eval_constraint(constraint(samples), eta=e)
8999
if obj.dim() == samples.dim():
90100
# Need to unsqueeze to accommodate the outcome dimension.
91101
constraint_eval = constraint_eval.unsqueeze(-1)
@@ -101,7 +111,7 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor:
101111
Args:
102112
lhs: The left hand side of the constraint `lhs <= 0`.
103113
eta: The temperature parameter of the softmax function. As eta
104-
grows larger, this approximates the Heaviside step function.
114+
decreases, this approximates the Heaviside step function.
105115
106116
Returns:
107117
Element-wise 'soft' feasibility indicator of the same shape as `lhs`.
@@ -118,7 +128,7 @@ def apply_constraints(
118128
constraints: List[Callable[[Tensor], Tensor]],
119129
samples: Tensor,
120130
infeasible_cost: float,
121-
eta: float = 1e-3,
131+
eta: Union[Tensor, float] = 1e-3,
122132
) -> Tensor:
123133
r"""Apply constraints using an infeasible_cost `M` for negative objectives.
124134
@@ -136,7 +146,11 @@ def apply_constraints(
136146
output models (`m` > 1).
137147
samples: A `n_samples x b x q x m` Tensor of samples drawn from the posterior.
138148
infeasible_cost: The infeasible value.
139-
eta: The temperature parameter of the sigmoid function.
149+
eta: The temperature parameter of the sigmoid function. Can be either a float
150+
or a 1-dim tensor. In case of a float the same eta is used for every
151+
constraint in constraints. In case of a tensor the length of the tensor
152+
must match the number of provided constraints. The i-th constraint is
153+
then estimated with the i-th eta value.
140154
141155
Returns:
142156
A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives.

test/acquisition/multi_objective/test_monte_carlo.py

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -517,16 +517,59 @@ def test_constrained_q_expected_hypervolume_improvement(self):
517517
X = torch.zeros(1, 1, **tkwargs)
518518
# test zero slack
519519
for eta in (1e-1, 1e-2):
520+
expected_values = [0.5 * 1.5, 0.5 * 0.5 * 1.5]
521+
for i, constraints in enumerate(
522+
[
523+
[lambda Z: torch.zeros_like(Z[..., -1])],
524+
[
525+
lambda Z: torch.zeros_like(Z[..., -1]),
526+
lambda Z: torch.zeros_like(Z[..., -1]),
527+
],
528+
]
529+
):
530+
acqf = qExpectedHypervolumeImprovement(
531+
model=mm,
532+
ref_point=ref_point,
533+
partitioning=partitioning,
534+
sampler=sampler,
535+
constraints=constraints,
536+
eta=eta,
537+
)
538+
res = acqf(X)
539+
self.assertAlmostEqual(res.item(), expected_values[i], places=4)
540+
# test multiple constraints one and multiple etas
541+
constraints = [
542+
lambda Z: torch.ones_like(Z[..., -1]),
543+
lambda Z: torch.ones_like(Z[..., -1]),
544+
]
545+
etas = [1, torch.tensor([1, 10])]
546+
expected_values = [
547+
(
548+
torch.sigmoid(torch.as_tensor(-1.0))
549+
* torch.sigmoid(torch.as_tensor(-1.0))
550+
* 1.5
551+
).item(),
552+
(
553+
torch.sigmoid(torch.as_tensor(-1.0))
554+
* torch.sigmoid(torch.as_tensor(-1.0 / 10.0))
555+
* 1.5
556+
).item(),
557+
]
558+
for eta, expected_value in zip(etas, expected_values):
520559
acqf = qExpectedHypervolumeImprovement(
521560
model=mm,
522561
ref_point=ref_point,
523562
partitioning=partitioning,
524563
sampler=sampler,
525-
constraints=[lambda Z: torch.zeros_like(Z[..., -1])],
564+
constraints=constraints,
526565
eta=eta,
527566
)
528567
res = acqf(X)
529-
self.assertAlmostEqual(res.item(), 0.5 * 1.5, places=4)
568+
self.assertAlmostEqual(
569+
res.item(),
570+
expected_value,
571+
places=4,
572+
)
530573
# test feasible
531574
acqf = qExpectedHypervolumeImprovement(
532575
model=mm,
@@ -1074,7 +1117,29 @@ def test_constrained_q_noisy_expected_hypervolume_improvement(self):
10741117
)
10751118
mm = MockModel(MockPosterior(samples=baseline_samples))
10761119
X = torch.zeros(1, 1, **tkwargs)
1077-
# test zero slack
1120+
# test zero slack multiple constraints, multiple etas
1121+
for eta in [1e-1, 1e-2, torch.tensor([1.0, 10.0])]:
1122+
# set the MockPosterior to use samples over baseline points
1123+
mm._posterior._samples = baseline_samples
1124+
sampler = IIDNormalSampler(sample_shape=torch.Size([1]))
1125+
acqf = qNoisyExpectedHypervolumeImprovement(
1126+
model=mm,
1127+
ref_point=ref_point,
1128+
X_baseline=X_baseline,
1129+
sampler=sampler,
1130+
constraints=[
1131+
lambda Z: torch.zeros_like(Z[..., -1]),
1132+
lambda Z: torch.zeros_like(Z[..., -1]),
1133+
],
1134+
eta=eta,
1135+
cache_root=False,
1136+
)
1137+
# set the MockPosterior to use samples over baseline points and new
1138+
# candidates
1139+
mm._posterior._samples = samples
1140+
res = acqf(X)
1141+
self.assertAlmostEqual(res.item(), 0.5 * 0.5 * 1.5, places=4)
1142+
# test zero slack single constraint
10781143
for eta in (1e-1, 1e-2):
10791144
# set the MockPosterior to use samples over baseline points
10801145
mm._posterior._samples = baseline_samples
@@ -1169,6 +1234,37 @@ def test_constrained_q_noisy_expected_hypervolume_improvement(self):
11691234
mm._posterior._samples = samples
11701235
res = acqf(X)
11711236
self.assertAlmostEqual(res.item(), 1.5, places=4)
1237+
# test multiple constraints one eta with
1238+
# this crashes for large etas, and I do not know why
1239+
# set the MockPosterior to use samples over baseline points
1240+
etas = [torch.tensor([1.0]), torch.tensor([1.0, 10.0])]
1241+
constraints = [
1242+
[lambda Z: torch.ones_like(Z[..., -1])],
1243+
[
1244+
lambda Z: torch.ones_like(Z[..., -1]),
1245+
lambda Z: torch.ones_like(Z[..., -1]),
1246+
],
1247+
]
1248+
expected_values = [
1249+
(torch.sigmoid(torch.as_tensor(-1.0 / 1)) * 1.5).item(),
1250+
(
1251+
torch.sigmoid(torch.as_tensor(-1.0 / 1))
1252+
* torch.sigmoid(torch.as_tensor(-1.0 / 10))
1253+
* 1.5
1254+
).item(),
1255+
]
1256+
for eta, constraint, expected_value in zip(
1257+
etas, constraints, expected_values
1258+
):
1259+
acqf.constraints = constraint
1260+
acqf.eta = eta
1261+
res = acqf(X)
1262+
1263+
self.assertAlmostEqual(
1264+
res.item(),
1265+
expected_value,
1266+
places=4,
1267+
)
11721268
# test infeasible
11731269
# set the MockPosterior to use samples over baseline points
11741270
mm._posterior._samples = baseline_samples

0 commit comments

Comments
 (0)