diff --git a/bofire/data_models/acquisition_functions/acquisition_function.py b/bofire/data_models/acquisition_functions/acquisition_function.py
index ab51b46bc..41552f9f3 100644
--- a/bofire/data_models/acquisition_functions/acquisition_function.py
+++ b/bofire/data_models/acquisition_functions/acquisition_function.py
@@ -1,4 +1,4 @@
-from typing import Annotated, Dict, Literal, Optional
+from typing import Annotated, Dict, List, Literal, Optional, Union
 
 from pydantic import Field, PositiveFloat
 
@@ -18,6 +18,10 @@ class MultiObjectiveAcquisitionFunction(AcquisitionFunction):
     type: str
 
 
+class MultiFidelityAcquisitionFunction(AcquisitionFunction):
+    type: str
+
+
 class qNEI(SingleObjectiveAcquisitionFunction):
     type: Literal["qNEI"] = "qNEI"
     prune_baseline: bool = True
@@ -87,3 +91,17 @@ class qNegIntPosVar(SingleObjectiveAcquisitionFunction):
     type: Literal["qNegIntPosVar"] = "qNegIntPosVar"
     n_mc_samples: IntPowerOfTwo = 512
     weights: Optional[Dict[str, PositiveFloat]] = Field(default_factory=lambda: None)
+
+
+class qMFMES(MultiFidelityAcquisitionFunction):
+    type: Literal["qMFMES"] = "qMFMES"
+    num_fantasies: IntPowerOfTwo = 16
+    num_mv_samples: int = 10
+    num_y_samples: IntPowerOfTwo = 128
+    fidelity_costs: List[float]
+
+
+class qMFVariance(MultiFidelityAcquisitionFunction):
+    type: Literal["qMFVariance"] = "qMFVariance"
+    beta: Annotated[float, Field(ge=0)] = 0.2
+    fidelity_thresholds: Union[List[float], float] = 0.1
diff --git a/bofire/data_models/acquisition_functions/api.py b/bofire/data_models/acquisition_functions/api.py
index edc81fa86..e197b433c 100644
--- a/bofire/data_models/acquisition_functions/api.py
+++ b/bofire/data_models/acquisition_functions/api.py
@@ -2,6 +2,7 @@
 
 from bofire.data_models.acquisition_functions.acquisition_function import (
     AcquisitionFunction,
+    MultiFidelityAcquisitionFunction,
     MultiObjectiveAcquisitionFunction,
     SingleObjectiveAcquisitionFunction,
     qEHVI,
@@ -10,6 +11,8 @@
     qLogEI,
     qLogNEHVI,
     qLogNEI,
+    qMFMES,
+    qMFVariance,
     qNegIntPosVar,
     qNEHVI,
     qNEI,
@@ -23,6 +26,7 @@
     AcquisitionFunction,
     SingleObjectiveAcquisitionFunction,
     MultiObjectiveAcquisitionFunction,
+    MultiFidelityAcquisitionFunction,
 ]
 
 AnyAcquisitionFunction = Union[
@@ -53,3 +57,5 @@
 AnyMultiObjectiveAcquisitionFunction = Union[qEHVI, qLogEHVI, qNEHVI, qLogNEHVI]
 
 AnyActiveLearningAcquisitionFunction = qNegIntPosVar
+
+AnyMultiFidelityAcquisitionFunction = Union[qMFMES, qMFVariance]
diff --git a/bofire/data_models/strategies/predictives/multi_fidelity.py b/bofire/data_models/strategies/predictives/multi_fidelity.py
index f074a7f8c..411dd0e25 100644
--- a/bofire/data_models/strategies/predictives/multi_fidelity.py
+++ b/bofire/data_models/strategies/predictives/multi_fidelity.py
@@ -1,7 +1,11 @@
-from typing import List, Literal, Union
+from typing import Literal
 
-from pydantic import model_validator
+from pydantic import Field, model_validator
 
+from bofire.data_models.acquisition_functions.api import (
+    AnyMultiFidelityAcquisitionFunction,
+    qMFVariance,
+)
 from bofire.data_models.domain.api import Domain, Outputs
 from bofire.data_models.features.api import TaskInput
 from bofire.data_models.strategies.predictives.sobo import SoboStrategy
@@ -11,20 +15,24 @@ class MultiFidelityStrategy(SoboStrategy):
     type: Literal["MultiFidelityStrategy"] = "MultiFidelityStrategy"
 
-    fidelity_thresholds: Union[List[float], float] = 0.1
+    fidelity_acquisition_function: AnyMultiFidelityAcquisitionFunction = Field(
+        default_factory=lambda: qMFVariance(),
+    )
 
     @model_validator(mode="after")
    def validate_tasks_and_fidelity_thresholds(self):
         """Ensures that there is one threshold per fidelity"""
         task_input, *_ = self.domain.inputs.get(includes=TaskInput, exact=True)
         num_tasks = len(task_input.categories)  # type: ignore
+        fid_acqf = self.fidelity_acquisition_function
         if (
-            isinstance(self.fidelity_thresholds, list)
-            and len(self.fidelity_thresholds) != num_tasks
+            isinstance(fid_acqf, qMFVariance)
+            and isinstance(fid_acqf.fidelity_thresholds, list)
+            and len(fid_acqf.fidelity_thresholds) != num_tasks
         ):
             raise ValueError(
-                f"The number of tasks should be equal to the number of fidelity thresholds (got {num_tasks} tasks, {len(self.fidelity_thresholds)} thresholds)."
+                f"The number of tasks should be equal to the number of fidelity thresholds (got {num_tasks} tasks, {len(fid_acqf.fidelity_thresholds)} thresholds)."
             )
         return self
diff --git a/bofire/strategies/predictives/multi_fidelity.py b/bofire/strategies/predictives/multi_fidelity.py
index bc4b33027..10914b207 100644
--- a/bofire/strategies/predictives/multi_fidelity.py
+++ b/bofire/strategies/predictives/multi_fidelity.py
@@ -1,30 +1,249 @@
+import math
+from typing import Optional
+
 import numpy as np
 import pandas as pd
+import torch
+from botorch.acquisition import (
+    SampleReducingMCAcquisitionFunction,
+    qMultiFidelityMaxValueEntropy,
+)
+from botorch.acquisition.cost_aware import InverseCostWeightedUtility
+from botorch.acquisition.objective import (
+    MCAcquisitionObjective,
+    PosteriorTransform,
+    ScalarizedPosteriorTransform,
+)
+from botorch.acquisition.utils import project_to_target_fidelity
+from botorch.models.cost import AffineFidelityCostModel
+from botorch.models.model import Model
+from botorch.sampling.base import MCSampler
+
+from bofire.data_models.acquisition_functions.api import qMFMES, qMFVariance
+from bofire.data_models.domain.api import Domain
+from bofire.data_models.enum import SamplingMethodEnum
 from bofire.data_models.features.api import TaskInput
+from bofire.data_models.objectives.api import MaximizeObjective, Objective
+from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
 from bofire.data_models.strategies.predictives.multi_fidelity import (
     MultiFidelityStrategy as DataModel,
 )
+from bofire.data_models.types import InputTransformSpecs
 from bofire.strategies.predictives.sobo import SoboStrategy
-from bofire.utils.naming_conventions import get_column_names
+from bofire.strategies.random import RandomStrategy
+from bofire.utils.torch_tools import tkwargs
+
+
+class qMultiFidelityVariance(SampleReducingMCAcquisitionFunction):
+    r"""MC-based Variance Bound.
+
+    Uses a reparameterization to extend UCB to qUCB for q > 1 (see Appendix A
+    of [Wilson2017reparam]). Since we only consider the variance, we get the
+    following expression:
+
+    `qVariance = E(max(|Y_tilde - mu|))`, where `Y_tilde ~ N(mu, beta pi/2 Sigma)`
+    and `f(X)` has distribution `N(mu, Sigma)`.
+    """
+
+    def __init__(
+        self,
+        model: Model,
+        beta: float,
+        fidelity_thresholds: torch.Tensor,
+        sampler: Optional[MCSampler] = None,
+        objective: Optional[MCAcquisitionObjective] = None,
+        posterior_transform: Optional[PosteriorTransform] = None,
+        X_pending: Optional[torch.Tensor] = None,
+    ) -> None:
+        r"""q-Multi-Fidelity Variance.
+
+        Args:
+            model: A fitted model.
+            beta: Controls tradeoff between mean and standard deviation in UCB.
+            fidelity_thresholds: A tensor with one variance threshold per fidelity.
+            sampler: The sampler used to draw base samples. See `MCAcquisitionFunction`
+                for more details.
+            objective: The MCAcquisitionObjective under which the samples are
+                evaluated. Defaults to `IdentityMCObjective()`.
+            posterior_transform: A PosteriorTransform (optional).
+            X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points that
+                have been submitted for function evaluation but have not yet been
+                evaluated. Concatenated into X upon forward call. Copied and set to
+                have no gradient.
+        """
+        super().__init__(
+            model=model,
+            sampler=sampler,
+            objective=objective,
+            posterior_transform=posterior_transform,
+            X_pending=X_pending,
+        )
+        self.beta_prime = self._get_beta_prime(beta=beta)
+        self.fidelity_thresholds = fidelity_thresholds
+
+    def _get_beta_prime(self, beta: float) -> float:
+        return math.sqrt(beta * math.pi / 2)
+
+    def _sample_forward(self, obj: torch.Tensor) -> torch.Tensor:
+        r"""Evaluate qMultiFidelityVariance per sample on the candidate set `X`.
+
+        Args:
+            obj: A `sample_shape x batch_shape x q`-dim Tensor of MC objective values.
+
+        Returns:
+            A `sample_shape x batch_shape x q`-dim Tensor of acquisition values.
+        """
+        mean = obj.mean(dim=0)
+        return self.beta_prime * (obj - mean).abs()
+
+    def forward(self, X: torch.Tensor):
+        r"""Compute acquisition values for one design point batched across fidelities.
+
+        Since the acquisition value at one fidelity depends on the other fidelities,
+        information has to be shared across the fidelity batch, so we override the
+        forward method to handle this.
+
+        We return a simplified acquisition value: 1 / (m + 1) if fidelity m is above
+        the variance threshold, and 0 otherwise. Maximizing this gives the smallest
+        fidelity that is above the threshold.
+
+        Args:
+            X: A `batch_shape x q=1 x d`-dim Tensor. X must be ordered from lowest
+                to highest fidelity.
+
+        Returns:
+            A `batch_shape`-dim Tensor of acquisition values.
+        """
+        acqf_values = super().forward(X)
+
+        fidelity_threshold_scale = self.model.outcome_transform.stdvs.item()
+        fidelity_thresholds = self.fidelity_thresholds * fidelity_threshold_scale
+        fidelity_thresholds = fidelity_thresholds.view(
+            *([1] * (acqf_values.ndim - 1)), -1
+        )
+        above_threshold = acqf_values > fidelity_thresholds
+        above_threshold[..., -1] = True  # selecting highest fidelity is always allowed
+
+        acqf_indicator = (
+            1 / (1 + torch.arange(above_threshold.size(-1))) * above_threshold.float()
+        )
+        return acqf_indicator
+
+
+def _gen_candidate_set(
+    domain: Domain,
+    transform_specs: InputTransformSpecs,
+    num_candidates: int,
+    seed: Optional[int] = None,
+) -> torch.Tensor:
+    """Generate a candidate set for Gumbel sampling."""
+    random_strategy = RandomStrategy(
+        data_model=RandomStrategyDataModel(
+            domain=domain,
+            fallback_sampling_method=SamplingMethodEnum.SOBOL,
+            seed=seed,
+        ),
+    )
+    candidate_df = random_strategy.ask(num_candidates)
+    candidate_set = domain.inputs.transform(
+        experiments=candidate_df,
+        specs=transform_specs,
+    )
+    return torch.from_numpy(candidate_set.to_numpy()).to(**tkwargs)
+
+
+def get_mf_acquisition_function(
+    acquisition_function_name: str,
+    model: Model,
+    target_fidelities: dict[int, float],
+    objective: MCAcquisitionObjective,
+    maximize: bool = True,
+    X_pending: Optional[torch.Tensor] = None,
+    mc_samples: int = 512,
+    seed: Optional[int] = None,
+    *,
+    beta: Optional[float] = None,
+    fidelity_thresholds: Optional[torch.Tensor] = None,
+    fidelity_costs: Optional[list[float]] = None,
+    candidate_set: Optional[torch.Tensor] = None,
+):
+    """Convenience function for initializing multi-fidelity acquisition functions.
+
+    Mirrors the signature of botorch.acquisition.factory.get_acquisition_function.
+    """
+
+    # we require a posterior transform since the MultiTaskGP model has
+    # model.num_outputs > 1, even though it is in fact a single output model.
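+    # (A single weight of 1.0 leaves the modelled objective unchanged; the
+    # transform only exists to satisfy acquisition functions that require one.)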
+    posterior_transform = ScalarizedPosteriorTransform(weights=torch.tensor([1.0]))
+    # TODO: use proper cost model
+    fidelity_task_idx = list(target_fidelities.keys())[0]
+
+    def project(X):
+        return project_to_target_fidelity(X=X, target_fidelities=target_fidelities)
+
+    if acquisition_function_name == "qMFMES":
+        if candidate_set is None:
+            raise ValueError("`candidate_set` must not be None for qMFMES.")
+        if fidelity_costs is None:
+            raise ValueError("`fidelity_costs` must not be None for qMFMES.")
+        fidelity_fixed, fidelity_gradient = (
+            fidelity_costs[0],
+            fidelity_costs[1] - fidelity_costs[0],
+        )
+        cost_model = AffineFidelityCostModel(
+            fidelity_weights={fidelity_task_idx: fidelity_gradient},
+            fixed_cost=fidelity_fixed,
+        )
+        cost_aware_utility = InverseCostWeightedUtility(cost_model)
+
+        return qMultiFidelityMaxValueEntropy(
+            model=model,
+            candidate_set=candidate_set,  # type: ignore
+            project=project,
+            posterior_transform=posterior_transform,
+            cost_aware_utility=cost_aware_utility,
+            X_pending=X_pending,
+            maximize=maximize,
+        )
+
+    elif acquisition_function_name == "qMFVariance":
+        if beta is None:
+            raise ValueError("`beta` must not be None for qMFVariance.")
+        if fidelity_thresholds is None:
+            raise ValueError("`fidelity_thresholds` must not be None for qMFVariance.")
+        return qMultiFidelityVariance(
+            model=model,
+            beta=beta,
+            fidelity_thresholds=fidelity_thresholds,
+            posterior_transform=posterior_transform,
+            objective=objective,
+            X_pending=X_pending,
+        )
+
+    raise NotImplementedError(
+        f"Unknown acquisition function {acquisition_function_name}"
+    )
+
+
 class MultiFidelityStrategy(SoboStrategy):
     def __init__(self, data_model: DataModel, **kwargs):
         super().__init__(data_model=data_model, **kwargs)
         self.task_feature_key = self.domain.inputs.get_keys(TaskInput)[0]
-
-        ft = data_model.fidelity_thresholds
-        M = len(self.domain.inputs.get_by_key(self.task_feature_key).fidelities)  # type: ignore
-        self.fidelity_thresholds = ft if isinstance(ft, list) else [ft] * M
+        self.fidelity_acquisition_function = data_model.fidelity_acquisition_function
 
     def _ask(self, candidate_count: int) -> pd.DataFrame:
         """Generate new candidates (x, m).
 
         This is a greedy optimization of the acquisition function. We first
         optimize the acqf for the target fidelity to generate a candidate x,
-        then select the lowest fidelity that has a variance exceeding a
-        threshold.
+        then select a fidelity at which to evaluate it.
+
+        We perform this procedure greedily, in line with [Folch et al. 2023]. This
+        has the advantage of being simpler and faster, as we only need to evaluate
+        the fidelity acquisition function M times. It also allows more freedom
+        in the choice of design-space acquisition function, as well as enabling a
+        more flexible choice of surrogate models.
Args: candidate_count (int): number of candidates to be generated @@ -44,11 +263,66 @@ def _ask(self, candidate_count: int) -> pd.DataFrame: task_feature.allowed = [fidelity == 0 for fidelity in task_feature.fidelities] x = super()._ask(candidate_count) task_feature.allowed = prev_allowed - fidelity_pred = self._select_fidelity_and_get_predict(x) - x.update(fidelity_pred) - return x + fidelity_cand = self.select_fidelity_candidate(x) + pred = self.predict(fidelity_cand) + return pd.concat((fidelity_cand, pred), axis=1) + + def _get_fidelity_acqf( + self, fidelity_input: TaskInput + ) -> qMultiFidelityVariance | qMultiFidelityMaxValueEntropy: + _, X_pending = self.get_acqf_input_tensors() + assert self.model is not None and self.experiments is not None + + fidelity_input_idx = self.domain.inputs.get_keys().index(fidelity_input.key) + # TODO: target fidelity is not necessarily at index 0 + + # determine sense of optimization (max/min) + # qMFMES doesn't take an `objective` argument, so we need `maximize` + ( + objective_callable, + _, + _, + ) = self._get_objective_and_constraints() + target_feature = self.domain.outputs.get_by_objective(includes=Objective)[0] + maximize = isinstance(target_feature.objective, MaximizeObjective) # type: ignore + + fidelity_acqf = get_mf_acquisition_function( + acquisition_function_name=self.fidelity_acquisition_function.__class__.__name__, + model=self.model, + target_fidelities={fidelity_input_idx: 0.0}, + objective=objective_callable, + maximize=maximize, + X_pending=X_pending, + beta=( + self.fidelity_acquisition_function.beta + if isinstance(self.fidelity_acquisition_function, qMFVariance) + else 0.2 + ), + fidelity_thresholds=( + torch.atleast_1d( + torch.tensor( + self.fidelity_acquisition_function.fidelity_thresholds, + **tkwargs, + ) + ) + if isinstance(self.fidelity_acquisition_function, qMFVariance) + else None + ), + candidate_set=_gen_candidate_set( + domain=self.domain, + transform_specs=self.input_preprocessing_specs, + num_candidates=1000, + ) + if isinstance(self.fidelity_acquisition_function, qMFMES) + else None, + fidelity_costs=self.fidelity_acquisition_function.fidelity_costs + if isinstance(self.fidelity_acquisition_function, qMFMES) + else None, + ) - def _select_fidelity_and_get_predict(self, X: pd.DataFrame) -> pd.DataFrame: # type: ignore + return fidelity_acqf + + def select_fidelity_candidate(self, X: pd.DataFrame) -> pd.DataFrame: # type: ignore """Select the fidelity for a given input. Uses the variance based approach (see [Kandasamy et al. 
2016, @@ -62,33 +336,36 @@ def _select_fidelity_and_get_predict(self, X: pd.DataFrame) -> pd.DataFrame: # pd.DataFrame: selected fidelity and prediction """ fidelity_input: TaskInput = self.domain.inputs.get_by_key(self.task_feature_key) # type: ignore - assert self.model is not None and self.experiments is not None assert fidelity_input.allowed is not None sorted_fidelities = np.argsort(fidelity_input.fidelities)[::-1] - target_fidelity_idx = sorted_fidelities[-1] - target_fidelity = fidelity_input.fidelities[target_fidelity_idx] - _, sd_cols = get_column_names(self.domain.outputs) - - for fidelity_idx in sorted_fidelities: - if not fidelity_input.allowed[fidelity_idx]: - continue - - m = fidelity_input.fidelities[fidelity_idx] - fidelity_name = fidelity_input.categories[fidelity_idx] + num_fidelities = len(fidelity_input.fidelities) - fidelity_threshold_scale = self.model.outcome_transform.stdvs.item() - fidelity_threshold = self.fidelity_thresholds[m] * fidelity_threshold_scale + fidelity_acqf = self._get_fidelity_acqf(fidelity_input) - X_fid = X.assign(**{self.task_feature_key: fidelity_name}) - transformed = self.domain.inputs.transform( - experiments=X_fid, specs=self.input_preprocessing_specs - ) - pred = self.predict(transformed) - - if (pred[sd_cols] > fidelity_threshold).all().all() or m == target_fidelity: - pred[self.task_feature_key] = fidelity_name - return pred + X_fidelity_batched = X.loc[ + X.index.repeat(num_fidelities), + self.domain.inputs.get_keys(excludes=TaskInput), + ] + sorted_fidelity_labels = [ + fidelity_input.categories[f] for f in sorted_fidelities + ] + X_fidelity_batched[self.task_feature_key] = sorted_fidelity_labels * len(X) + X_fidelity_batched_transformed = self.domain.inputs.transform( + experiments=X_fidelity_batched, specs=self.input_preprocessing_specs + ) + X_fidelity_batched_tensor = ( + torch.from_numpy(X_fidelity_batched_transformed.to_numpy()) + .to(**tkwargs) + .unsqueeze(-2) + ) + with torch.no_grad(): + # since we optimize over a discrete set of fidelities, there is + # no need to compute gradients + acqf_values = fidelity_acqf(X_fidelity_batched_tensor) + chosen_fidelity_idx = int(torch.argmax(acqf_values).item()) + candidate = X_fidelity_batched.iloc[[chosen_fidelity_idx]] + return candidate def _verify_all_fidelities_observed(self) -> None: """Get all fidelities that have at least one observation. 
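For context, a minimal usage sketch of the new interface follows (not part of the patch). It mirrors the tests and tutorial below and assumes the usual `import bofire.strategies.api as strategies`; the benchmark, threshold, and cost values are the ones used there.

import bofire.strategies.api as strategies
from bofire.benchmarks.api import MultiTaskHimmelblau
from bofire.data_models.acquisition_functions.api import qMFMES, qMFVariance
from bofire.data_models.strategies.api import MultiFidelityStrategy

benchmark = MultiTaskHimmelblau()

# Variance-based fidelity selection (the default): a single threshold or one per fidelity.
variance_dm = MultiFidelityStrategy(
    domain=benchmark.domain,
    fidelity_acquisition_function=qMFVariance(fidelity_thresholds=0.1, beta=0.2),
)

# Information-based fidelity selection: one cost entry per fidelity, as in the tests below.
mes_dm = MultiFidelityStrategy(
    domain=benchmark.domain,
    fidelity_acquisition_function=qMFMES(fidelity_costs=[2.0, 1.0]),
)

strategy = strategies.map(variance_dm)
# strategy.tell(experiments); strategy.ask(1)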
diff --git a/tests/bofire/strategies/test_multi_fidelity.py b/tests/bofire/strategies/test_multi_fidelity.py index f4ff3809d..1e69af10c 100644 --- a/tests/bofire/strategies/test_multi_fidelity.py +++ b/tests/bofire/strategies/test_multi_fidelity.py @@ -1,6 +1,7 @@ import pytest from bofire.benchmarks.api import MultiTaskHimmelblau +from bofire.data_models.acquisition_functions.api import qMFMES, qMFVariance from bofire.data_models.domain.api import Domain from bofire.data_models.enum import SamplingMethodEnum from bofire.data_models.features.api import TaskInput @@ -42,7 +43,9 @@ def test_mf_requires_all_fidelities_observed(): strategy = MultiFidelityStrategy( data_model=MultiFidelityStrategyDataModel( domain=domain_with_extra_task, - fidelity_thresholds=0.1, + fidelity_acquisition_function=qMFVariance( + fidelity_thresholds=[0.1, 0.1, 0.1] + ), ) ) @@ -57,11 +60,18 @@ def test_mf_requires_all_fidelities_observed(): # test that the strategy does not raise an error if all fidelities are observed experiments.loc[experiments.index[-1], task_input.key] = "task_dummy" - strategy.tell(experiments) + strategy.tell(experiments, replace=True) strategy.ask(1) -def test_mf_fidelity_selection(): +@pytest.mark.parametrize( + "fidelity_acqf", + ( + qMFVariance(fidelity_thresholds=0.1, beta=0.2), + qMFMES(fidelity_costs=[2.0, 1.0]), + ), +) +def test_mf_fidelity_selection(fidelity_acqf): benchmark = MultiTaskHimmelblau() (task_input,) = benchmark.domain.inputs.get(TaskInput, exact=True) assert task_input.type == "TaskInput" @@ -75,30 +85,41 @@ def test_mf_fidelity_selection(): ), ) - experiments = benchmark.f(random_strategy.ask(4), return_complete=True) - experiments[task_input.key] = ["task_1", "task_2", "task_2", "task_2"] + N_train = 10 + experiments = benchmark.f(random_strategy.ask(N_train), return_complete=True) + experiments[task_input.key] = ["task_1"] + ["task_2"] * (N_train - 1) experiments, withheld = experiments.iloc[:-1], experiments.iloc[-1:] strategy = MultiFidelityStrategy( data_model=MultiFidelityStrategyDataModel( domain=benchmark.domain, - fidelity_thresholds=0.1, + fidelity_acquisition_function=fidelity_acqf, ) ) strategy.tell(experiments) # test that for a point close to training data, the highest fidelity is selected - close_to_training = experiments.iloc[2:3].copy() + good_training_point = experiments[benchmark.domain.outputs.get_keys()[0]].argmin() + close_to_training = experiments.iloc[ + good_training_point : good_training_point + 1 + ].copy() close_to_training[benchmark.domain.inputs.get_keys(excludes=TaskInput)] += 0.01 - pred = strategy._select_fidelity_and_get_predict(close_to_training) + pred = strategy.select_fidelity_candidate(close_to_training) assert (pred[task_input.key] == task_input.categories[0]).all() # test that for a point far from training data, the lowest fidelity is selected - pred = strategy._select_fidelity_and_get_predict(withheld) + pred = strategy.select_fidelity_candidate(withheld) assert (pred[task_input.key] == task_input.categories[1]).all() -def test_mf_point_selection(): +@pytest.mark.parametrize( + "fidelity_acqf", + ( + qMFVariance(fidelity_thresholds=0.1, beta=0.2), + qMFMES(fidelity_costs=[2.0, 1.0]), + ), +) +def test_mf_point_selection(fidelity_acqf): benchmark = MultiTaskHimmelblau() (task_input,) = benchmark.domain.inputs.get(TaskInput, exact=True) assert task_input.type == "TaskInput" @@ -118,7 +139,7 @@ def test_mf_point_selection(): strategy = MultiFidelityStrategy( data_model=MultiFidelityStrategyDataModel( domain=benchmark.domain, - 
fidelity_thresholds=0.1, + fidelity_acquisition_function=fidelity_acqf, ) ) diff --git a/tutorials/advanced_examples/multifidelity_bo.ipynb b/tutorials/advanced_examples/multifidelity_bo.ipynb index efe38fb0d..69d41b4e7 100644 --- a/tutorials/advanced_examples/multifidelity_bo.ipynb +++ b/tutorials/advanced_examples/multifidelity_bo.ipynb @@ -58,15 +58,15 @@ "outputs": [], "source": [ "SMOKE_TEST = os.environ.get(\"SMOKE_TEST\")\n", - "NUM_INIT_HF = 4\n", + "NUM_INIT_HF = 2\n", "NUM_INIT_LF = 10\n", "if SMOKE_TEST:\n", " num_runs = 5\n", " num_iters = 2\n", " verbose = False\n", "else:\n", - " num_runs = 10\n", - " num_iters = 10\n", + " num_runs = 5\n", + " num_iters = 20\n", " verbose = True" ] }, @@ -132,11 +132,15 @@ " outputs=self._branin.domain.outputs,\n", " )\n", "\n", + " self.bias_scale = 0.2\n", + "\n", " def _f(self, candidates: pd.DataFrame) -> pd.DataFrame:\n", " candidates_no_task = candidates.drop(columns=[\"task\"])\n", " f_branin = self._branin.f(candidates_no_task)\n", " f_ackley = self._ackley.f(candidates_no_task)\n", - " bias_scale = np.where(candidates[\"task\"] == \"task_hf\", 0.0, 0.15).reshape(-1, 1)\n", + " bias_scale = np.where(\n", + " candidates[\"task\"] == \"task_hf\", 0.0, self.bias_scale\n", + " ).reshape(-1, 1)\n", " bias_scale = pd.DataFrame(bias_scale, columns=self._domain.outputs.get_keys())\n", " bias_scale[\"valid_y\"] = 0.0\n", " return f_branin + bias_scale * f_ackley\n", @@ -167,7 +171,7 @@ " experiments.index < NUM_INIT_LF, \"task_lf\", \"task_hf\"\n", " )\n", "\n", - " # then use the ml_benchmark to evaluate the low fidelity\n", + " # then use the mf_benchmark to evaluate the low fidelity\n", " return mf_benchmark.f(experiments, return_complete=True)\n", "\n", "\n", @@ -181,6 +185,7 @@ "metadata": {}, "outputs": [], "source": [ + "from bofire.data_models.acquisition_functions.api import qMFVariance\n", "from bofire.data_models.strategies.api import MultiFidelityStrategy\n", "\n", "\n", @@ -190,7 +195,7 @@ "mf_data_model = MultiFidelityStrategy(\n", " domain=mf_benchmark.domain,\n", " acquisition_function=qLogEI(),\n", - " fidelity_thresholds=0.1,\n", + " fidelity_acquisition_function=qMFVariance(fidelity_thresholds=0.1),\n", ")\n", "mf_data_model.surrogate_specs.surrogates[0].inputs" ] @@ -202,7 +207,7 @@ "metadata": {}, "outputs": [], "source": [ - "from bofire.data_models.strategies.api import SoboStrategy\n", + "from bofire.data_models.strategies.api import SoboStrategy, Strategy\n", "\n", "\n", "surrogate_specs = BotorchSurrogates(\n", @@ -239,8 +244,47 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "# helper function for running bayesian optimization loop\n", + "def run_bo_loop(\n", + " strategy_data_model: Strategy, run_idx: int, last_itr_high_fidelity: bool = False\n", + "):\n", + " results = pd.DataFrame(columns=pd.MultiIndex.from_tuples([], names=(\"col\", \"run\")))\n", + " seed = 2048 * run_idx + 123\n", + " experiments = create_data_set(seed)\n", + "\n", + " strategy = strategies.map(strategy_data_model)\n", + " strategy.tell(experiments)\n", + "\n", + " assert strategy.experiments is not None\n", + "\n", + " pbar = tqdm(range(num_iters), desc=\"Optimizing\")\n", + " for itr in pbar:\n", + " candidate = strategy.ask(1)\n", + " if last_itr_high_fidelity and itr == num_iters - 1:\n", + " candidate[\"task\"] = \"task_hf\"\n", + " y = mf_benchmark.f(candidate, return_complete=True)\n", + " strategy.tell(y)\n", + "\n", + " hf_experiments 
= strategy.experiments[strategy.experiments[\"task\"] == \"task_hf\"]\n", + " # note that both benchmarks have the same optimum\n", + " regret = hf_experiments[\"y\"].min() - mf_benchmark.get_optima()[\"y\"][0].item()\n", + "\n", + " pbar.set_postfix({\"Regret\": f\"{regret:.4f}\"})\n", + "\n", + " results[\"fidelity\", f\"{run_idx}\"] = strategy.experiments[\"task\"]\n", + " results[\"y\", f\"{run_idx}\"] = strategy.experiments[\"y\"]\n", + " return results" + ] + }, + { + "cell_type": "markdown", + "id": "11", "metadata": { "papermill": { "duration": null, @@ -259,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": { "papermill": { "duration": null, @@ -273,35 +317,14 @@ "outputs": [], "source": [ "tl_results = pd.DataFrame(columns=pd.MultiIndex.from_tuples([], names=(\"col\", \"run\")))\n", - "for run in range(num_runs):\n", - " seed = 2048 * run + 123\n", - " experiments = create_data_set(seed)\n", - "\n", - " tl_strategy = strategies.map(tl_data_model)\n", - " tl_strategy.tell(experiments)\n", - "\n", - " assert tl_strategy.experiments is not None\n", - "\n", - " pbar = tqdm(range(num_iters), desc=\"Optimizing\")\n", - " for _ in pbar:\n", - " candidate = tl_strategy.ask(1)\n", - " y = tl_benchmark.f(candidate, return_complete=True)\n", - " tl_strategy.tell(y)\n", - "\n", - " hf_experiments = tl_strategy.experiments[\n", - " tl_strategy.experiments[\"task\"] == \"task_hf\"\n", - " ]\n", - " regret = hf_experiments[\"y\"].min() - tl_benchmark.get_optima()[\"y\"][0].item()\n", - "\n", - " pbar.set_postfix({\"Regret\": f\"{regret:.4f}\"})\n", - "\n", - " tl_results[\"fidelity\", f\"{run}\"] = tl_strategy.experiments[\"task\"]\n", - " tl_results[\"y\", f\"{run}\"] = tl_strategy.experiments[\"y\"]" + "for run_idx in range(num_runs):\n", + " results = run_bo_loop(tl_data_model, run_idx)\n", + " tl_results = pd.concat((tl_results, results), axis=1)" ] }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": { "papermill": { "duration": null, @@ -320,7 +343,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": { "papermill": { "duration": null, @@ -334,35 +357,14 @@ "outputs": [], "source": [ "mf_results = pd.DataFrame(columns=pd.MultiIndex.from_tuples([], names=(\"col\", \"run\")))\n", - "for run in range(num_runs):\n", - " seed = 2048 * run + 123\n", - " experiments = create_data_set(seed)\n", - "\n", - " mf_strategy = strategies.map(mf_data_model)\n", - " mf_strategy.tell(experiments)\n", - "\n", - " assert mf_strategy.experiments is not None\n", - "\n", - " pbar = tqdm(range(num_iters), desc=\"Optimizing\")\n", - " for _ in pbar:\n", - " candidate = mf_strategy.ask(1)\n", - " y = mf_benchmark.f(candidate, return_complete=True)\n", - " mf_strategy.tell(y)\n", - "\n", - " hf_experiments = mf_strategy.experiments[\n", - " mf_strategy.experiments[\"task\"] == \"task_hf\"\n", - " ]\n", - " regret = hf_experiments[\"y\"].min() - mf_benchmark.get_optima()[\"y\"][0].item()\n", - "\n", - " pbar.set_postfix({\"Regret\": f\"{regret:.4f}\"})\n", - "\n", - " mf_results[\"fidelity\", f\"{run}\"] = mf_strategy.experiments[\"task\"]\n", - " mf_results[\"y\", f\"{run}\"] = mf_strategy.experiments[\"y\"]" + "for run_idx in range(num_runs):\n", + " results = run_bo_loop(mf_data_model, run_idx)\n", + " mf_results = pd.concat((mf_results, results), axis=1)" ] }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": { "papermill": { "duration": null, @@ -381,13 +383,13 @@ "BO performance.\n", "\n", 
"Specifically, although both strategies have a budget of 10 function queries, the MF \n", - "approach uses some of them on " + "approach uses some of them on the low fidelity to obtain information about the problem while exhausting less budget." ] }, { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": { "papermill": { "duration": null, @@ -436,9 +438,16 @@ " np.quantile(regret, 0.25, axis=0),\n", " color=plot_kwargs.get(\"color\"),\n", " alpha=0.2,\n", - " )\n", - "\n", - "\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ "fig, axs = plt.subplots(ncols=2, figsize=(8, 4), sharey=True)\n", "cost_ratios = (1, 3)\n", "\n", @@ -469,7 +478,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "18", "metadata": { "papermill": { "duration": null, @@ -489,12 +498,98 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "19", "metadata": {}, "outputs": [], "source": [ "(mf_results[\"fidelity\"] == \"task_hf\")[-num_iters:].mean(axis=1) # type: ignore" ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "### Information-based Multi-fidelity\n", + "\n", + "We can also use an information-theoretic approach to evaluate the different fidelities. \n", + "This approach selects the fidelity that maximizes the information gained about the global maximizer - see [Folch 2023] for more details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "from bofire.data_models.acquisition_functions.api import qMFMES\n", + "\n", + "\n", + "mf_mes_data_model = MultiFidelityStrategy(\n", + " domain=mf_benchmark.domain,\n", + " acquisition_function=qLogEI(),\n", + " fidelity_acquisition_function=qMFMES(fidelity_costs=[3.0, 1.0]),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "mf_mes_results = pd.DataFrame(\n", + " columns=pd.MultiIndex.from_tuples([], names=(\"col\", \"run\"))\n", + ")\n", + "for run_idx in range(num_runs):\n", + " results = run_bo_loop(mf_mes_data_model, run_idx, last_itr_high_fidelity=True)\n", + " mf_mes_results = pd.concat((mf_mes_results, results), axis=1)\n", + "\n", + "mf_mes_results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(4, 4))\n", + "fidelity_costs = mf_mes_data_model.fidelity_acquisition_function.fidelity_costs\n", + "cost_ratio = fidelity_costs[0] / fidelity_costs[1]\n", + "\n", + "plot_regret(\n", + " ax,\n", + " tl_results,\n", + " fidelity_cost_ratio=cost_ratio,\n", + " label=\"Transfer Learning\",\n", + " color=\"blue\",\n", + ")\n", + "plot_regret(\n", + " ax,\n", + " mf_results,\n", + " fidelity_cost_ratio=cost_ratio,\n", + " label=\"Multi-fidelity (Variance)\",\n", + " color=\"green\",\n", + ")\n", + "plot_regret(\n", + " ax,\n", + " mf_mes_results,\n", + " fidelity_cost_ratio=cost_ratio,\n", + " label=\"Multi-fidelity (MES)\",\n", + " color=\"orange\",\n", + ")\n", + "\n", + "ax.set_xlabel(\"Time step\")\n", + "ax.set_title(f\"Fidelity cost ratio = {cost_ratio}\")\n", + "ax.legend()\n", + "ax.set_ylabel(\"Regret\")\n", + "\n", + "plt.show()" + ] } ], "metadata": {