Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from bofire.data_models.surrogates.surrogate import Surrogate
from bofire.data_models.surrogates.tanimoto_gp import TanimotoGPSurrogate
from bofire.data_models.surrogates.enting import EntingSurrogate
from bofire.data_models.surrogates.xgb import XGBoostSurrogate

AbstractSurrogate = Union[Surrogate, BotorchSurrogate, EmpiricalSurrogate]
Expand All @@ -33,6 +34,7 @@
MixedSingleTaskGPSurrogate,
MLPEnsemble,
SaasSingleTaskGPSurrogate,
EntingSurrogate,
XGBoostSurrogate,
LinearSurrogate,
TanimotoGPSurrogate,
Expand All @@ -44,6 +46,7 @@
MixedSingleTaskGPSurrogate,
MLPEnsemble,
SaasSingleTaskGPSurrogate,
EntingSurrogate,
XGBoostSurrogate,
LinearSurrogate,
TanimotoGPSurrogate,
Expand Down
27 changes: 27 additions & 0 deletions bofire/data_models/surrogates/enting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Literal

from pydantic import Field
from typing_extensions import Annotated

from bofire.data_models.surrogates.surrogate import Surrogate
from bofire.data_models.surrogates.trainable import TrainableSurrogate


class EntingSurrogate(Surrogate, TrainableSurrogate):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to do it like this, I would call it LGBMEntingSurrogate, and if you have xgb support at some point you could introduce an XGBEntingSurrogate. This has the advantage that the hyperparameter names do not have to be the same across the two models, and if a hyperparameter is only available in one model, you only provide it in that model.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other option would be to have an LGBMSurrogate which just includes the mean model, and to implement LGBMEntingSurrogate as a class inheriting from LGBMSurrogate that adds the uncertainty parameters. But I know that the current Entmoot model is set up a bit differently, so in this case you would have two different _fit methods: the first just uses LGBM and the second one uses ENTMOOT.

Thinking this further, one could even build it up by having LGBMSurrogate and XGBSurrogate classes which are ENTMOOT-agnostic and only take care of the mean model, plus an EntingUncertaintySurrogate which adds the uncertainty part to the model. Then one could build the LGBMEntingSurrogate by inheriting from LGBMSurrogate and EntingUncertaintySurrogate. This would be the super object-oriented solution, but as these kinds of structures are currently not supported by ENTMOOT, I think it is ok to just go with the flat LGBMEntingSurrogate.

What do you think?

# Discriminator tag; the only accepted value is "EntingSurrogate".
type: Literal["EntingSurrogate"] = "EntingSurrogate"
# Tree-training backend; "lgbm" is the only accepted value.
train_lib: Literal["lgbm"] = "lgbm"
# mean model parameters
# (the surrogate implementation forwards these under "train_params";
# the integer fields are validated to be >= 1)
objective: str = "regression"
metric: str = "rmse"
boosting: str = "gbdt"
num_boost_round: Annotated[int, Field(ge=1)] = 100
max_depth: Annotated[int, Field(ge=1)] = 3
min_data_in_leaf: Annotated[int, Field(ge=1)] = 1
min_data_per_group: Annotated[int, Field(ge=1)] = 1

# uncertainty model parameters
# (the surrogate implementation forwards these under "unc_params";
# `beta` must be strictly positive, the others are restricted to the listed
# literal choices)
beta: Annotated[float, Field(gt=0)] = 1.96
acq_sense: Literal["exploration", "penalty"] = "exploration"
dist_trafo: Literal["normal", "standard"] = "normal"
dist_metric: Literal["euclidean_squared", "l1", "l2"] = "euclidean_squared"
cat_metric: Literal["overlap", "of", "goodall4"] = "overlap"
1 change: 1 addition & 0 deletions bofire/surrogates/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from bofire.surrogates.botorch_surrogates import BotorchSurrogates
from bofire.surrogates.empirical import EmpiricalSurrogate
from bofire.surrogates.enting import EntingSurrogate
from bofire.surrogates.mapper import map
from bofire.surrogates.mixed_single_task_gp import MixedSingleTaskGPSurrogate
from bofire.surrogates.mlp import MLPEnsemble
Expand Down
84 changes: 84 additions & 0 deletions bofire/surrogates/enting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import warnings
from typing import Tuple

import numpy as np
import pandas as pd

try:
from entmoot.models.enting import Enting # type: ignore
from entmoot.problem_config import ProblemConfig
except ImportError:
warnings.warn("entmoot not installed, BoFire's `EntingSurrogate` cannot be used.")

import uuid

from bofire.data_models.surrogates.api import EntingSurrogate as DataModel
from bofire.surrogates.surrogate import Surrogate
from bofire.surrogates.trainable import TrainableSurrogate


class EntingSurrogate(TrainableSurrogate, Surrogate):
    """Surrogate that wraps ENTMOOT's ``Enting`` tree-ensemble model.

    The hyperparameters are copied from the data model onto the instance and
    assembled into ENTMOOT's nested parameter dictionary by
    ``_get_params_dict``.
    """

    def __init__(self, data_model: DataModel, **kwargs) -> None:
        """Copy all hyperparameters from ``data_model`` onto the instance.

        Args:
            data_model: The ``EntingSurrogate`` data model to read from.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        self.train_lib = data_model.train_lib

        # mean model parameters
        self.objective = data_model.objective
        self.metric = data_model.metric
        self.boosting = data_model.boosting
        self.num_boost_round = data_model.num_boost_round
        self.max_depth = data_model.max_depth
        self.min_data_in_leaf = data_model.min_data_in_leaf
        self.min_data_per_group = data_model.min_data_per_group

        # uncertainty model parameters
        self.beta = data_model.beta
        self.acq_sense = data_model.acq_sense
        self.dist_trafo = data_model.dist_trafo
        self.dist_metric = data_model.dist_metric
        self.cat_metric = data_model.cat_metric

        # Unique file name per instance (random UUID); not used elsewhere in
        # this class yet.
        self.tmpfile_name = f"enting_{uuid.uuid4().hex}.json"
        super().__init__(data_model=data_model, **kwargs)

    def _get_params_dict(self):
        """Build the nested parameter dictionary handed to ``Enting``.

        Returns:
            A dict with two top-level sections: ``"tree_train_params"``
            (training library plus its ``"train_params"``) and
            ``"unc_params"`` (uncertainty model settings).
        """
        return {
            "tree_train_params": {
                "train_lib": self.train_lib,
                "train_params": {
                    "objective": self.objective,
                    "metric": self.metric,
                    "boosting": self.boosting,
                    "num_boost_round": self.num_boost_round,
                    "max_depth": self.max_depth,
                    "min_data_in_leaf": self.min_data_in_leaf,
                    "min_data_per_group": self.min_data_per_group,
                },
            },
            # "unc_params" is a sibling of "tree_train_params", not a child:
            # the tree section is consumed by the mean model only.
            "unc_params": {
                "beta": self.beta,
                "acq_sense": self.acq_sense,
                "dist_trafo": self.dist_trafo,
                "dist_metric": self.dist_metric,
                "cat_metric": self.cat_metric,
            },
        }

    def _fit(self, X: pd.DataFrame, Y: pd.DataFrame, **kwargs):
        """Fit a fresh ``Enting`` model on the transformed inputs.

        Args:
            X: Input experiments; transformed with the surrogate's input
                preprocessing specs before fitting.
            Y: Output values; passed to the model as a NumPy array.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        transformed_X = self.inputs.transform(X, self.input_preprocessing_specs)
        # NOTE(review): ENTMOOT's Enting presumably also requires a
        # ProblemConfig describing the features; none is built here yet.
        # TODO: derive it from self.inputs / self.outputs and pass it along.
        self.model = Enting(params=self._get_params_dict())
        self.model.fit(X=transformed_X.values, y=Y.values)

    def _predict(self, transformed_X: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """Predict means and standard deviations for transformed inputs.

        Args:
            transformed_X: Already-transformed inputs.

        Returns:
            A ``(mean, std)`` tuple of arrays. The single uncertainty value
            per sample is repeated for every mean column.
        """
        preds = self.model.predict(transformed_X.to_numpy())
        # pred has shape [([mu1], std1), ([mu2], std2), ... ]
        m, v = zip(*preds)
        mean = np.array(m)
        # The square root of the second element is taken before use.
        std = np.sqrt(np.array(v)).reshape(-1, 1)
        # std is given combined - copy for each objective
        std = np.tile(std, mean.shape[1])
        return mean, std

    def loads(self, data: str):
        """Load a serialized model (not implemented yet).

        Raises:
            NotImplementedError: Always; a silent no-op would leave the
                surrogate unfitted while the caller assumes it was restored.
        """
        raise NotImplementedError(
            "Loading is not implemented for EntingSurrogate yet."
        )

    def _dumps(self) -> str:
        """Serialize the fitted model (not implemented yet).

        Raises:
            NotImplementedError: Always; returning ``None`` would violate the
                declared ``str`` return type.
        """
        raise NotImplementedError(
            "Dumping is not implemented for EntingSurrogate yet."
        )
69 changes: 69 additions & 0 deletions tests/bofire/surrogates/test_enting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import importlib

import pytest

import bofire.surrogates.api as surrogates
from bofire.benchmarks.single import Himmelblau
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.api import (
CategoricalInput,
ContinuousInput,
ContinuousOutput,
)
from bofire.data_models.surrogates.api import EntingSurrogate

# True when the optional `entmoot` package can be located; used to skip
# tests that need it.
ENTMOOT_AVAILABLE = importlib.util.find_spec("entmoot") is not None


@pytest.mark.skipif(not ENTMOOT_AVAILABLE, reason="requires entmoot")
def test_EntingSurrogate():
    """Map, fit and predict an EntingSurrogate on the Himmelblau benchmark."""
    benchmark = Himmelblau()
    samples = benchmark.domain.inputs.sample(10)
    experiments = benchmark.f(samples, return_complete=True)
    # The data model declares no `n_estimators` field; the number of boosting
    # rounds is configured through `num_boost_round`.
    data_model = EntingSurrogate(
        inputs=benchmark.domain.inputs,
        outputs=benchmark.domain.outputs,
        num_boost_round=2,
    )
    surrogate = surrogates.map(data_model)
    assert isinstance(surrogate, surrogates.EntingSurrogate)
    assert surrogate.input_preprocessing_specs == {}
    assert surrogate.is_fitted is False
    # fit it
    surrogate.fit(experiments=experiments)
    assert surrogate.is_fitted is True
    # predict it
    surrogate.predict(experiments)
    # TODO: enable the dump/load round trip once serialization is implemented:
    # dump = surrogate.dumps()
    # surrogate2 = surrogates.map(data_model)
    # surrogate2.loads(dump)
    # preds2 = surrogate2.predict(experiments)
    # assert_frame_equal(preds, preds2)


@pytest.mark.skipif(not ENTMOOT_AVAILABLE, reason="requires entmoot")
def test_EntingSurrogate_categorical():
    """Fit an EntingSurrogate on two continuous inputs and one categorical."""
    inputs = Inputs(
        features=[
            ContinuousInput(
                key=f"x_{i+1}",
                bounds=(-4, 4),
            )
            for i in range(2)
        ]
        + [CategoricalInput(key="x_cat", categories=["mama", "papa"])]
    )
    outputs = Outputs(features=[ContinuousOutput(key="y")])
    experiments = inputs.sample(n=10)
    # Himmelblau-style response, then scaled differently per category so the
    # categorical input actually influences the output.
    experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True)
    experiments.loc[experiments.x_cat == "mama", "y"] *= 5.0
    experiments.loc[experiments.x_cat == "papa", "y"] /= 2.0
    experiments["valid_y"] = 1
    # The data model declares no `n_estimators` field; the number of boosting
    # rounds is configured through `num_boost_round`.
    data_model = EntingSurrogate(inputs=inputs, outputs=outputs, num_boost_round=2)
    assert data_model.input_preprocessing_specs == {
        "x_cat": CategoricalEncodingEnum.ONE_HOT
    }
    surrogate = surrogates.map(data_model)
    surrogate.fit(experiments)