diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c94e246bb..6d2b980a3 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -28,7 +28,7 @@ on: env: package-name: smac test-dir: tests - extra-requires: "[gpytorch,dev]" + extra-requires: "[gpytorch,dev,tabpfn]" # Arguments used for pytest pytest-args: >- diff --git a/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py new file mode 100644 index 000000000..a5bec161d --- /dev/null +++ b/examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py @@ -0,0 +1,103 @@ +"""Support Vector Machine with Cross-Validation +# Flags: doc-Runnable + +An example of optimizing a simple support vector machine on the IRIS dataset. We use the +hyperparameter optimization facade, which uses a random forest as its surrogate model. It is able to +scale to higher evaluation budgets and a higher number of dimensions. Also, you can use mixed data +types as well as conditional hyperparameters. +""" + +import numpy as np +from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer +from ConfigSpace.conditions import InCondition +from sklearn import datasets, svm +from sklearn.model_selection import cross_val_score +from smac.model.tabPFNv2 import TabPFNModel + +from smac import HyperparameterOptimizationFacade, Scenario +from smac.acquisition.function.tabpfn_acq_fun import RiemannExpectedImprovement + +__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI" +__license__ = "3-clause BSD" + + +# We load the iris-dataset (a widely used benchmark) +iris = datasets.load_iris() + + +class SVM: + @property + def configspace(self) -> ConfigurationSpace: + # Build Configuration Space which defines all parameters and their ranges + cs = ConfigurationSpace(seed=0) + + # First we create our hyperparameters + kernel = Categorical("kernel", ["linear", "poly", "rbf", "sigmoid"], default="poly") + C = Float("C", (0.001, 1000.0), default=1.0, log=True) + shrinking = Categorical("shrinking", [True, False], default=True) + degree = Integer("degree", (1, 5), default=3) + coef = Float("coef0", (0.0, 10.0), default=0.0) + gamma = Categorical("gamma", ["auto", "value"], default="auto") + gamma_value = Float("gamma_value", (0.0001, 8.0), default=1.0, log=True) + + # Then we create dependencies + use_degree = InCondition(child=degree, parent=kernel, values=["poly"]) + use_coef = InCondition(child=coef, parent=kernel, values=["poly", "sigmoid"]) + use_gamma = InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]) + use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"]) + + # Add hyperparameters and conditions to our configspace + cs.add([kernel, C, shrinking, degree, coef, gamma, gamma_value]) + cs.add([use_degree, use_coef, use_gamma, use_gamma_value]) + + return cs + + def train(self, config: Configuration, seed: int = 0) -> float: + """Creates a SVM based on a configuration and evaluates it on the + iris-dataset using cross-validation.""" + config_dict = dict(config) + if "gamma" in config: + config_dict["gamma"] = config_dict["gamma_value"] if config_dict["gamma"] == "value" else "auto" + config_dict.pop("gamma_value", None) + + classifier = svm.SVC(**config_dict, random_state=seed) + scores = cross_val_score(classifier, iris.data, iris.target, cv=5) + cost = 1 - np.mean(scores) + + return cost + + +if __name__ == "__main__": + classifier = SVM() + + # Next, we create an object, holding general information about the run + scenario = Scenario( + classifier.configspace, + n_trials=50, # We want to run max 50 trials (combination of config and seed) + ) + + # We want to run the facade's default initial design, but we want to change the number + # of initial configs to 5. + initial_design = HyperparameterOptimizationFacade.get_initial_design(scenario, n_configs=5) + + acq_fun = RiemannExpectedImprovement() # we will set the runhistory later + + # Now we use SMAC to find the best hyperparameters + smac = HyperparameterOptimizationFacade( + scenario, + classifier.train, + initial_design=initial_design, + overwrite=True, # If the run exists, we overwrite it; alternatively, we can continue from last state + model=TabPFNModel(configspace=scenario.configspace, seed=scenario.seed), # use TabPFN as surrogate model + acquisition_function=acq_fun, # use TabPFN-based EI as acquisition function + ) + + incumbent = smac.optimize() + + # Get cost of default configuration + default_cost = smac.validate(classifier.configspace.get_default_configuration()) + print(f"Default cost: {default_cost}") + + # Let's calculate the cost of the incumbent + incumbent_cost = smac.validate(incumbent) + print(f"Incumbent cost: {incumbent_cost}") diff --git a/setup.py b/setup.py index 6ac234efe..092a7b495 100644 --- a/setup.py +++ b/setup.py @@ -54,8 +54,9 @@ def read_file(filepath: str) -> str: "black", # This allows mkdocstrings to format signatures in the docs "pytest", "pytest-coverage", - "pytest-cases", + "pytest-cases" ], + "tabpfn":["tabpfn"] } setuptools.setup( @@ -85,7 +86,7 @@ def read_file(filepath: str) -> str: "dask_jobqueue>=0.8.2", "emcee>=3.0.0", "regex", - "pyyaml", + "pyyaml" ], extras_require=extras_require, test_suite="pytest", diff --git a/smac/acquisition/function/tabpfn_acq_fun.py b/smac/acquisition/function/tabpfn_acq_fun.py new file mode 100644 index 000000000..7de87f720 --- /dev/null +++ b/smac/acquisition/function/tabpfn_acq_fun.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import Any, cast + +import numpy as np +import torch + +from smac.acquisition.function import AbstractAcquisitionFunction +from smac.model.tabPFNv2 import TabPFNModel # <--- Adjust path as necessary + + +class RiemannExpectedImprovement(AbstractAcquisitionFunction): + """Expected Improvement computed from a discrete (Riemann) predictive distribution. + + This version is designed for TabPFN/PFNs4BO models that output discrete logits + rather than Gaussian mean/variance pairs. + """ + + @property + def name(self) -> str: # noqa: D102 + return "RiemannExpectedImprovement" + + def _update(self, **kwargs: Any) -> None: + """Called after the model is fitted. Updates current best (f_best).""" + if self.model is None: + raise ValueError("No model attached to acquisition function.") + assert "eta" in kwargs + self._eta = kwargs["eta"] + + def _compute(self, X: np.ndarray) -> np.ndarray: + """Compute Riemann-based EI for given X.""" + if self.model is None: + raise ValueError("Model not set for acquisition function.") + + model = cast(TabPFNModel, self.model) + + # Impute, transform, scale + X_imputed = model._x_imputer.transform(X) + X_transformed = model._x_pt.transform(X_imputed) + X_scaled = model._x_scaler.transform(X_transformed) + + assert model._tabpfn is not None + with torch.no_grad(): + pred = model._tabpfn.predict(X_scaled, output_type="full") + + # change sign because TabPFN maximizes by default + ei = pred["criterion"].ei(pred["logits"], (-1) * self._eta) + return ei.cpu().numpy().reshape(-1, 1) diff --git a/smac/model/__init__.py b/smac/model/__init__.py index fd3396791..3129d36c1 100644 --- a/smac/model/__init__.py +++ b/smac/model/__init__.py @@ -2,8 +2,11 @@ from smac.model.multi_objective_model import MultiObjectiveModel from smac.model.random_model import RandomModel -__all__ = [ - "AbstractModel", - "MultiObjectiveModel", - "RandomModel", -] +__all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel"] + +try: + from smac.model.tabPFNv2 import TabPFNModel + + __all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel", "TabPFNModel"] +except ImportError: + pass diff --git a/smac/model/tabPFNv2.py b/smac/model/tabPFNv2.py new file mode 100644 index 000000000..583f2e14d --- /dev/null +++ b/smac/model/tabPFNv2.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import torch +from ConfigSpace import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter +from sklearn.impute import SimpleImputer +from sklearn.preprocessing import PowerTransformer, StandardScaler +from tabpfn import TabPFNRegressor + +from smac.model.abstract_model import AbstractModel +from smac.utils.logging import get_logger + +__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI" +__license__ = "3-clause BSD" + +logger = get_logger(__name__) + + +class TabPFNModel(AbstractModel): + """TabPFNModel, for more details check: https://github.com/PriorLabs/TabPFN. + + Parameters + ---------- + instance_features : dict[str, list[int | float]] | None, defaults to None + Features (list of int or floats) of the instances (str). The features are incorporated into the X data, + on which the model is trained on. + pca_components : float, defaults to 7 + Number of components to keep when using PCA to reduce dimensionality of instance features. + seed : int + n_estimators : int, defaults to 8 + The number of estimators in the TabPFN ensemble. + softmax_temperature : float, defaults to 0.9 + The temperature for the softmax function. + """ + + def __init__( + self, + configspace: ConfigurationSpace, + instance_features: dict[str, list[int | float]] | None = None, + pca_components: int | None = 7, + seed: int = 0, + n_estimators: int = 8, + softmax_temperature: float = 0.9, + ) -> None: + super().__init__( + configspace=configspace, + instance_features=instance_features, + pca_components=pca_components, + seed=seed, + ) + + self._tabpfn = None + self.n_estimators = n_estimators + self.categorical_features_indices = [ + i for i, hp in enumerate(list(configspace.values())) if isinstance(hp, CategoricalHyperparameter) + ] + self.softmax_temperature = softmax_temperature + self.random_state = seed + + self._x_imputer = SimpleImputer(strategy="mean") + self._x_pt = PowerTransformer(method="yeo-johnson", standardize=False) + self._x_scaler = StandardScaler() + + self._y_pt = PowerTransformer(method="yeo-johnson", standardize=False) + self._y_scaler = StandardScaler() + + @property + def meta(self) -> dict[str, Any]: + """Returns the metadata of the model. + + Returns + ------- + dict[str, Any]: meta data + """ + meta = super().meta + meta.update( + { + "pca_components": self._pca_components, + } + ) + return meta + + def _train(self, X: np.ndarray, y: np.ndarray) -> TabPFNModel: + self._tabpfn = self._get_tabpfn() + if self._tabpfn is None: + raise AssertionError("TabPFNRegressor is not initialized properly!") + + # Impute, transform, scale + X_imputed = self._x_imputer.fit_transform(X) + X_transformed = self._x_pt.fit_transform(X_imputed) + X_scaled = self._x_scaler.fit_transform(X_transformed) + + y = y.flatten() + y_transformed = self._y_pt.fit_transform(y.reshape(-1, 1)) + y_scaled = self._y_scaler.fit_transform(y_transformed) + y_scaled = y_scaled.flatten() + + self._tabpfn.fit(X_scaled, y_scaled) + self._is_trained = True + return self + + def _predict( + self, + X: np.ndarray, + covariance_type: str | None = "diagonal", + ) -> tuple[np.ndarray, np.ndarray | None]: + if len(X.shape) != 2: + raise ValueError("Expected 2d array, got %dd array!" % len(X.shape)) + + if X.shape[1] != len(self._types): + raise ValueError("Rows in X should have %d entries but have %d!" % (len(self._types), X.shape[1])) + + if covariance_type != "diagonal": + raise ValueError("`covariance_type` can only take `diagonal` for this model.") + + assert self._tabpfn is not None + + # Impute, transform, scale + X_imputed = self._x_imputer.transform(X) + X_transformed = self._x_pt.transform(X_imputed) + X_scaled = self._x_scaler.transform(X_transformed) + + with torch.no_grad(): + out_dict = self._tabpfn.predict(X_scaled, output_type="full") + + # Variance estimation is difficult with TabPFN, it can have very large variances + var = out_dict["criterion"].variance(out_dict["logits"]).cpu().detach().numpy() + var = var.flatten() + var = np.maximum(var, 1e-6) + + y_pred = self._y_scaler.inverse_transform(out_dict["mean"].reshape(-1, 1)) + y_pred = self._y_pt.inverse_transform(y_pred) + + return y_pred.flatten(), var + + def _get_tabpfn(self) -> TabPFNRegressor: + """Return a TabPFNRegressor instance with the specified parameters. + The fit_mode is set to 'low_memory' because the model is often retrained. + + Returns + ------- + TabPFNRegressor: TabPFNRegressor. + """ + return TabPFNRegressor( + n_estimators=self.n_estimators, + categorical_features_indices=self.categorical_features_indices, + softmax_temperature=self.softmax_temperature, + fit_mode="low_memory", + ) diff --git a/tests/test_model/test_tabpfn.py b/tests/test_model/test_tabpfn.py new file mode 100644 index 000000000..01fd2d68d --- /dev/null +++ b/tests/test_model/test_tabpfn.py @@ -0,0 +1,155 @@ +import numpy as np +import sys +import pytest + +from ConfigSpace import ( + CategoricalHyperparameter, + ConfigurationSpace, + OrdinalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +try: + from smac.model.tabPFNv2 import TabPFNModel +except ImportError: + pass + +pytestmark = pytest.mark.skipif( + sys.version_info < (3, 9), + reason="tabpfn requires Python >=3.9" +) + +__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI" +__license__ = "3-clause BSD" + + +def _get_cs(n_dimensions): + configspace = ConfigurationSpace(seed=0) + for i in range(n_dimensions): + configspace.add(UniformFloatHyperparameter("x%d" % i, 0, 1)) + + return configspace + + +def test_predict_wrong_X_dimensions(): + rs = np.random.RandomState(1) + + model = TabPFNModel( + configspace=_get_cs(10), + ) + X = rs.rand(10) + with pytest.raises(ValueError, match="Expected 2d array.*"): + model.predict(X) + + X = rs.rand(10, 10, 10) + with pytest.raises(ValueError, match="Expected 2d array.*"): + model.predict(X) + + X = rs.rand(10, 5) + + with pytest.raises(ValueError, match="Feature mismatch: .*"): + model.predict(X) + + +def test_predict(): + rs = np.random.RandomState(1) + X = rs.rand(20, 10) + Y = rs.rand(10, 1) + model = TabPFNModel(configspace=_get_cs(10)) + model.train(X[:10], Y[:10]) + m_hat, v_hat = model.predict(X[10:]) + assert m_hat.shape == (10, 1) + assert v_hat.shape == (10, 1) + + +def test_train_with_pca(): + rs = np.random.RandomState(1) + X = rs.rand(20, 20) + Y = rs.rand(20, 1) + + F = {} + for i in range(10): + F[f"instance-{i}"] = list(rs.rand(10)) + + model = TabPFNModel( + configspace=_get_cs(10), + pca_components=2, + instance_features=F, + ) + model.train(X, Y) + + assert model._n_features == 10 + assert model._n_hps == 10 + assert model._pca is not None + assert model._scaler is not None + + +def test_predict_with_actual_values(): + X = np.array( + [ + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 1.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 1.0], + [1.0, 1.0, 0.0], + [1.0, 1.0, 1.0], + ], + dtype=np.float64, + ) + y = np.array([[0.1], [0.2], [9], [9.2], [100.0], [100.2], [109.0], [109.2]], dtype=np.float64) + model = TabPFNModel( + configspace=_get_cs(3), + instance_features=None, + seed=12345, + ) + model.train(np.vstack((X, X, X, X, X, X, X, X)), np.vstack((y, y, y, y, y, y, y, y))) + + y_hat, _ = model.predict(X) + for y_i, y_hat_i in zip(y.reshape((1, -1)).flatten(), y_hat.reshape((1, -1)).flatten()): + assert pytest.approx(y_i, rel=0.5) == y_hat_i + + +def test_with_ordinal(): + cs = ConfigurationSpace(seed=0) + cs.add(CategoricalHyperparameter("a", [0, 1], default_value=0)) + cs.add(OrdinalHyperparameter("b", [0, 1], default_value=1)) + cs.add(UniformFloatHyperparameter("c", lower=0.0, upper=1.0, default_value=1)) + cs.add(UniformIntegerHyperparameter("d", lower=0, upper=10, default_value=1)) + + F = {} + for i in range(1): + F[f"instance-{i}"] = [0, 0, 0] + + model = TabPFNModel( + configspace=cs, + instance_features=F, + pca_components=9, + ) + + X = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 9.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 1.0, 4.0, 0.0, 0.0, 0.0], + ], + dtype=np.float64, + ) + y = np.array([0, 1, 2, 3], dtype=np.float64) + + X_train = np.vstack((X, X, X, X, X, X, X, X, X, X)) + y_train = np.vstack((y, y, y, y, y, y, y, y, y, y)) + + model.train(X_train, y_train.reshape((-1, 1))) + mean, _ = model.predict(X) + for idx, m in enumerate(mean): + assert pytest.approx(y[idx], abs=0.05) == m + +def test_predict_before_train_raises(): + model = TabPFNModel(configspace=_get_cs(3)) + X = np.random.rand(2, 3) + with pytest.raises(AssertionError): + model.predict(X) \ No newline at end of file