2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -28,7 +28,7 @@ on:
env:
package-name: smac
test-dir: tests
extra-requires: "[gpytorch,dev]"
extra-requires: "[gpytorch,dev,tabpfn]"

# Arguments used for pytest
pytest-args: >-
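Note: the CI now installs the new tabpfn extra alongside gpytorch and dev, so the TabPFN-backed code paths below have their dependency available during tests. The local equivalent is pip install -e ".[gpytorch,dev,tabpfn]"; the extra itself is declared in setup.py further down.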
103 changes: 103 additions & 0 deletions examples/4_advanced_optimizer/5_tabPFN_surrogate_model.py
@@ -0,0 +1,103 @@
"""Support Vector Machine with Cross-Validation
# Flags: doc-Runnable

An example of optimizing a simple support vector machine on the IRIS dataset. We use the
hyperparameter optimization facade, which uses a random forest as its surrogate model. It is able to
scale to higher evaluation budgets and a higher number of dimensions. Also, you can use mixed data
types as well as conditional hyperparameters.
"""

import numpy as np
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer
from ConfigSpace.conditions import InCondition
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score
from smac.model.tabPFNv2 import TabPFNModel

from smac import HyperparameterOptimizationFacade, Scenario
from smac.acquisition.function.tabpfn_acq_fun import RiemannExpectedImprovement

__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"


# We load the iris-dataset (a widely used benchmark)
iris = datasets.load_iris()


class SVM:
@property
def configspace(self) -> ConfigurationSpace:
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace(seed=0)

# First we create our hyperparameters
kernel = Categorical("kernel", ["linear", "poly", "rbf", "sigmoid"], default="poly")
C = Float("C", (0.001, 1000.0), default=1.0, log=True)
shrinking = Categorical("shrinking", [True, False], default=True)
degree = Integer("degree", (1, 5), default=3)
coef = Float("coef0", (0.0, 10.0), default=0.0)
gamma = Categorical("gamma", ["auto", "value"], default="auto")
gamma_value = Float("gamma_value", (0.0001, 8.0), default=1.0, log=True)

# Then we create dependencies
use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
use_coef = InCondition(child=coef, parent=kernel, values=["poly", "sigmoid"])
use_gamma = InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"])
use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"])

# Add hyperparameters and conditions to our configspace
cs.add([kernel, C, shrinking, degree, coef, gamma, gamma_value])
cs.add([use_degree, use_coef, use_gamma, use_gamma_value])

return cs

def train(self, config: Configuration, seed: int = 0) -> float:
"""Creates a SVM based on a configuration and evaluates it on the
iris-dataset using cross-validation."""
config_dict = dict(config)
if "gamma" in config:
config_dict["gamma"] = config_dict["gamma_value"] if config_dict["gamma"] == "value" else "auto"
config_dict.pop("gamma_value", None)

classifier = svm.SVC(**config_dict, random_state=seed)
scores = cross_val_score(classifier, iris.data, iris.target, cv=5)
cost = 1 - np.mean(scores)

return cost


if __name__ == "__main__":
classifier = SVM()

# Next, we create an object holding general information about the run
scenario = Scenario(
classifier.configspace,
n_trials=50, # We want to run max 50 trials (combination of config and seed)
)

# We use the facade's default initial design, but reduce the number of initial configs to 5.
initial_design = HyperparameterOptimizationFacade.get_initial_design(scenario, n_configs=5)

acq_fun = RiemannExpectedImprovement() # the facade attaches the surrogate model and current best (eta) during optimization

# Now we use SMAC to find the best hyperparameters
smac = HyperparameterOptimizationFacade(
scenario,
classifier.train,
initial_design=initial_design,
overwrite=True, # If the run exists, we overwrite it; alternatively, we can continue from last state
model=TabPFNModel(configspace=scenario.configspace, seed=scenario.seed), # use TabPFN as surrogate model
acquisition_function=acq_fun, # use TabPFN-based EI as acquisition function
)

incumbent = smac.optimize()

# Get cost of default configuration
default_cost = smac.validate(classifier.configspace.get_default_configuration())
print(f"Default cost: {default_cost}")

# Let's calculate the cost of the incumbent
incumbent_cost = smac.validate(incumbent)
print(f"Incumbent cost: {incumbent_cost}")
5 changes: 3 additions & 2 deletions setup.py
@@ -54,8 +54,9 @@ def read_file(filepath: str) -> str:
"black", # This allows mkdocstrings to format signatures in the docs
"pytest",
"pytest-coverage",
"pytest-cases",
"pytest-cases"
],
"tabpfn":["tabpfn"]
}

setuptools.setup(
@@ -85,7 +86,7 @@ def read_file(filepath: str) -> str:
"dask_jobqueue>=0.8.2",
"emcee>=3.0.0",
"regex",
"pyyaml",
"pyyaml"
],
extras_require=extras_require,
test_suite="pytest",
48 changes: 48 additions & 0 deletions smac/acquisition/function/tabpfn_acq_fun.py
@@ -0,0 +1,48 @@
from __future__ import annotations

from typing import Any, cast

import numpy as np
import torch

from smac.acquisition.function import AbstractAcquisitionFunction
from smac.model.tabPFNv2 import TabPFNModel


class RiemannExpectedImprovement(AbstractAcquisitionFunction):
"""Expected Improvement computed from a discrete (Riemann) predictive distribution.

This version is designed for TabPFN/PFNs4BO models that output discrete logits
rather than Gaussian mean/variance pairs.
"""

@property
def name(self) -> str: # noqa: D102
return "RiemannExpectedImprovement"

def _update(self, **kwargs: Any) -> None:
"""Called after the model is fitted. Updates current best (f_best)."""
if self.model is None:
raise ValueError("No model attached to acquisition function.")
assert "eta" in kwargs
self._eta = kwargs["eta"]

def _compute(self, X: np.ndarray) -> np.ndarray:
"""Compute Riemann-based EI for given X."""
if self.model is None:
raise ValueError("Model not set for acquisition function.")

model = cast(TabPFNModel, self.model)

# Impute, transform, scale
X_imputed = model._x_imputer.transform(X)
X_transformed = model._x_pt.transform(X_imputed)
X_scaled = model._x_scaler.transform(X_transformed)

assert model._tabpfn is not None
with torch.no_grad():
pred = model._tabpfn.predict(X_scaled, output_type="full")

# change sign because TabPFN maximizes by default
ei = pred["criterion"].ei(pred["logits"], (-1) * self._eta)
return ei.cpu().numpy().reshape(-1, 1)
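For intuition: Expected Improvement under a discrete (Riemann) predictive distribution is the probability-weighted improvement over the support points. A minimal sketch of that computation, independent of TabPFN's internal criterion object; the bucket probabilities p and support points y_support are illustrative assumptions, and we minimize:

import numpy as np

def riemann_ei(p: np.ndarray, y_support: np.ndarray, f_best: float) -> float:
    """EI = sum_i p_i * max(f_best - y_i, 0): expected improvement over the
    current best when mass p_i sits at support point y_i."""
    improvement = np.maximum(f_best - y_support, 0.0)  # per-bucket improvement
    return float(p @ improvement)  # expectation over the discrete buckets

# Toy usage: only the bucket at 0.2 improves on f_best = 0.5
print(riemann_ei(np.array([0.2, 0.5, 0.3]), np.array([0.2, 0.6, 1.0]), 0.5))  # 0.06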
13 changes: 8 additions & 5 deletions smac/model/__init__.py
@@ -2,8 +2,11 @@
from smac.model.multi_objective_model import MultiObjectiveModel
from smac.model.random_model import RandomModel

__all__ = [
"AbstractModel",
"MultiObjectiveModel",
"RandomModel",
]
__all__ = ["AbstractModel", "MultiObjectiveModel", "RandomModel"]

try:
from smac.model.tabPFNv2 import TabPFNModel

__all__ += ["TabPFNModel"]
except ImportError:
pass
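Downstream code can feature-detect the optional surrogate with the same guarded-import pattern, e.g.:

# Sketch: probe for the optional TabPFN surrogate at runtime.
try:
    from smac.model import TabPFNModel  # present only with the 'tabpfn' extra installed
    HAS_TABPFN = True
except ImportError:
    HAS_TABPFN = False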
152 changes: 152 additions & 0 deletions smac/model/tabPFNv2.py
@@ -0,0 +1,152 @@
from __future__ import annotations

from typing import Any

import numpy as np
import torch
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PowerTransformer, StandardScaler
from tabpfn import TabPFNRegressor

from smac.model.abstract_model import AbstractModel
from smac.utils.logging import get_logger

__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"

logger = get_logger(__name__)


class TabPFNModel(AbstractModel):
"""TabPFNModel, for more details check: https://github.com/PriorLabs/TabPFN.

Parameters
----------
instance_features : dict[str, list[int | float]] | None, defaults to None
Features (list of int or floats) of the instances (str). The features are incorporated into the X data,
on which the model is trained.
pca_components : int | None, defaults to 7
Number of components to keep when using PCA to reduce dimensionality of instance features.
seed : int, defaults to 0
Random seed.
n_estimators : int, defaults to 8
The number of estimators in the TabPFN ensemble.
softmax_temperature : float, defaults to 0.9
The temperature for the softmax function.
"""

def __init__(
self,
configspace: ConfigurationSpace,
instance_features: dict[str, list[int | float]] | None = None,
pca_components: int | None = 7,
seed: int = 0,
n_estimators: int = 8,
softmax_temperature: float = 0.9,
) -> None:
super().__init__(
configspace=configspace,
instance_features=instance_features,
pca_components=pca_components,
seed=seed,
)

self._tabpfn = None
self.n_estimators = n_estimators
self.categorical_features_indices = [
i for i, hp in enumerate(list(configspace.values())) if isinstance(hp, CategoricalHyperparameter)
]
self.softmax_temperature = softmax_temperature
self.random_state = seed

self._x_imputer = SimpleImputer(strategy="mean")
self._x_pt = PowerTransformer(method="yeo-johnson", standardize=False)
self._x_scaler = StandardScaler()

self._y_pt = PowerTransformer(method="yeo-johnson", standardize=False)
self._y_scaler = StandardScaler()

@property
def meta(self) -> dict[str, Any]:
"""Returns the metadata of the model.

Returns
-------
dict[str, Any]: meta data
"""
meta = super().meta
meta.update(
{
"pca_components": self._pca_components,
}
)
return meta

def _train(self, X: np.ndarray, y: np.ndarray) -> TabPFNModel:
self._tabpfn = self._get_tabpfn()
if self._tabpfn is None:
raise AssertionError("TabPFNRegressor is not initialized properly!")

# Impute, transform, scale
X_imputed = self._x_imputer.fit_transform(X)
X_transformed = self._x_pt.fit_transform(X_imputed)
X_scaled = self._x_scaler.fit_transform(X_transformed)

y = y.flatten()
y_transformed = self._y_pt.fit_transform(y.reshape(-1, 1))
y_scaled = self._y_scaler.fit_transform(y_transformed)
y_scaled = y_scaled.flatten()

self._tabpfn.fit(X_scaled, y_scaled)
self._is_trained = True
return self

def _predict(
self,
X: np.ndarray,
covariance_type: str | None = "diagonal",
) -> tuple[np.ndarray, np.ndarray | None]:
if len(X.shape) != 2:
raise ValueError("Expected 2d array, got %dd array!" % len(X.shape))

if X.shape[1] != len(self._types):
raise ValueError("Rows in X should have %d entries but have %d!" % (len(self._types), X.shape[1]))

if covariance_type != "diagonal":
raise ValueError("`covariance_type` can only take `diagonal` for this model.")

assert self._tabpfn is not None

# Impute, transform, scale
X_imputed = self._x_imputer.transform(X)
X_transformed = self._x_pt.transform(X_imputed)
X_scaled = self._x_scaler.transform(X_transformed)

with torch.no_grad():
out_dict = self._tabpfn.predict(X_scaled, output_type="full")

# Variance estimation is difficult with TabPFN; it can produce very large variances
var = out_dict["criterion"].variance(out_dict["logits"]).cpu().detach().numpy()
var = var.flatten()
var = np.maximum(var, 1e-6)

y_pred = self._y_scaler.inverse_transform(out_dict["mean"].reshape(-1, 1))
y_pred = self._y_pt.inverse_transform(y_pred)

return y_pred.flatten(), var

def _get_tabpfn(self) -> TabPFNRegressor:
"""Return a TabPFNRegressor instance with the specified parameters.
The fit_mode is set to 'low_memory' because the model is often retrained.

Returns
-------
TabPFNRegressor: TabPFNRegressor.
"""
return TabPFNRegressor(
n_estimators=self.n_estimators,
categorical_features_indices=self.categorical_features_indices,
softmax_temperature=self.softmax_temperature,
random_state=self.random_state, # pass the stored seed on (assumes TabPFNRegressor's sklearn-style random_state)
fit_mode="low_memory",
)
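A minimal standalone smoke test of the surrogate outside SMAC's optimization loop; this assumes the public train()/predict() wrappers inherited from AbstractModel (as in SMAC 2.x) and an installed tabpfn extra, with illustrative toy data:

import numpy as np
from ConfigSpace import ConfigurationSpace, Float
from smac.model.tabPFNv2 import TabPFNModel

cs = ConfigurationSpace(seed=0)
cs.add([Float("x", (0.0, 1.0))])

model = TabPFNModel(configspace=cs, seed=0)
X = np.random.rand(30, 1)        # 30 sampled configurations, 1 hyperparameter
y = np.sin(3.0 * X).ravel()      # toy objective values (costs)
model.train(X, y)                # fits imputer, power transforms, scaler, and TabPFN
mean, var = model.predict(np.random.rand(5, 1))  # predictive mean and diagonal variance
print(mean, var)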