Skip to content

Commit fb1cabe

Browse files
[New Model] TabICLv2 (#259)
* add: TabICLv2
* maint: slurm ray fix and misc
* maint: final PR changes
* fix: typo
* maint: update venv for PR
* fix: ensure correct n_est for v1
1 parent ac9e0d5 commit fb1cabe

File tree

12 files changed

+235
-85
lines changed

12 files changed

+235
-85
lines changed

tabarena/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ dependencies = [
4141
tabpfn = [
4242
"tabpfn>=6.0.5", # We used version 6.0.5
4343
]
44-
tabicl = ["tabicl>=0.1.1"]
44+
tabicl = ["tabicl>=2.0.0"]
4545
ebm = ["interpret-core>=0.7.3"]
4646
search_spaces = ["configspace>=1.2,<2.0"]
4747
realmlp = ["pytabkit>=1.5.0,<2.0"]
@@ -57,7 +57,7 @@ tabprep = []
5757
# union of all above extras (mirrors your "benchmark" extra)
5858
benchmark = [
5959
"tabpfn>=6.0.5",
60-
"tabicl>=0.1.1",
60+
"tabicl>=2.0.0",
6161
"interpret-core>=0.7.3",
6262
"configspace>=1.2,<2.0",
6363
"pytabkit>=1.5.0,<2.0",

tabarena/tabarena/benchmark/models/ag/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from tabarena.benchmark.models.ag.realmlp.realmlp_model import RealMLPModel
77
from tabarena.benchmark.models.ag.sap_rpt_oss.sap_rpt_oss_model import SAPRPTOSSModel
88
from tabarena.benchmark.models.ag.tabdpt.tabdpt_model import TabDPTModel
9-
from tabarena.benchmark.models.ag.tabicl.tabicl_model import TabICLModel
9+
from tabarena.benchmark.models.ag.tabicl.tabicl_model import TabICLModel, TabICLv2Model
1010
from tabarena.benchmark.models.ag.tabm.tabm_model import TabMModel
1111
from tabarena.benchmark.models.ag.tabpfnv2_5.tabpfnv2_5_model import RealTabPFNv25Model
1212
from tabarena.benchmark.models.ag.xrfm.xrfm_model import XRFMModel
@@ -20,6 +20,7 @@
2020
"SAPRPTOSSModel",
2121
"TabDPTModel",
2222
"TabICLModel",
23+
"TabICLv2Model",
2324
"TabMModel",
2425
"XRFMModel",
2526
]
Lines changed: 127 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,21 @@
11
from __future__ import annotations
22

33
import logging
4-
5-
import pandas as pd
4+
from typing import TYPE_CHECKING
65

76
from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
87
from autogluon.common.utils.resource_utils import ResourceManager
98
from autogluon.core.models import AbstractModel
109
from autogluon.tabular import __version__
1110

11+
if TYPE_CHECKING:
12+
import pandas as pd
13+
1214
logger = logging.getLogger(__name__)
1315

1416

15-
# TODO: Verify if crashes when weights are not yet downloaded and fit in parallel
16-
class TabICLModel(AbstractModel):
17-
"""
18-
TabICL is a foundation model for tabular data using in-context learning
17+
class TabICLModelBase(AbstractModel):
18+
"""TabICL is a foundation model for tabular data using in-context learning
1919
that is scalable to larger datasets than TabPFNv2. It is pretrained purely on synthetic data.
2020
TabICL currently only supports classification tasks.
2121
@@ -26,27 +26,57 @@ class TabICLModel(AbstractModel):
2626
Codebase: https://github.com/soda-inria/tabicl
2727
License: BSD-3-Clause
2828
"""
29-
ag_key = "TA-TABICL"
30-
ag_name = "TA-TabICL"
29+
30+
ag_key = "NOTSET"
31+
ag_name = "NOTSET"
3132
ag_priority = 65
33+
seed_name = "random_state"
3234

33-
def get_model_cls(self):
34-
from tabicl import TabICLClassifier
35+
default_classification_model: str | None = None
36+
default_regression_model: str | None = None
3537

38+
def get_model_cls(self):
3639
if self.problem_type in ["binary", "multiclass"]:
40+
from tabicl import TabICLClassifier
41+
3742
model_cls = TabICLClassifier
3843
else:
39-
raise AssertionError(f"Unsupported problem_type: {self.problem_type}")
44+
from tabicl import TabICLRegressor
45+
46+
model_cls = TabICLRegressor
4047
return model_cls
4148

49+
def get_checkpoint_version(self, hyperparameter: dict) -> str:
50+
clf_checkpoint = self.default_classification_model
51+
reg_checkpoint = self.default_regression_model
52+
53+
# Resolve HPO
54+
if "checkpoint_version" in hyperparameter:
55+
if isinstance(hyperparameter["checkpoint_version"], str):
56+
clf_checkpoint = hyperparameter["checkpoint_version"]
57+
reg_checkpoint = hyperparameter["checkpoint_version"]
58+
elif isinstance(hyperparameter["checkpoint_version"], tuple):
59+
clf_checkpoint = hyperparameter["checkpoint_version"][0]
60+
reg_checkpoint = hyperparameter["checkpoint_version"][1]
61+
else:
62+
raise ValueError(
63+
"checkpoint_version hyperparameter must be either "
64+
"a string or a tuple of two strings (clf, reg)."
65+
)
66+
67+
if self.problem_type in ["binary", "multiclass"]:
68+
return clf_checkpoint
69+
70+
return reg_checkpoint
71+
72+
# TODO: is this still correct for TabICLv2?
4273
@staticmethod
4374
def _get_batch_size(n_cells: int):
4475
if n_cells <= 4_000_000:
4576
return 8
46-
elif n_cells <= 6_000_000:
77+
if n_cells <= 6_000_000:
4778
return 4
48-
else:
49-
return 2
79+
return 2
5080

5181
def _fit(
5282
self,
@@ -78,7 +108,11 @@ def _fit(
78108

79109
model_cls = self.get_model_cls()
80110
hyp = self._get_model_params()
81-
hyp["batch_size"] = hyp.get("batch_size", self._get_batch_size(X.shape[0] * X.shape[1]))
111+
hyp["batch_size"] = hyp.get(
112+
"batch_size", self._get_batch_size(X.shape[0] * X.shape[1])
113+
)
114+
hyp["checkpoint_version"] = self.get_checkpoint_version(hyperparameter=hyp)
115+
82116
self.model = model_cls(
83117
**hyp,
84118
device=device,
@@ -90,77 +124,76 @@ def _fit(
90124
y=y,
91125
)
92126

93-
def _set_default_params(self):
94-
default_params = {
95-
"random_state": 42,
96-
}
97-
for param, val in default_params.items():
98-
self._set_default_param_value(param, val)
99-
100-
@classmethod
101-
def supported_problem_types(cls) -> list[str] | None:
102-
return ["binary", "multiclass"]
103-
104127
def _get_default_resources(self) -> tuple[int, int]:
105128
# Use only physical cores for better performance based on benchmarks
106129
num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
107130

108131
num_gpus = min(1, ResourceManager.get_gpu_count_torch(cuda_only=True))
109132
return num_cpus, num_gpus
110133

111-
def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
134+
def get_minimum_resources(
135+
self, is_gpu_available: bool = False
136+
) -> dict[str, int | float]:
112137
return {
113138
"num_cpus": 1,
114139
"num_gpus": 1 if is_gpu_available else 0,
115140
}
116141

117142
def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
118143
hyperparameters = self._get_model_params()
119-
return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)
144+
return self.estimate_memory_usage_static(
145+
X=X,
146+
problem_type=self.problem_type,
147+
num_classes=self.num_classes,
148+
hyperparameters=hyperparameters,
149+
**kwargs,
150+
)
120151

152+
# TODO: move memory estimate to specific models below.
121153
@classmethod
122154
def _estimate_memory_usage_static(
123155
cls,
124156
*,
125157
X: pd.DataFrame,
126-
hyperparameters: dict = None,
158+
hyperparameters: dict | None = None,
127159
**kwargs,
128160
) -> int:
129-
"""
130-
Heuristic memory estimate that is very primitive.
161+
"""Heuristic memory estimate that is very primitive.
131162
Can be vastly improved.
132163
"""
133164
if hyperparameters is None:
134165
hyperparameters = {}
135166

136-
dataset_size_mem_est = 3 * get_approximate_df_mem_usage(X).sum() # roughly 3x DataFrame memory size
167+
dataset_size_mem_est = (
168+
3 * get_approximate_df_mem_usage(X).sum()
169+
) # roughly 3x DataFrame memory size
137170
baseline_overhead_mem_est = 1e9 # 1 GB generic overhead
138171

139172
n_rows = X.shape[0]
140173
n_features = X.shape[1]
141-
batch_size = hyperparameters.get("batch_size", cls._get_batch_size(X.shape[0] * X.shape[1]))
174+
batch_size = hyperparameters.get(
175+
"batch_size", cls._get_batch_size(X.shape[0] * X.shape[1])
176+
)
142177
embedding_dim = 128
143178
bytes_per_float = 4
144-
model_mem_estimate = 2 * batch_size * embedding_dim * bytes_per_float * (4 + n_rows) * n_features
179+
model_mem_estimate = (
180+
2 * batch_size * embedding_dim * bytes_per_float * (4 + n_rows) * n_features
181+
)
145182

146183
model_mem_estimate *= 1.3 # add 30% buffer
147184

148185
# TODO: Observed memory spikes above expected values on large datasets, increasing mem estimate to compensate
149186
model_mem_estimate *= 2.0 # Note: 1.5 is not large enough, still gets OOM
150187

151-
mem_estimate = model_mem_estimate + dataset_size_mem_est + baseline_overhead_mem_est
152-
153-
return mem_estimate
188+
return model_mem_estimate + dataset_size_mem_est + baseline_overhead_mem_est
154189

155190
@classmethod
156191
def _get_default_ag_args_ensemble(cls, **kwargs) -> dict:
157-
"""
158-
Set fold_fitting_strategy to sequential_local,
192+
"""Set fold_fitting_strategy to sequential_local,
159193
as parallel folding crashes if model weights aren't pre-downloaded.
160194
"""
161195
default_ag_args_ensemble = super()._get_default_ag_args_ensemble(**kwargs)
162196
extra_ag_args_ensemble = {
163-
# FIXME: If parallel, uses way more memory, seems to behave incorrectly, so we force sequential.
164197
"fold_fitting_strategy": "sequential_local",
165198
"refit_folds": True, # Better to refit the model for faster inference and similar quality as the bag.
166199
}
@@ -173,3 +206,57 @@ def _class_tags(cls) -> dict:
173206

174207
def _more_tags(self) -> dict:
175208
return {"can_refit_full": True}
209+
210+
@staticmethod
211+
def checkpoint_search_space() -> list[str | tuple[str, str]]:
212+
raise NotImplementedError("This method must be implemented in the subclass.")
213+
214+
215+
class TabICLModel(TabICLModelBase):
216+
"""TabICLv1.1 model as used on TabArena."""
217+
218+
ag_key = "TA-TABICL"
219+
ag_name = "TA-TabICL"
220+
221+
default_classification_model: str | None = "tabicl-classifier-v1.1-20250506.ckpt"
222+
223+
@classmethod
224+
def supported_problem_types(cls) -> list[str] | None:
225+
return ["binary", "multiclass"]
226+
227+
@staticmethod
228+
def checkpoint_search_space() -> list[str]:
229+
return [
230+
"tabicl-classifier-v1.1-20250506.ckpt",
231+
"tabicl-classifier-v1-20250208.ckpt",
232+
]
233+
234+
def _set_default_params(self):
235+
default_params = {
236+
"n_estimators": 32, # default of TabICLv1
237+
}
238+
for param, val in default_params.items():
239+
self._set_default_param_value(param, val)
240+
241+
class TabICLv2Model(TabICLModelBase):
242+
"""TabICLv2 model as used on TabArena."""
243+
244+
ag_key = "TA-TABICLv2"
245+
ag_name = "TA-TabICLv2"
246+
247+
default_classification_model: str | None = "tabicl-classifier-v2-20260212.ckpt"
248+
default_regression_model: str | None = "tabicl-regressor-v2-20260212.ckpt"
249+
250+
@classmethod
251+
def supported_problem_types(cls) -> list[str] | None:
252+
return ["binary", "multiclass", "regression"]
253+
254+
# TODO: search over v1 checkpoints too?
255+
@staticmethod
256+
def checkpoint_search_space() -> list[tuple[str, str]]:
257+
return [
258+
(
259+
"tabicl-classifier-v2-20260212.ckpt",
260+
"tabicl-regressor-v2-20260212.ckpt",
261+
)
262+
]

tabarena/tabarena/benchmark/models/model_registry.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
SAPRPTOSSModel,
1414
TabDPTModel,
1515
TabICLModel,
16+
TabICLv2Model,
1617
TabMModel,
1718
XRFMModel,
1819
)
@@ -30,6 +31,7 @@
3031
KNNNewModel,
3132
RealTabPFNv25Model,
3233
SAPRPTOSSModel,
34+
TabICLv2Model,
3335
]
3436

3537
for _model_cls in _models_to_add:
Lines changed: 31 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,21 @@
11
from __future__ import annotations
22

3+
from copy import deepcopy
4+
35
from autogluon.common.space import Categorical, Real
46

5-
from tabarena.benchmark.models.ag.tabicl.tabicl_model import TabICLModel
7+
from tabarena.benchmark.models.ag.tabicl.tabicl_model import (
8+
TabICLModel,
9+
TabICLModelBase,
10+
TabICLv2Model,
11+
)
612
from tabarena.utils.config_utils import ConfigGenerator
713

8-
name = "TabICL"
9-
manual_configs = [
10-
# Default config with refit after cross-validation.
11-
{"ag_args_ensemble": {"refit_folds": True}},
12-
]
13-
1414
# Unofficial search space
15-
search_space = {
16-
"checkpoint_version": Categorical("tabicl-classifier-v1.1-0506.ckpt", "tabicl-classifier-v1-0208.ckpt"),
17-
"norm_methods": Categorical("none", "power", "robust", "quantile_rtdl", ["none", "power"]),
15+
base_search_space = {
16+
"norm_methods": Categorical(
17+
"none", "power", "robust", "quantile_rtdl", ["none", "power"]
18+
),
1819
# just in case, tuning between TabICL and TabPFN defaults
1920
"outlier_threshold": Real(4.0, 12.0),
2021
"average_logits": Categorical(False, True),
@@ -24,9 +25,20 @@
2425
"ag_args_ensemble": Categorical({"refit_folds": True}),
2526
}
2627

27-
gen_tabicl = ConfigGenerator(
28-
model_cls=TabICLModel, manual_configs=manual_configs, search_space=search_space
29-
)
28+
29+
def get_gen_function(model_cls: TabICLModelBase):
30+
search_space = deepcopy(base_search_space)
31+
search_space["checkpoint_version"] = Categorical(
32+
*model_cls.checkpoint_search_space()
33+
)
34+
return ConfigGenerator(
35+
model_cls=model_cls, manual_configs=[{}], search_space=search_space
36+
)
37+
38+
39+
gen_tabicl = get_gen_function(TabICLModel)
40+
41+
gen_tabiclv2 = get_gen_function(TabICLv2Model)
3042

3143
if __name__ == "__main__":
3244
from tabarena.benchmark.experiment import YamlExperimentSerializer
@@ -36,3 +48,9 @@
3648
experiments=gen_tabicl.generate_all_bag_experiments(num_random_configs=0),
3749
),
3850
)
51+
52+
print(
53+
YamlExperimentSerializer.to_yaml_str(
54+
experiments=gen_tabiclv2.generate_all_bag_experiments(num_random_configs=0),
55+
),
56+
)

tabarena/tabarena/models/utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ def get_configs_generator_from_name(model_name: str):
4848
"xRFM": lambda: importlib.import_module("tabarena.models.xrfm.generate").gen_xrfm,
4949
"RealTabPFN-v2.5": lambda: importlib.import_module("tabarena.models.tabpfnv2_5.generate").gen_realtabpfnv25,
5050
"SAP-RPT-OSS": lambda: importlib.import_module("tabarena.models.sap_rpt_oss.generate").gen_sap_rpt_oss,
51+
"TabICLv2": lambda: importlib.import_module("tabarena.models.tabicl.generate").gen_tabiclv2,
5152
}
5253

5354
if model_name not in name_to_import_map:

0 commit comments

Comments (0)