
Commit db8af12

Innixma and atschalz authored
Tabarena 2025 refinement (#251)
* Initialize with AG LightGBM
* Add new model and search space
* Update PrepGBM: Add residual logic and preprocessors, switch to use LGBModel as superclass; Adapt search space
* Adjust to new tabarena structure
* Adjust to new tabarena structure
* Initialize with AG LightGBM
* Adjust to new tabarena structure
* Make preprocessors and residuals use the correct random seeds
* Add explorative search space
* Remove unnecessary old code
* Remove unnecessary old code
* Undo example changes
* Add functions for linear residual boosting
* Add memory estimation, switch to using AG preprocessors
* Fix bug in memory estimation for OOF-TE
* Adjust search space
* Bug fix in search space
* Add skrub to dependencies
* Minor updates
* Extend preprocessing logic to TabM, TabPFN2.5, CatBoost and XGBoost; Move preprocessing models to a separate directory
* Small bug fix
* Update
* Streamline prep logic
* Refactor prep logic
* Update RealTabPFN-2.5 to new prep logic
* Minor update
* Clean up torch memory and gc
* Add `memory_usage_estimate` tracking
* Stop saving training job logs to S3
* Add missing __init__.py
* Cleanup
* Reduce ModelAgnosticPrepMixin._estimate_memory_usage runtime by 4x
* Minor fix
* Use BulkFeatureGenerator and parallel stage preprocessing, >2x inference speedup
* Minor improvements to linear residual
* Update memory estimation to use n_numeric and n_categorical estimates from preprocessors
* Use `remove_unused_features="false_recursive"` and `post_drop_duplicates=True`
* Update passthrough logic
* Add passthrough_types
* Update
* Update
* Fix pyproject.toml
* Fix YAML serialization
* Update prep_mixin to work with lists instead of tuples
* Update prep_mixin to be faster and fix crashes
* Switch to AG SquashingScaler
* Update CVSplitter import
* Update
* tmp commit
* Update pytabkit version
* Remove debugging code
* Various TabArena updates
* Remove Prep code
* Update to AG 1.5

---------

Co-authored-by: atschalz <[email protected]>
1 parent e860e0b commit db8af12


17 files changed (+146, -29 lines)


tabarena/pyproject.toml

Lines changed: 3 additions & 2 deletions
@@ -15,7 +15,7 @@ requires-python = ">=3.10"
 # uv pip install --prerelease=allow .
 dependencies = [
     # TODO: To use `uv`, you need to do `uv pip install --prerelease=allow .` so it recognizes pre-release AutoGluon
-    "autogluon>=1.4.1b20250910,<1.6", # TODO: Remove after moving `benchmark` code elsewhere
+    "autogluon>=1.5,<1.6", # TODO: Remove after moving `benchmark` code elsewhere
     "bencheval",
     "openml>=0.14.1", # consider making optional
     "pyyaml",
@@ -50,6 +50,7 @@ tabm = ["torch"]
 modernnca = ["category_encoders"]
 xrfm = ["xrfm[cu12]"]
 sap-rpt-oss = ["sap_rpt_oss @ git+https://github.com/SAP-samples/sap-rpt-1-oss.git@a323a0aff976fda4ac43c3196a92406de7689aaa"]
+tabprep = []
 
 # union of all above extras (mirrors your "benchmark" extra)
 benchmark = [
@@ -62,7 +63,7 @@ benchmark = [
     "torch",
     "category_encoders",
     "xrfm[cu12]",
-    "sap_rpt_oss @ git+https://github.com/SAP-samples/sap-rpt-1-oss.git@a323a0aff976fda4ac43c3196a92406de7689aaa"
+    "sap_rpt_oss @ git+https://github.com/SAP-samples/sap-rpt-1-oss.git@a323a0aff976fda4ac43c3196a92406de7689aaa",
 ]
 
 [project.urls]

tabarena/tabarena/benchmark/experiment/experiment_constructor.py

Lines changed: 26 additions & 5 deletions
@@ -534,7 +534,30 @@ def from_yaml(cls, path: str, context=None) -> list[Experiment]:
 
         experiments = []
         for experiment in yaml_out:
-            experiments.append(YamlSingleExperimentSerializer.parse_method(experiment, context=context))
+            experiments.append(
+                YamlSingleExperimentSerializer.parse_method(
+                    experiment, context=context
+                )
+            )
+
+        return experiments
+
+    @classmethod
+    def from_yaml_str(cls, yaml_str: str, context=None) -> list[Experiment]:
+        """
+        Parse a YAML string containing multiple experiment definitions
+        and return a list of Experiment instances.
+        """
+        yaml_out = yaml.safe_load(yaml_str)
+        methods = yaml_out["methods"]
+
+        experiments = []
+        for experiment in methods:
+            experiments.append(
+                YamlSingleExperimentSerializer.parse_method(
+                    experiment, context=context
+                )
+            )
 
         return experiments
 
@@ -562,7 +585,5 @@ def to_yaml_str(cls, experiments: list[Experiment]) -> str:
     def _to_yaml_format(cls, experiments: list[Experiment]) -> dict[str, list[dict]]:
         yaml_lst = []
         for experiment in experiments:
-            yaml_dict = experiment.to_yaml_dict()
-            yaml_lst.append(yaml_dict)
-        yaml_out = {"methods": yaml_lst}
-        return yaml_out
+            yaml_lst.append(experiment.to_yaml_dict())
+        return {"methods": yaml_lst}
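As a usage sketch, the YAML layout that the new from_yaml_str expects mirrors what _to_yaml_format produces: a document with a top-level methods list, each entry handed to YamlSingleExperimentSerializer.parse_method. The enclosing serializer class name is not visible in this hunk, so the commented call below uses an assumed name, and the entry fields are placeholders rather than the real experiment schema.

import yaml

# Minimal document in the expected layout; fields are illustrative only.
yaml_str = """\
methods:
  - name: Dummy_config  # hypothetical entry
"""

parsed = yaml.safe_load(yaml_str)
assert isinstance(parsed["methods"], list)  # the key from_yaml_str reads

# With real entries, parsing would look like (class name assumed):
# experiments = YamlExperimentSerializer.from_yaml_str(yaml_str)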

tabarena/tabarena/benchmark/models/ag/tabdpt/tabdpt_model.py

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@
 import pandas as pd
 
 
+# FIXME: Add CPU loading support (.to(device))
 class TabDPTModel(AbstractModel):
     ag_key = "TA-TABDPT"
     ag_name = "TA-TabDPT"

tabarena/tabarena/benchmark/models/ag/tabm/tabm_model.py

Lines changed: 4 additions & 1 deletion
@@ -273,7 +273,10 @@ def get_tabm_auto_batch_size(cls, n_samples: int) -> int:
 
     @classmethod
     def _class_tags(cls):
-        return {"can_estimate_memory_usage_static": True}
+        return {
+            "can_estimate_memory_usage_static": True,
+            "reset_torch_threads": True,
+        }
 
     def _more_tags(self) -> dict:
         # TODO: Need to add train params support, track best epoch

tabarena/tabarena/benchmark/models/ag/tabpfnv2_5/tabpfnv2_5_model.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def _fit(
                 "Please switch to CPU usage instead.",
             )
 
-        X = self.preprocess(X, is_train=True)
+        X = self.preprocess(X, y=y, is_train=True)
 
         hps = self._get_model_params()
         hps["device"] = device
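Passing y into preprocess at train time lets target-aware transformers (the commit message above mentions out-of-fold target encoding) fit on labels, while inference-time preprocessing stays label-free. A toy illustration of that split, using scikit-learn's TargetEncoder as a stand-in rather than the model's actual preprocessing code:

import pandas as pd
from sklearn.preprocessing import TargetEncoder  # illustrative encoder only


class TargetAwarePreprocessor:
    """Toy stand-in: needs y when fitting, but not when transforming at inference."""

    def __init__(self) -> None:
        self._encoder = TargetEncoder()

    def fit_transform(self, X: pd.DataFrame, y: pd.Series) -> pd.DataFrame:
        # Train-time path, analogous to preprocess(X, y=y, is_train=True).
        return pd.DataFrame(self._encoder.fit_transform(X, y), index=X.index)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        # Inference-time path: no labels required.
        return pd.DataFrame(self._encoder.transform(X), index=X.index)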

tabarena/tabarena/benchmark/models/wrapper/AutoGluon_class.py

Lines changed: 19 additions & 4 deletions
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import copy
+import gc
 import shutil
 from typing import Type
 
@@ -73,6 +74,15 @@ def get_metric_error_val(self) -> float:
 
     def cleanup(self):
         shutil.rmtree(self.predictor.path, ignore_errors=True)
+        gc.collect()
+        try:
+            import torch
+        except ImportError:
+            pass
+        else:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
 
 
 class AGSingleWrapper(AGWrapper):
@@ -153,10 +163,13 @@ def model_cls(self) -> Type["AbstractModel"]:
         model_cls = ag_model_registry.key_to_cls(key=self._model_cls)
         return model_cls
 
-    def _load_model(self):
+    def _load_model(self, assert_single_model: bool = True):
         model_names = self.predictor.model_names(can_infer=True)
-        assert len(model_names) == 1
-        model_name = self.predictor.model_names()[0]
+        if assert_single_model:
+            assert len(model_names) == 1
+            model_name = self.predictor.model_names()[0]
+        else:
+            model_name = self.predictor.model_best
         return self.predictor._trainer.load_model(model_name)
 
     def get_metadata_init(self) -> dict:
@@ -172,14 +185,16 @@ def get_metadata_init(self) -> dict:
 
     def get_metadata_fit(self) -> dict:
        metadata = {}
-        model = self._load_model()
+        model = self._load_model(assert_single_model=False)
         metadata["info"] = model.get_info(include_feature_metadata=False)
         metadata["disk_usage"] = model.disk_usage()
         metadata["num_cpus"] = model.fit_num_cpus
         metadata["num_gpus"] = model.fit_num_gpus
         metadata["num_cpus_child"] = model.fit_num_cpus_child
         metadata["num_gpus_child"] = model.fit_num_gpus_child
         metadata["fit_metadata"] = model.get_fit_metadata()
+        if hasattr(model, "_memory_usage_estimate"):
+            metadata["memory_usage_estimate"] = model._memory_usage_estimate
         return metadata
 
     def get_metadata_failure(self) -> dict:
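The cleanup pattern added above, collect garbage and then release the CUDA caching allocator when torch is importable and a GPU is present, also works as a standalone helper. A minimal sketch (the function name is illustrative; only gc.collect, torch.cuda.is_available, and torch.cuda.empty_cache are taken from the diff):

import gc


def release_memory() -> None:
    """Free Python garbage and, if torch with CUDA is available, the CUDA cache."""
    gc.collect()
    try:
        import torch
    except ImportError:
        return
    if torch.cuda.is_available():
        torch.cuda.empty_cache()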

tabarena/tabarena/benchmark/models/wrapper/abstract_class.py

Lines changed: 15 additions & 1 deletion
@@ -104,7 +104,9 @@ def _fit_custom(self, X: pd.DataFrame, y: pd.Series, X_test: pd.DataFrame) -> dict:
         dict
             Returns predictions, probabilities, fit time and inference time
         """
-        with (Timer() as timer_fit):
+        from tabarena.utils.memory_utils import CpuMemoryTracker, GpuMemoryTracker
+
+        with CpuMemoryTracker() as cpu_tracker, GpuMemoryTracker(device=0) as gpu_tracker, Timer() as timer_fit:
             self.fit(X, y)
 
         self.post_fit(X=X, y=y, X_test=X_test)
@@ -125,6 +127,18 @@
             "time_infer_s": timer_predict.duration,
         }
 
+        out["memory_usage"] = dict(
+            peak_mem_cpu=cpu_tracker.peak_rss,
+            min_mem_cpu=cpu_tracker.min_rss,
+
+            peak_mem_gpu=gpu_tracker.peak_allocated,
+            peak_mem_gpu_reserved=gpu_tracker.peak_reserved,
+            min_mem_gpu=gpu_tracker.min_allocated,
+            min_mem_gpu_reserved=gpu_tracker.min_reserved,
+
+            gpu_tracking_enabled=gpu_tracker.enabled,
+        )
+
         return out
 
     def fit(self, X: pd.DataFrame, y: pd.Series, X_val=None, y_val=None):
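The CpuMemoryTracker and GpuMemoryTracker classes come from tabarena.utils.memory_utils, which is not part of this diff. As a rough illustration of the interface consumed above (context-manager protocol plus peak_rss/min_rss attributes), a simplified CPU-only sketch could look like the following; it samples RSS only on enter and exit rather than continuously, so it is an assumption about the interface, not the actual implementation. A GPU counterpart would typically build on torch.cuda.max_memory_allocated() and torch.cuda.max_memory_reserved().

import psutil


class SimpleCpuMemoryTracker:
    """Illustrative stand-in for CpuMemoryTracker: records RSS at enter and exit only."""

    def __init__(self) -> None:
        self._process = psutil.Process()
        self.peak_rss: int | None = None
        self.min_rss: int | None = None

    def _sample(self) -> None:
        rss = self._process.memory_info().rss
        self.peak_rss = rss if self.peak_rss is None else max(self.peak_rss, rss)
        self.min_rss = rss if self.min_rss is None else min(self.min_rss, rss)

    def __enter__(self) -> "SimpleCpuMemoryTracker":
        self._sample()
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self._sample()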

tabarena/tabarena/nips2025_utils/artifacts/_tabarena_method_metadata.py

Lines changed: 3 additions & 1 deletion
@@ -34,7 +34,8 @@
 from tabarena.nips2025_utils.artifacts._tabarena_method_metadata_2025_11_12 import realtabpfn25_metadata, contexttab_metadata
 
 from tabarena.nips2025_utils.artifacts._tabarena_method_metadata_misc import (
-    gbm_aio_0808_metadata
+    gbm_aio_0808_metadata,
+    # prep_gbm_v6_metadata,
 )
 
 methods_2025_09_03: list[MethodMetadata] = [
@@ -71,6 +72,7 @@
 
 methods_misc: list[MethodMetadata] = [
     gbm_aio_0808_metadata,
+    # prep_gbm_v6_metadata,
 ]
 
 replaced_methods = [

tabarena/tabarena/nips2025_utils/artifacts/_tabarena_method_metadata_misc.py

Lines changed: 23 additions & 0 deletions
@@ -25,3 +25,26 @@
     s3_prefix="cache_aio",
     verified=False,
 )
+
+# LightGBM w/ custom preprocessing pipeline (only first 3 repeats)
+# s3 cache = "cache_aio"
+prep_gbm_v6_metadata = MethodMetadata(
+    method="prep_LightGBM_v6",
+    artifact_name="prep_LightGBM_v6",
+    method_type="config",
+    compute="cpu",
+    date="2025-12-16",
+    ag_key="prep_GBM",
+    model_key="prep_GBM_v6",
+    config_default="prep_LightGBM_v6_c1_BAG_L1",
+    name_suffix=None,
+    has_raw=True,
+    has_processed=True,
+    has_results=True,
+    upload_as_public=True,
+    can_hpo=True,
+    is_bag=True,
+    s3_bucket="tabarena",
+    s3_prefix="cache_aio",
+    verified=True,
+)

tabarena/tabarena/nips2025_utils/end_to_end.py

Lines changed: 2 additions & 0 deletions
@@ -418,6 +418,7 @@ def compare_on_tabarena(
         leaderboard_kwargs: dict | None = None,
         tabarena_context_kwargs: dict | None = None,
         extra_results: pd.DataFrame = None,
+        remove_imputed: bool = False,
     ) -> pd.DataFrame:
         """Compare results on TabArena leaderboard.
 
@@ -451,6 +452,7 @@
             average_seeds=average_seeds,
             leaderboard_kwargs=leaderboard_kwargs,
             tabarena_context_kwargs=tabarena_context_kwargs,
+            remove_imputed=remove_imputed,
         )
 
     def get_results(