1 parent c2a05f9 commit c38c83f
bencheval/bencheval/tabarena.py
@@ -107,7 +107,7 @@ def _get_groupby_cols(self, results: pd.DataFrame) -> list[str]:
def leaderboard(
    self,
    data: pd.DataFrame,
-   average_seeds: bool = True,
+   average_seeds: bool = False,
    include_error: bool = False,
    include_elo: bool = True,
    include_winrate: bool = True,
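The hunk above flips the default of average_seeds from True to False, so leaderboard() no longer collapses per-seed scores before ranking unless asked to. As a rough, hedged illustration of why that default matters, the standalone pandas sketch below (not the bencheval implementation; the column names and the rank-based aggregation are assumptions) shows how averaging the seed dimension before ranking can change the outcome relative to ranking every seed separately.

# Illustration only -- not bencheval's leaderboard(). Assumed layout: one row
# per (method, task, seed) with a higher-is-better "score" column.
import pandas as pd

results = pd.DataFrame(
    {
        "method": ["A", "A", "B", "B"],
        "task": ["t1", "t1", "t1", "t1"],
        "seed": [0, 1, 0, 1],
        "score": [0.90, 0.70, 0.80, 0.81],
    }
)

def mean_rank(results: pd.DataFrame, average_seeds: bool) -> pd.Series:
    df = results.copy()
    if average_seeds:
        # Collapse the seed dimension first: one averaged score per (method, task).
        df = df.groupby(["method", "task"], as_index=False)["score"].mean()
        group_cols = ["task"]
    else:
        # Keep every seed as its own comparison.
        group_cols = ["task", "seed"]
    # Rank methods within each comparison group (1 = best), then average the ranks.
    df["rank"] = df.groupby(group_cols)["score"].rank(ascending=False)
    return df.groupby("method")["rank"].mean()

print(mean_rank(results, average_seeds=True))   # B edges out A on the seed-averaged score
print(mean_rank(results, average_seeds=False))  # ranking each seed separately ends in a tie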
tabarena/tabarena/nips2025_utils/compare.py
@@ -20,7 +20,7 @@ def compare_on_tabarena(
    tabarena_context_kwargs: dict | None = None,
    fillna: str | pd.DataFrame | None = "RF (default)",
    score_on_val: bool = False,
    remove_imputed: bool = False,
    tmp_treat_tasks_independently: bool = False,
    leaderboard_kwargs: dict | None = None,
@@ -103,7 +103,7 @@ def compare(
    calibration_framework: str | None = None,
    fillna: str | pd.DataFrame | None = None,
    tmp_treat_tasks_independently: bool = False,  # FIXME: Update
tabarena/tabarena/nips2025_utils/end_to_end.py
@@ -383,7 +383,7 @@ def compare(
    use_artifact_name_in_prefix: bool | None = None,
    use_model_results: bool = False,
):
    results = self.get_results(
@@ -414,7 +414,7 @@ def compare_on_tabarena(
    extra_results: pd.DataFrame = None,
tabarena/tabarena/nips2025_utils/end_to_end_single.py
@@ -566,7 +566,7 @@ def compare_on_tabarena(
    tabarena_context_kwargs: dict = None,
tabarena/tabarena/nips2025_utils/tabarena_context.py
@@ -114,7 +114,7 @@ def compare(
    subset: str | list[str] | None = None,
    folds: list[int] | None = None,
@@ -386,7 +386,7 @@ def simulate_portfolio_search(
    n_portfolio: int = 25,
    n_ensemble: int = 40,
    time_limit: float | None = 14400,
    if repo is None:
        repo = self.load_repo(methods=methods, config_fallback=config_fallback)
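simulate_portfolio_search() takes a portfolio size, an ensemble size and a time limit, and works against a repo of precomputed results loaded via load_repo(). As a hedged sketch of what a portfolio search of this kind typically does (this is not the tabarena implementation; the score-matrix layout and the greedy best-of-portfolio criterion are assumptions for illustration), a generic greedy selection looks like this:

# Sketch only: a generic greedy portfolio search over a (task x config) score
# matrix; the real simulate_portfolio_search operates on a cached-results repo.
import numpy as np

def greedy_portfolio(scores: np.ndarray, n_portfolio: int) -> list[int]:
    """Pick configs one at a time so that the best-of-portfolio score,
    averaged over tasks, improves the most at each step."""
    n_tasks, n_configs = scores.shape
    selected: list[int] = []
    best_so_far = np.full(n_tasks, -np.inf)  # best per-task score achieved by the portfolio
    for _ in range(min(n_portfolio, n_configs)):
        # Gain of each candidate: how much it lifts the per-task maximum, on average.
        gains = np.maximum(scores, best_so_far[:, None]).mean(axis=0)
        gains[selected] = -np.inf  # never pick the same config twice
        pick = int(np.argmax(gains))
        selected.append(pick)
        best_so_far = np.maximum(best_so_far, scores[:, pick])
    return selected

rng = np.random.default_rng(0)
scores = rng.random((10, 50))  # 10 tasks x 50 candidate configs
print(greedy_portfolio(scores, n_portfolio=5))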
tabarena/tabarena/paper/paper_runner_tabarena.py
@@ -16,7 +16,7 @@ def run_portfolio_search(
) -> pd.DataFrame:
    calibration_framework = "RF (default)"
    elo_bootstrap_rounds = 100
tabarena/tabarena/paper/tabarena_evaluator.py
@@ -229,7 +229,7 @@ def eval(
    plot_pareto: bool = True,
    plot_other: bool = False,
    calibration_framework: str | None = "auto",
    tmp_treat_tasks_independently: bool = False,  # FIXME: Need to make a weighted elo logic
    plot_with_baselines: bool = False,
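The FIXME above asks for a weighted Elo logic when tasks are treated independently. For orientation, here is a minimal standalone Elo update over pairwise comparisons showing where such a per-comparison weight could enter; it is not tabarena_evaluator's Elo code, and the weighting scheme is only a guess at the intent.

# Sketch only -- standard Elo update with an extra per-match weight.
from collections import defaultdict

def elo_ratings(matches, k: float = 32.0, base: float = 1000.0) -> dict[str, float]:
    """matches: iterable of (winner, loser, weight) tuples."""
    ratings = defaultdict(lambda: base)
    for winner, loser, weight in matches:
        expected_win = 1.0 / (1.0 + 10 ** ((ratings[loser] - ratings[winner]) / 400.0))
        # The weight scales the update, e.g. to down-weight tasks that contribute
        # many folds when tasks are treated independently (assumed intent).
        delta = k * weight * (1.0 - expected_win)
        ratings[winner] += delta
        ratings[loser] -= delta
    return dict(ratings)

matches = [("GBM", "RF (default)", 1.0), ("RF (default)", "KNN", 1.0), ("GBM", "KNN", 0.5)]
print(elo_ratings(matches))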