Skip to content

Commit c38c83f

Browse files
committed
Update to average_seeds=False by default in all places
1 parent c2a05f9 commit c38c83f

File tree

7 files changed

+10
-10
lines changed

7 files changed

+10
-10
lines changed

bencheval/bencheval/tabarena.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def _get_groupby_cols(self, results: pd.DataFrame) -> list[str]:
107 107   def leaderboard(
108 108   self,
109 109   data: pd.DataFrame,
110     - average_seeds: bool = True,
    110 + average_seeds: bool = False,
111 111   include_error: bool = False,
112 112   include_elo: bool = True,
113 113   include_winrate: bool = True,

tabarena/tabarena/nips2025_utils/compare.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ def compare_on_tabarena(
20 20   tabarena_context_kwargs: dict | None = None,
21 21   fillna: str | pd.DataFrame | None = "RF (default)",
22 22   score_on_val: bool = False,
23    - average_seeds: bool = True,
   23 + average_seeds: bool = False,
24 24   remove_imputed: bool = False,
25 25   tmp_treat_tasks_independently: bool = False,
26 26   leaderboard_kwargs: dict | None = None,
@@ -103,7 +103,7 @@ def compare(
103 103   calibration_framework: str | None = None,
104 104   fillna: str | pd.DataFrame | None = None,
105 105   score_on_val: bool = False,
106     - average_seeds: bool = True,
    106 + average_seeds: bool = False,
107 107   tmp_treat_tasks_independently: bool = False, # FIXME: Update
108 108   leaderboard_kwargs: dict | None = None,
109 109   remove_imputed: bool = False,

tabarena/tabarena/nips2025_utils/end_to_end.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ def compare(
383 383   use_artifact_name_in_prefix: bool | None = None,
384 384   use_model_results: bool = False,
385 385   score_on_val: bool = False,
386     - average_seeds: bool = True,
    386 + average_seeds: bool = False,
387 387   leaderboard_kwargs: dict | None = None,
388 388   ):
389 389   results = self.get_results(
@@ -414,7 +414,7 @@ def compare_on_tabarena(
414 414   use_artifact_name_in_prefix: bool | None = None,
415 415   use_model_results: bool = False,
416 416   score_on_val: bool = False,
417     - average_seeds: bool = True,
    417 + average_seeds: bool = False,
418 418   leaderboard_kwargs: dict | None = None,
419 419   tabarena_context_kwargs: dict | None = None,
420 420   extra_results: pd.DataFrame = None,

tabarena/tabarena/nips2025_utils/end_to_end_single.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -566,7 +566,7 @@ def compare_on_tabarena(
566 566   use_artifact_name_in_prefix: bool | None = None,
567 567   use_model_results: bool = False,
568 568   score_on_val: bool = False,
569     - average_seeds: bool = True,
    569 + average_seeds: bool = False,
570 570   leaderboard_kwargs: dict | None = None,
571 571   extra_results: pd.DataFrame = None,
572 572   tabarena_context_kwargs: dict = None,

tabarena/tabarena/nips2025_utils/tabarena_context.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ def compare(
114 114   subset: str | list[str] | None = None,
115 115   folds: list[int] | None = None,
116 116   score_on_val: bool = False,
117     - average_seeds: bool = True,
    117 + average_seeds: bool = False,
118 118   fillna: str | pd.DataFrame | None = "RF (default)",
119 119   remove_imputed: bool = False,
120 120   tmp_treat_tasks_independently: bool = False,
@@ -386,7 +386,7 @@ def simulate_portfolio_search(
386 386   n_portfolio: int = 25,
387 387   n_ensemble: int = 40,
388 388   time_limit: float | None = 14400,
389     - average_seeds: bool = True,
    389 + average_seeds: bool = False,
390 390   ):
391 391   if repo is None:
392 392   repo = self.load_repo(methods=methods, config_fallback=config_fallback)

tabarena/tabarena/paper/paper_runner_tabarena.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ def run_portfolio_search(
16 16   n_portfolio: int = 25,
17 17   n_ensemble: int = 40,
18 18   time_limit: float | None = 14400,
19    - average_seeds: bool = True,
   19 + average_seeds: bool = False,
20 20   ) -> pd.DataFrame:
21 21   calibration_framework = "RF (default)"
22 22   elo_bootstrap_rounds = 100

tabarena/tabarena/paper/tabarena_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ def eval(
229 229   plot_pareto: bool = True,
230 230   plot_other: bool = False,
231 231   calibration_framework: str | None = "auto",
232     - average_seeds: bool = True,
    232 + average_seeds: bool = False,
233 233   tmp_treat_tasks_independently: bool = False, # FIXME: Need to make a weighted elo logic
234 234   leaderboard_kwargs: dict | None = None,
235 235   plot_with_baselines: bool = False,

0 commit comments

Comments
 (0)