Skip to content

Commit d4daab0

Browse files
fix: ensure that display_on_leaderboard actually reflects whether the benchmark is displayed (#4288)
* fix: ensure that `display_on_leaderboard` actually reflects whether the benchmark is displayed. I believe the previous attribute was a leftover from an earlier version of the leaderboard. * fix typing
1 parent 3813b2d commit d4daab0

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

mteb/benchmarks/benchmark.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from collections.abc import Iterator, Sequence
44
from dataclasses import dataclass, field
5+
from functools import lru_cache
56
from typing import TYPE_CHECKING, Literal, cast
67

78
import huggingface_hub
@@ -15,6 +16,32 @@
1516
from mteb.results import BenchmarkResults
1617

1718

19+
@lru_cache
def _get_benchmarks_on_leaderboard() -> set[str]:
    """Collect the names of all benchmarks reachable from the leaderboard menus.

    The menu definitions are imported inside the function rather than at
    module level (presumably to avoid a circular import with the leaderboard
    package -- TODO confirm). Because the function takes no arguments and is
    wrapped in ``lru_cache``, the menu tree is walked once and the same set
    object is returned on every later call.

    Returns:
        The names of every ``Benchmark`` found, at any nesting depth, under
        ``GP_BENCHMARK_ENTRIES`` and ``R_BENCHMARK_ENTRIES``.
    """
    from mteb.leaderboard.benchmark_selector import (
        GP_BENCHMARK_ENTRIES,
        R_BENCHMARK_ENTRIES,
        MenuEntry,
    )

    def _iter_benchmarks(
        nodes: Sequence[Benchmark | MenuEntry],
    ) -> Iterator[Benchmark]:
        # A node is either a benchmark (leaf) or a menu entry whose
        # ``benchmarks`` attribute holds further nodes to descend into.
        for node in nodes:
            if isinstance(node, Benchmark):
                yield node
            else:
                yield from _iter_benchmarks(node.benchmarks)

    menu_roots = GP_BENCHMARK_ENTRIES + R_BENCHMARK_ENTRIES
    return {found.name for found in _iter_benchmarks(menu_roots)}
43+
44+
1845
@dataclass
1946
class Benchmark:
2047
"""A benchmark object intended to run a certain benchmark within MTEB.
@@ -46,11 +73,16 @@ class Benchmark:
4673
reference: StrURL | None = None
4774
citation: str | None = None
4875
contacts: list[str] | None = None
49-
display_on_leaderboard: bool = True
5076
icon: str | None = None
5177
display_name: str | None = None
5278
language_view: list[str] | Literal["all"] = field(default_factory=list)
5379

80+
@property
def display_on_leaderboard(self) -> bool:
    """Whether this benchmark's name is among those listed in the leaderboard menus."""
    # Membership is checked against the cached set of names gathered from
    # the leaderboard's benchmark selector.
    return self.name in _get_benchmarks_on_leaderboard()
85+
5486
def __iter__(self) -> Iterator[AbsTask]:
5587
return iter(self.tasks)
5688

tests/test_benchmarks/test_get_benchmarks.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,17 @@ def test_benchmark_aliases(alias, full_name):
3131
assert benchmark.name == full_name
3232
assert isinstance(benchmark, mteb.Benchmark)
3333
assert alias in benchmark.aliases
34+
35+
36+
def test_benchmark_on_leaderboard():
    """Check that ``get_benchmarks`` filters on ``display_on_leaderboard`` in both directions."""
    shown_name = "MTEB(Multilingual, v2)"
    hidden_name = "MTEB(Multilingual, v1)"

    # Filtering for displayed benchmarks keeps v2 and drops v1.
    displayed = {b.name for b in mteb.get_benchmarks(display_on_leaderboard=True)}
    assert shown_name in displayed
    assert hidden_name not in displayed

    # Filtering for non-displayed benchmarks gives the opposite result.
    not_displayed = {
        b.name for b in mteb.get_benchmarks(display_on_leaderboard=False)
    }
    assert shown_name not in not_displayed
    assert hidden_name in not_displayed

0 commit comments

Comments
 (0)