Skip to content

Commit ab390ce

Browse files
fix: Added leaderboard Vidore V3 (#3542)
* feat: initial leaderboard proposal
* feat: update summary table for ViDoRe V3 to reflect Document Understanding tasks
* refactor: update leaderboard references
* fix: update VISUAL_DOCUMENT_RETRIEVAL to use VidoreBenchmark
* fix: update JinaVisualDocumentBenchmark summary table creation method
* fix: add VisualDocumentRetrieval to previous benchmark names
* fix: remove JinaVisualDocumentBenchmark

---------

Co-authored-by: Antoine Edy <[email protected]>
1 parent 1ad433f commit ab390ce

File tree

5 files changed

+22
-14
lines changed

5 files changed

+22
-14
lines changed

mteb/benchmarks/_create_table.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -358,9 +358,7 @@ def _create_summary_table_mean_public_private(
358358
"mean(public)": "Mean (Public)",
359359
"mean(private)": "Mean (Private)",
360360
}
361-
# For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
362-
if "Retrieval" in joint_table.columns:
363-
rename_dict["Retrieval"] = "Mean (Task)"
361+
364362
joint_table = joint_table.rename(columns=rename_dict)
365363

366364
# Move borda rank to front

mteb/benchmarks/benchmark.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,10 @@ class RtebBenchmark(Benchmark):
8787
def _create_summary_table(
8888
self, benchmark_results: BenchmarkResults
8989
) -> pd.DataFrame:
90-
return _create_summary_table_mean_public_private(benchmark_results)
90+
joint_table = _create_summary_table_mean_public_private(benchmark_results)
91+
# For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
92+
joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
93+
return joint_table
9194

9295

9396
class HUMEBenchmark(Benchmark):
@@ -108,10 +111,15 @@ def _create_summary_table(
108111
return _create_summary_table_mean_task_type(benchmark_results)
109112

110113

111-
class Vidore3Benchmark(Benchmark):
114+
class VidoreBenchmark(Benchmark):
112115
"""Wrapper for Vidore3 benchmark."""
113116

114117
def _create_summary_table(
115118
self, benchmark_results: BenchmarkResults
116119
) -> pd.DataFrame:
117-
return _create_summary_table_mean_public_private(benchmark_results)
120+
joint_table = _create_summary_table_mean_public_private(benchmark_results)
121+
# For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
122+
joint_table = joint_table.rename(
123+
columns={"Document Understanding": "Mean (Task)"}
124+
)
125+
return joint_table

mteb/benchmarks/benchmarks/benchmarks.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
Benchmark,
33
HUMEBenchmark,
44
MIEBBenchmark,
5-
Vidore3Benchmark,
5+
VidoreBenchmark,
66
)
77
from mteb.get_tasks import MTEBTasks, get_task, get_tasks
88

@@ -2219,7 +2219,7 @@
22192219
""",
22202220
)
22212221

2222-
VIDORE_V3 = Vidore3Benchmark(
2222+
VIDORE_V3 = VidoreBenchmark(
22232223
name="ViDoRe(v3)",
22242224
display_name="ViDoRe V3",
22252225
icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
@@ -2253,10 +2253,9 @@
22532253
""",
22542254
)
22552255

2256-
VISUAL_DOCUMENT_RETRIEVAL = Benchmark(
2257-
name="VisualDocumentRetrieval",
2258-
display_name="Visual Document Retrieval",
2259-
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-picture.svg",
2256+
VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
2257+
name="ViDoRe(v1&v2)",
2258+
display_name="ViDoRe (V1&V2)",
22602259
tasks=get_tasks(
22612260
tasks=[
22622261
# v1

mteb/benchmarks/get_benchmark.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ def _get_previous_benchmark_names() -> dict[str, str]:
3939
MTEB_RETRIEVAL_MEDICAL,
4040
MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
4141
SEB,
42+
VISUAL_DOCUMENT_RETRIEVAL,
4243
MTEB_code,
4344
MTEB_multilingual_v2,
4445
)
@@ -63,6 +64,7 @@ def _get_previous_benchmark_names() -> dict[str, str]:
6364
"MTEB(Chinese)": C_MTEB.name,
6465
"FaMTEB(fas, beta)": FA_MTEB.name,
6566
"BRIGHT(long)": BRIGHT_LONG.name,
67+
"VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
6668
}
6769
return previous_benchmark_names
6870

mteb/leaderboard/benchmark_selector.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,10 +110,11 @@ class MenuEntry:
110110
MenuEntry(
111111
"Image",
112112
description=None,
113-
open=False,
113+
open=True,
114114
benchmarks=[
115-
mteb.get_benchmark("VisualDocumentRetrieval"),
115+
mteb.get_benchmark("ViDoRe(v3)"),
116116
mteb.get_benchmark("JinaVDR"),
117+
MenuEntry("Other", [mteb.get_benchmark("ViDoRe(v1&v2)")]),
117118
],
118119
),
119120
MenuEntry(

0 commit comments

Comments
 (0)