Skip to content

Commit 5530493

Browse files
fix: Add ViDoRe(v3.1) (#4220)
* fix: Add ViDoRe(v3.1) * Apply suggestion from @Samoed * add to init --------- Co-authored-by: Roman Solomatin <samoed.roman@gmail.com> Co-authored-by: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
1 parent 482930f commit 5530493

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

mteb/benchmarks/benchmarks/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
VIDORE,
5050
VIDORE_V2,
5151
VIDORE_V3,
52+
VIDORE_V3_1,
5253
VISUAL_DOCUMENT_RETRIEVAL,
5354
VN_MTEB,
5455
CoIR,
@@ -131,6 +132,7 @@
131132
"VIDORE",
132133
"VIDORE_V2",
133134
"VIDORE_V3",
135+
"VIDORE_V3_1",
134136
"VISUAL_DOCUMENT_RETRIEVAL",
135137
"VN_MTEB",
136138
"CoIR",

mteb/benchmarks/benchmarks/benchmarks.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2507,6 +2507,50 @@
25072507
""",
25082508
)
25092509

2510+
VIDORE_V3_1 = VidoreBenchmark(
2511+
name="ViDoRe(v3.1)",
2512+
display_name="ViDoRe v3.1",
2513+
language_view=[
2514+
"deu-Latn",
2515+
"eng-Latn",
2516+
"fra-Latn",
2517+
"ita-Latn",
2518+
"por-Latn",
2519+
"spa-Latn",
2520+
],
2521+
icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
2522+
tasks=get_tasks(
2523+
tasks=[
2524+
"Vidore3FinanceEnRetrieval.v2",
2525+
"Vidore3IndustrialRetrieval.v2",
2526+
"Vidore3ComputerScienceRetrieval.v2",
2527+
"Vidore3PharmaceuticalsRetrieval.v2",
2528+
"Vidore3HrRetrieval.v2",
2529+
"Vidore3FinanceFrRetrieval.v2",
2530+
"Vidore3PhysicsRetrieval.v2",
2531+
"Vidore3EnergyRetrieval.v2",
2532+
"Vidore3TelecomRetrieval.v2",
2533+
"Vidore3NuclearRetrieval.v2",
2534+
]
2535+
),
2536+
description="ViDoRe v3.1 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. "
2537+
+ "It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents. "
2538+
+ "The benchmark includes both open and closed datasets: to submit results on private tasks, please [open an issue](https://github.com/embeddings-benchmark/mteb/issues/new?template=eval_request.yaml). "
2539+
+ "The latest 3.1 version adds markdown derived from OCR to allow easier evaluation of text-only baselines and joint image-text baselines.",
2540+
reference="https://arxiv.org/abs/2601.08620",
2541+
citation=r"""
2542+
@article{loison2026vidorev3comprehensiveevaluation,
2543+
archiveprefix = {arXiv},
2544+
author = {António Loison and Quentin Macé and Antoine Edy and Victor Xing and Tom Balough and Gabriel Moreira and Bo Liu and Manuel Faysse and Céline Hudelot and Gautier Viaud},
2545+
eprint = {2601.08620},
2546+
primaryclass = {cs.AI},
2547+
title = {ViDoRe V3: A Comprehensive Evaluation of Retrieval Augmented Generation in Complex Real-World Scenarios},
2548+
url = {https://arxiv.org/abs/2601.08620},
2549+
year = {2026},
2550+
}
2551+
""",
2552+
)
2553+
25102554
VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
25112555
name="ViDoRe(v1&v2)",
25122556
aliases=["VisualDocumentRetrieval"],

0 commit comments

Comments
 (0)