Skip to content

Commit 4240bdb

Browse files
committed
refactor: split BRIGHT benchmark into individual subset tasks
1 parent 0f61c9f commit 4240bdb

File tree

4 files changed

+880 additions
-110 deletions

mteb/benchmarks/benchmarks/benchmarks.py

Lines changed: 28 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1165,7 +1165,22 @@
11651165
BRIGHT = Benchmark(
11661166
name="BRIGHT",
11671167
display_name="Reasoning Retrieval",
1168-
tasks=get_tasks(tasks=["BrightRetrieval"], eval_splits=["standard"]),
1168+
tasks=get_tasks(
1169+
tasks=[
1170+
"BrightBiologyRetrieval",
1171+
"BrightEarthScienceRetrieval",
1172+
"BrightEconomicsRetrieval",
1173+
"BrightPsychologyRetrieval",
1174+
"BrightRoboticsRetrieval",
1175+
"BrightStackoverflowRetrieval",
1176+
"BrightSustainableLivingRetrieval",
1177+
"BrightPonyRetrieval",
1178+
"BrightLeetcodeRetrieval",
1179+
"BrightAopsRetrieval",
1180+
"BrightTheoremQATheoremsRetrieval",
1181+
"BrightTheoremQAQuestionsRetrieval",
1182+
],
1183+
),
11691184
description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval.
11701185
BRIGHT is the first text retrieval
11711186
benchmark that requires intensive reasoning to retrieve relevant documents with
@@ -1186,12 +1201,17 @@
11861201

11871202
BRIGHT_LONG = Benchmark(
11881203
name="BRIGHT (long)",
1189-
tasks=MTEBTasks(
1190-
(
1191-
get_task(
1192-
"BrightLongRetrieval",
1193-
),
1194-
)
1204+
tasks=get_tasks(
1205+
tasks=[
1206+
"BrightBiologyLongRetrieval",
1207+
"BrightEarthScienceLongRetrieval",
1208+
"BrightEconomicsLongRetrieval",
1209+
"BrightPsychologyLongRetrieval",
1210+
"BrightRoboticsLongRetrieval",
1211+
"BrightStackoverflowLongRetrieval",
1212+
"BrightSustainableLivingLongRetrieval",
1213+
"BrightPonyLongRetrieval",
1214+
],
11951215
),
11961216
description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval.
11971217
BRIGHT is the first text retrieval
@@ -1603,8 +1623,7 @@
16031623
"TRECCOVID-NL",
16041624
],
16051625
),
1606-
description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated "
1607-
"translation.",
1626+
description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.",
16081627
reference="https://arxiv.org/abs/2412.08329",
16091628
contacts=["nikolay-banar"],
16101629
citation=r"""

mteb/tasks/Retrieval/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
from .eng.BarExamQARetrieval import *
4343
from .eng.BillSumCARetrieval import *
4444
from .eng.BillSumUSRetrieval import *
45+
from .eng.BrightLongRetrieval import *
4546
from .eng.BrightRetrieval import *
4647
from .eng.BuiltBenchRetrieval import *
4748
from .eng.ChatDoctorRetrieval import *

0 commit comments

Comments (0)