|
1165 | 1165 | BRIGHT = Benchmark( |
1166 | 1166 | name="BRIGHT", |
1167 | 1167 | display_name="Reasoning Retrieval", |
1168 | | - tasks=get_tasks(tasks=["BrightRetrieval"], eval_splits=["standard"]), |
| 1168 | + tasks=get_tasks( |
| 1169 | + tasks=[ |
| 1170 | + "BrightBiologyRetrieval", |
| 1171 | + "BrightEarthScienceRetrieval", |
| 1172 | + "BrightEconomicsRetrieval", |
| 1173 | + "BrightPsychologyRetrieval", |
| 1174 | + "BrightRoboticsRetrieval", |
| 1175 | + "BrightStackoverflowRetrieval", |
| 1176 | + "BrightSustainableLivingRetrieval", |
| 1177 | + "BrightPonyRetrieval", |
| 1178 | + "BrightLeetcodeRetrieval", |
| 1179 | + "BrightAopsRetrieval", |
| 1180 | + "BrightTheoremQATheoremsRetrieval", |
| 1181 | + "BrightTheoremQAQuestionsRetrieval", |
| 1182 | + ], |
| 1183 | + ), |
1169 | 1184 | description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval. |
1170 | 1185 | BRIGHT is the first text retrieval |
1171 | 1186 | benchmark that requires intensive reasoning to retrieve relevant documents with |
|
1186 | 1201 |
|
1187 | 1202 | BRIGHT_LONG = Benchmark( |
1188 | 1203 | name="BRIGHT (long)", |
1189 | | - tasks=MTEBTasks( |
1190 | | - ( |
1191 | | - get_task( |
1192 | | - "BrightLongRetrieval", |
1193 | | - ), |
1194 | | - ) |
| 1204 | + tasks=get_tasks( |
| 1205 | + tasks=[ |
| 1206 | + "BrightBiologyLongRetrieval", |
| 1207 | + "BrightEarthScienceLongRetrieval", |
| 1208 | + "BrightEconomicsLongRetrieval", |
| 1209 | + "BrightPsychologyLongRetrieval", |
| 1210 | + "BrightRoboticsLongRetrieval", |
| 1211 | + "BrightStackoverflowLongRetrieval", |
| 1212 | + "BrightSustainableLivingLongRetrieval", |
| 1213 | + "BrightPonyLongRetrieval", |
| 1214 | + ], |
1195 | 1215 | ), |
1196 | 1216 | description="""BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval. |
1197 | 1217 | BRIGHT is the first text retrieval |
|
1603 | 1623 | "TRECCOVID-NL", |
1604 | 1624 | ], |
1605 | 1625 | ), |
1606 | | - description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated " |
1607 | | - "translation.", |
| 1626 | + description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.", |
1608 | 1627 | reference="https://arxiv.org/abs/2412.08329", |
1609 | 1628 | contacts=["nikolay-banar"], |
1610 | 1629 | citation=r""" |
|
0 commit comments