@@ -151,7 +151,7 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
     chunk = kwargs.get("chunks", "")
     summary = kwargs.get("documents", "")
 
-    return [
+    prompt = [
         {"role": "system", "content": JUDGE_ANSWER_SYSTEM_PROMPT},
         {
             "role": "user",
@@ -161,6 +161,8 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
         },
     ]
 
+    return prompt
+
 
 def process_judge_response_yourbench(response):
     # extract the final answer using regex from the response xml
@@ -249,11 +251,11 @@ def yourbench_prompt(line, task_name: str = ""):
 extend_enum(Metrics, "yourbench_metrics", yourbench_metrics)
 
 yourbench = LightevalTaskConfig(
-    name=HF_TASK_NAME,  # noqa: F821
+    name="HF_TASK_NAME",  # noqa: F821
     suite=["custom"],
     prompt_function=yourbench_prompt,
-    hf_repo=HF_DATASET_NAME,  # noqa: F821
-    hf_subset="lighteval_single_shot_questions",
+    hf_repo="HF_DATASET_NAME",  # noqa: F821
+    hf_subset="lighteval",
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
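Note: the `+` lines in the last hunk swap the bare `HF_TASK_NAME` / `HF_DATASET_NAME` identifiers for placeholder string literals, which suggests the task file is treated as a template whose placeholders are substituted before the task is run. A minimal sketch of such a substitution step follows; the function name, file paths, and plain string replacement are illustrative assumptions, not the project's actual mechanism.

# Minimal sketch, assuming "HF_TASK_NAME" / "HF_DATASET_NAME" act as placeholders
# that get string-replaced when a concrete task file is generated.
# Paths and the replace() approach are assumptions for illustration only.
from pathlib import Path

def render_task_file(template_path: str, output_path: str, task_name: str, dataset_name: str) -> None:
    source = Path(template_path).read_text()
    rendered = source.replace("HF_TASK_NAME", task_name).replace("HF_DATASET_NAME", dataset_name)
    Path(output_path).write_text(rendered)

# Hypothetical usage:
# render_task_file("lighteval_task.py.template", "lighteval_task.py",
#                  task_name="my_yourbench_task", dataset_name="org/my-yourbench-dataset")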