@@ -151,7 +151,7 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
     chunk = kwargs.get("chunks", "")
     summary = kwargs.get("documents", "")
 
-    return [
+    prompt = [
         {"role": "system", "content": JUDGE_ANSWER_SYSTEM_PROMPT},
         {
             "role": "user",
@@ -161,6 +161,8 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
         },
     ]
 
+    return prompt
+
 
 def process_judge_response_yourbench(response):
     # extract the final answer using regex from the response xml
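For context, a minimal sketch (not part of the diff) of how the refactored get_judge_prompt might be consumed downstream. The OpenAI-style client, the model name, and the example arguments are illustrative assumptions, not part of this change:

    from openai import OpenAI  # hypothetical judge backend, for illustration only

    client = OpenAI()

    # Build the judge messages; extra context is forwarded through **kwargs.
    messages = get_judge_prompt(
        question="What does the document propose?",
        answer="A two-stage retrieval pipeline.",
        gold="A two-stage retrieval and reranking pipeline.",
        chunks="<retrieved chunk text>",    # read via kwargs.get("chunks", "")
        documents="<document summary>",     # read via kwargs.get("documents", "")
    )
    judge_reply = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    verdict = process_judge_response_yourbench(judge_reply.choices[0].message.content)
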
@@ -249,11 +251,11 @@ def yourbench_prompt(line, task_name: str = ""):
 extend_enum(Metrics, "yourbench_metrics", yourbench_metrics)
 
 yourbench = LightevalTaskConfig(
-    name=HF_TASK_NAME,  # noqa: F821
+    name="HF_TASK_NAME",  # noqa: F821
     suite=["custom"],
     prompt_function=yourbench_prompt,
-    hf_repo=HF_DATASET_NAME,  # noqa: F821
-    hf_subset="lighteval_single_shot_questions",
+    hf_repo="HF_DATASET_NAME",  # noqa: F821
+    hf_subset="lighteval",
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
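The config above still has to be exported so lighteval's --custom-tasks loader can discover it. A sketch, assuming the usual custom-task convention of a module-level TASKS_TABLE and that the HF_TASK_NAME / HF_DATASET_NAME placeholders are substituted with real values before the module is loaded (the file name and CLI invocation below are illustrative, and exact flags vary by lighteval version):

    # Conventional export list read by lighteval's custom-task loader.
    TASKS_TABLE = [yourbench]

    # Illustrative run once the placeholders have been replaced:
    #   lighteval accelerate "model_name=<model>" \
    #       "custom|<task name>|0|0" \
    #       --custom-tasks yourbench_lighteval_task.py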