@@ -151,7 +151,7 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
     chunk = kwargs.get("chunks", "")
     summary = kwargs.get("documents", "")
 
-    return [
+    prompt = [
         {"role": "system", "content": JUDGE_ANSWER_SYSTEM_PROMPT},
         {
             "role": "user",
@@ -161,6 +161,8 @@ def get_judge_prompt(question: str, answer: str, gold: str, **kwargs):
         },
     ]
 
+    return prompt
+
 
 def process_judge_response_yourbench(response):
     # extract the final answer using regex from the response xml
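For context, a minimal sketch (not part of the diff) of how the refactored get_judge_prompt might be consumed downstream. The OpenAI-style client, the model name, and the example arguments are illustrative assumptions, not part of this change:

    from openai import OpenAI  # hypothetical judge backend, for illustration only

    client = OpenAI()

    # Build the judge messages; extra context is forwarded through **kwargs.
    messages = get_judge_prompt(
        question="What does the document propose?",
        answer="A two-stage retrieval pipeline.",
        gold="A two-stage retrieval and reranking pipeline.",
        chunks="<retrieved chunk text>",    # read via kwargs.get("chunks", "")
        documents="<document summary>",     # read via kwargs.get("documents", "")
    )
    judge_reply = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    verdict = process_judge_response_yourbench(judge_reply.choices[0].message.content)
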
@@ -249,11 +251,11 @@ def yourbench_prompt(line, task_name: str = ""):
 extend_enum(Metrics, "yourbench_metrics", yourbench_metrics)
 
 yourbench = LightevalTaskConfig(
-    name=HF_TASK_NAME,  # noqa: F821
+    name="HF_TASK_NAME",  # noqa: F821
     suite=["custom"],
     prompt_function=yourbench_prompt,
-    hf_repo=HF_DATASET_NAME,  # noqa: F821
-    hf_subset="lighteval_single_shot_questions",
+    hf_repo="HF_DATASET_NAME",  # noqa: F821
+    hf_subset="lighteval",
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
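The config above still has to be exported so lighteval's --custom-tasks loader can discover it. A sketch, assuming the usual custom-task convention of a module-level TASKS_TABLE and that the HF_TASK_NAME / HF_DATASET_NAME placeholders are substituted with real values before the module is loaded (the file name and CLI invocation below are illustrative, and exact flags vary by lighteval version):

    # Conventional export list read by lighteval's custom-task loader.
    TASKS_TABLE = [yourbench]

    # Illustrative run once the placeholders have been replaced:
    #   lighteval accelerate "model_name=<model>" \
    #       "custom|<task name>|0|0" \
    #       --custom-tasks yourbench_lighteval_task.py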