Skip to content

Commit 8b47e01

Browse files
committed
Tune prompt
1 parent fa00c5f commit 8b47e01

File tree

2 files changed

+7
-9
lines changed

2 files changed

+7
-9
lines changed

src/lighteval/tasks/default_prompts.py

Lines changed: 4 additions & 6 deletions
Diff (unified format; columns were: original line number · diff line number · change):

@@ -730,21 +730,19 @@ def gpqa(line, task_name: str = None):
 
 
 def gpqa_instruct(line, task_name: str = None):
-    """Adapted from simple-eval: https://github.com/openai/simple-evals/blob/83ed7640a7d9cd26849bcb3340125002ef14abbe/common.py#L14"""
+    """Adapted from Llama 3 evals: https://huggingface.co/datasets/meta-llama/Llama-3.1-8B-Instruct-evals/viewer/Llama-3.1-8B-Instruct-evals__gpqa__details"""
     gold_index = random.randint(0, 3)
     choices = [line["Incorrect Answer 1"], line["Incorrect Answer 2"], line["Incorrect Answer 3"]]
     choices.insert(gold_index, line["Correct Answer"])
-
-    instruction = "Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering."
-    query = f"{instruction}\n\n{line['Question']}\n\n" ""
-    query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES, choices)])
+    query_template = """Given the following question and four candidate answers (A, B, C and D), choose the best answer.\n\nQuestion: {Question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nYour response should end with \"The best answer is [the_answer_letter]\" where the [the_answer_letter] is one of A, B, C or D. Think step by step before answering."""
+    query = query_template.format(A=choices[0], B=choices[1], C=choices[2], D=choices[3], Question=line["Question"])
 
     return Doc(
         task_name=task_name,
         query=query,
         choices=LETTER_INDICES[: len(choices)],
         gold_index=gold_index,
-        instruction=instruction,
+        instruction=query,
     )
 
 
src/lighteval/tasks/default_tasks.py

Lines changed: 3 additions & 3 deletions
Diff (unified format; columns were: original line number · diff line number · change):

@@ -7729,7 +7729,7 @@
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
-    few_shots_select="random_sampling",
+    few_shots_select=None,
     generation_size=32_000,  # needed for reasoning models like R1
     metric=[Metrics.gpqa_instruct_metric],
     stop_sequence=[],  # no stop sequence, will use eos token
@@ -7745,7 +7745,7 @@
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
-    few_shots_select="random_sampling",
+    few_shots_select=None,
     generation_size=32_000,  # needed for reasoning models like R1
     metric=[Metrics.gpqa_instruct_metric],
     stop_sequence=[],  # no stop sequence, will use eos token
@@ -7761,7 +7761,7 @@
     hf_avail_splits=["train"],
     evaluation_splits=["train"],
     few_shots_split=None,
-    few_shots_select="random_sampling",
+    few_shots_select=None,
     generation_size=32_000,  # needed for reasoning models like R1
     metric=[Metrics.gpqa_instruct_metric],
     stop_sequence=[],  # no stop sequence, will use eos token

0 commit comments

Comments (0)