Skip to content

Commit 1e17547

Browse files
committed
HumanEval: Move BOS token to individual prompt template, don't prepend by default when tokenizing
1 parent f5127e8 commit 1e17547

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

eval/humaneval.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,12 @@
1414
" "
1515
),
1616
"granite": (
17-
"Question:\nComplete the following Python function:\n\n{{problem}}\n\nAnswer:\n"
17+
"<|endoftext|>Question:\nComplete the following Python function:\n\n{{problem}}\n\nAnswer:\n"
1818
"Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
1919
" "
2020
),
2121
"llama": (
22-
"[INST] <<SYS>>\n"
22+
"<s>[INST] <<SYS>>\n"
2323
"You are a helpful AI coding assistant.\n"
2424
"<</SYS>>\n\n"
2525
"Complete the following Python function:\n\n"
@@ -28,7 +28,7 @@
2828
" "
2929
),
3030
"llama3": (
31-
"<|start_header_id|>system<|end_header_id|>\n\n"
31+
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
3232
"You are a helpful AI coding assistant.<|eot_id|>"
3333
"<|start_header_id|>user<|end_header_id|>\n\n"
3434
"Complete the following Python function:\n\n{{problem}}<|eot_id|>"
@@ -37,7 +37,7 @@
3737
" "
3838
),
3939
"mistral": (
40-
"[INST] You are a helpful AI coding assistant.\n\n"
40+
"<s>[INST] You are a helpful AI coding assistant.\n\n"
4141
"Complete the following Python function:\n\n"
4242
"{{problem}}[/INST]"
4343
" Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
@@ -51,7 +51,7 @@
5151
" "
5252
),
5353
"reka": (
54-
"human: Complete the following Python function."
54+
"<|endoftext|>human: Complete the following Python function."
5555
" Provide your reasoning in comments, but be concise and don't second-guess."
5656
"\n\n{{problem}}"
5757
" <sep> assistant: ```python\n{{problem}}",
@@ -76,7 +76,7 @@
7676
" "
7777
),
7878
"deepseek": (
79-
"You are a helpful AI coding assistant.\n"
79+
"<|begin▁of▁sentence|>You are a helpful AI coding assistant.\n"
8080
"<|User|>Complete the following Python function:\n\n{{problem}}"
8181
"<|Assistant|>Sure! Here is how you might implement the function:\n\n```python\n{{problem}}",
8282
" "
@@ -124,7 +124,11 @@ def main(args):
124124
for idx, (problem_id, problem) in enumerate(problems.items()):
125125
b_problem = problem["prompt"]
126126
f_problem = prompt_format.replace("{{problem}}", b_problem)
127-
input_ids = tokenizer.encode(f_problem, encode_special_tokens = True, add_bos = True)
127+
input_ids = tokenizer.encode(
128+
f_problem,
129+
encode_special_tokens = True,
130+
add_bos = (args.prompt_format == "raw")
131+
)
128132
for s in range(num_samples_per_task):
129133
job = Job(
130134
input_ids = input_ids,

0 commit comments

Comments
 (0)