Skip to content

Commit 1b1b268

Browse files
committed
Update eval_aime_benchmark.py
The new Gemini model doesn't support a system prompt, so the system prompt is prepended to the user message instead.
1 parent 0e54243 commit 1b1b268

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

scripts/eval_aime_benchmark.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import os
44
import logging
55
import re
6+
import time
7+
68
from typing import List, Dict, Tuple, Optional
79
from datetime import datetime
810

@@ -121,8 +123,8 @@ def get_llm_response(problem: str, model: str) -> str:
121123
response = client.chat.completions.create(
122124
model=model,
123125
messages=[
124-
{"role": "system", "content": SYSTEM_PROMPT},
125-
{"role": "user", "content": problem}
126+
# {"role": "system", "content": SYSTEM_PROMPT},
127+
{"role": "user", "content": SYSTEM_PROMPT + problem}
126128
],
127129
max_tokens=8192,
128130
# extra_body={
@@ -224,7 +226,7 @@ def main(model: str):
224226
save_result(results_file, result)
225227

226228
# Optional: Add delay between requests if needed
227-
# time.sleep(5)
229+
time.sleep(300)
228230

229231
# Analyze results
230232
final_results = load_existing_results(results_file)

0 commit comments

Comments (0)