Skip to content

Commit c133b7b

Browse files
committed
Update eval_simpleqa_benchmark.py
1 parent fe98303 commit c133b7b

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

scripts/eval_simpleqa_benchmark.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ def query_optillm(self, question: str) -> Tuple[str, bool]:
232 232   model=model_name,
233 233   messages=messages,
234 234   extra_body=extra_body if extra_body else None,
235     - max_tokens=4096
    235 + max_tokens=4096,
    236 + temperature=0.6
236 237   )
237 238
238 239   answer = response.choices[0].message.content
@@ -259,7 +260,7 @@ def grade_response(self, question: str, gold_answer: str, response: str) -> str:
259 260   grader_response = self.grader_client.chat.completions.create(
260 261   model=self.grader_model,
261 262   messages=[{"role": "user", "content": grading_prompt}],
262     - temperature=0.0,
    263 + temperature=0.6,
263 264   max_tokens=4096
264 265   )
265 266
0 commit comments

Comments
 (0)