Skip to content

Commit 62e4948

Browse files
committed
Update eval_aime_benchmark.py
collect right tokens
1 parent 5e78652 commit 62e4948

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

scripts/eval_aime_benchmark.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def analyze_thinking(response: str) -> Dict:
137137
position = 0
138138
for phrase in THOUGHT_TRANSITIONS:
139139
# Find all occurrences of each transition phrase
140-
for match in re.finditer(r'\b' + re.escape(phrase) + r'\b', thinking_text):
140+
for match in re.finditer(re.escape(phrase), thinking_text):
141141
result["transition_counts"][phrase] += 1
142142
# Record the approximate token position of the transition
143143
token_position = len(thinking_text[:match.start()].split())
@@ -170,6 +170,12 @@ def get_llm_response(problem: str, model: str) -> Union[str, List[Dict]]:
170170
{"role": "user", "content": SYSTEM_PROMPT + problem}
171171
],
172172
max_tokens=8192,
173+
extra_body = {
174+
"decoding" : "thinkdeeper",
175+
"min_thinking_tokens" : 0,
176+
"max_thinking_tokens" : 8192,
177+
"max_thoughts" : 128,
178+
},
173179
)
174180

175181
# If there's more than one choice, format as attempts

0 commit comments

Comments
 (0)