Skip to content

Commit a6085ff

Browse files
committed
upgrade reward math verification
1 parent 01640eb commit a6085ff

File tree

1 file changed

+3
-6
lines changed
  • applications/ColossalChat/coati/distributed/reward

1 file changed

+3
-6
lines changed

applications/ColossalChat/coati/distributed/reward/reward_fn.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import torch
2+
from math_verify import parse, verify
23

34
from .reward_utils import extract_boxed_solution, extract_solution, validate_response_structure
45

@@ -35,11 +36,7 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
3536
format_acc += 1
3637

3738
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
38-
if (
39-
format_valid
40-
and final_answer is not None
41-
and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
42-
):
39+
if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())):
4340
ans_acc += 1
4441
reward += acc_score
4542

@@ -91,7 +88,7 @@ def boxed_math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
9188
reward += format_score
9289

9390
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
94-
if format_valid and final_answer is not None and gt_answer.strip().lower() == final_answer.strip().lower():
91+
if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())):
9592
ans_acc += 1
9693
reward += acc_score
9794

0 commit comments

Comments
 (0)