4
4
from easydict import EasyDict
5
5
from ding .reward_model .math_rule_reward_model import MathRuleRewardModel
6
6
7
+
7
8
@pytest .fixture
8
9
def reward_model ():
9
10
return MathRuleRewardModel (
@@ -19,24 +20,26 @@ def reward_model():
19
20
20
21
@pytest .mark .envtest
21
22
def test_math_rule_reward_model_correct_answer (reward_model ):
22
- data_correct = [{
23
- "system" : "Please answer this math problem..." ,
24
- "query" : (
25
- "The school now introduces a new color, silver, for the flag design. "
26
- "Crestview's school colors are now purple, gold, and silver. "
27
- "The students are designing a flag using three solid-colored horizontal stripes. "
28
- "Using one, two, or all three of the school colors, how many different flags "
29
- "are possible if adjacent stripes may be the same color?"
30
- ),
31
- "response" : (
32
- "Crestview's school colors—purple, gold, and silver—can be used to design "
33
- "a flag with three horizontal stripes, where each stripe can be any of the "
34
- "three colors and adjacent stripes may be the same. Since each of the three "
35
- "stripes has three independent color choices, the total number of possible "
36
- "flag designs is 27"
37
- ),
38
- "answer" : r"27"
39
- }]
23
+ data_correct = [
24
+ {
25
+ "system" : "Please answer this math problem..." ,
26
+ "query" : (
27
+ "The school now introduces a new color, silver, for the flag design. "
28
+ "Crestview's school colors are now purple, gold, and silver. "
29
+ "The students are designing a flag using three solid-colored horizontal stripes. "
30
+ "Using one, two, or all three of the school colors, how many different flags "
31
+ "are possible if adjacent stripes may be the same color?"
32
+ ),
33
+ "response" : (
34
+ "Crestview's school colors—purple, gold, and silver—can be used to design "
35
+ "a flag with three horizontal stripes, where each stripe can be any of the "
36
+ "three colors and adjacent stripes may be the same. Since each of the three "
37
+ "stripes has three independent color choices, the total number of possible "
38
+ "flag designs is 27"
39
+ ),
40
+ "answer" : r"27"
41
+ }
42
+ ]
40
43
41
44
# Test the case with correct answer
42
45
rewards = reward_model .estimate (data_correct )
@@ -48,26 +51,28 @@ def test_math_rule_reward_model_correct_answer(reward_model):
48
51
49
52
@pytest .mark .envtest
50
53
def test_math_rule_reward_model_wrong_answer (reward_model ):
51
- data_wrong = [{
52
- "system" : "Please answer this math problem..." ,
53
- "query" : (
54
- "The school now introduces a new color, silver, for the flag design. "
55
- "Crestview's school colors are now purple, gold, and silver. "
56
- "The students are designing a flag using three solid-colored horizontal stripes. "
57
- "Using one, two, or all three of the school colors, how many different flags "
58
- "are possible if adjacent stripes may be the same color?"
59
- ),
60
- "response" : (
61
- r"The given point \(\left(\frac{\sqrt{3}}{2}, -\frac{1}{2}\right)\) lies on "
62
- r"the unit circle, meaning its coordinates correspond to \((\cos \alpha, "
63
- r"\sin \alpha)\). Since \(\cos \alpha = \frac{\sqrt{3}}{2}\) and "
64
- r"\(\sin \alpha = -\frac{1}{2}\), the angle \(\alpha\) is in the "
65
- r"**fourth quadrant**, where the reference angle is \(\frac{\pi}{6}\). "
66
- r"Therefore, the smallest positive value of \(\alpha\) is "
67
- r"\(2\pi - \frac{\pi}{6} = \frac{17\pi}{6}\)."
68
- ),
69
- "answer" : r"\frac{11\pi}{6}"
70
- }]
54
+ data_wrong = [
55
+ {
56
+ "system" : "Please answer this math problem..." ,
57
+ "query" : (
58
+ "The school now introduces a new color, silver, for the flag design. "
59
+ "Crestview's school colors are now purple, gold, and silver. "
60
+ "The students are designing a flag using three solid-colored horizontal stripes. "
61
+ "Using one, two, or all three of the school colors, how many different flags "
62
+ "are possible if adjacent stripes may be the same color?"
63
+ ),
64
+ "response" : (
65
+ r"The given point \(\left(\frac{\sqrt{3}}{2}, -\frac{1}{2}\right)\) lies on "
66
+ r"the unit circle, meaning its coordinates correspond to \((\cos \alpha, "
67
+ r"\sin \alpha)\). Since \(\cos \alpha = \frac{\sqrt{3}}{2}\) and "
68
+ r"\(\sin \alpha = -\frac{1}{2}\), the angle \(\alpha\) is in the "
69
+ r"**fourth quadrant**, where the reference angle is \(\frac{\pi}{6}\). "
70
+ r"Therefore, the smallest positive value of \(\alpha\) is "
71
+ r"\(2\pi - \frac{\pi}{6} = \frac{17\pi}{6}\)."
72
+ ),
73
+ "answer" : r"\frac{11\pi}{6}"
74
+ }
75
+ ]
71
76
72
77
# Test the case with wrong answer
73
78
rewards = reward_model .estimate (data_wrong )
@@ -79,12 +84,14 @@ def test_math_rule_reward_model_wrong_answer(reward_model):
79
84
80
85
@pytest .mark .envtest
81
86
def test_math_rule_reward_model_format_error (reward_model ):
82
- data_format_error = [{
83
- "system" : "Please answer this math problem..." ,
84
- "query" : "What is 2+2?" ,
85
- "response" : "The answer is four." ,
86
- "answer" : r"4"
87
- }]
87
+ data_format_error = [
88
+ {
89
+ "system" : "Please answer this math problem..." ,
90
+ "query" : "What is 2+2?" ,
91
+ "response" : "The answer is four." ,
92
+ "answer" : r"4"
93
+ }
94
+ ]
88
95
rewards_format = reward_model .estimate (data_format_error )
89
96
assert len (rewards_format ) == len (data_format_error )
90
97
# This should be a format error because "four" cannot be processed as a numerical value
@@ -99,13 +106,11 @@ def test_math_rule_reward_model_special_expressions(reward_model):
99
106
"query" : "What is 1/2?" ,
100
107
"response" : r"The answer is \frac{1}{2}." ,
101
108
"answer" : r"0.5"
102
- },
103
- {
109
+ }, {
104
110
"query" : "What is 50%?" ,
105
111
"response" : "The answer is 50%." ,
106
112
"answer" : r"0.5"
107
- },
108
- {
113
+ }, {
109
114
"query" : "What is sqrt(4)?" ,
110
115
"response" : r"The answer is \sqrt{4} = 2." ,
111
116
"answer" : r"2"
0 commit comments