44from easydict import EasyDict
55from ding .reward_model .math_rule_reward_model import MathRuleRewardModel
66
7+
78@pytest .fixture
89def reward_model ():
910 return MathRuleRewardModel (
@@ -19,24 +20,26 @@ def reward_model():
1920
2021@pytest .mark .envtest
2122def test_math_rule_reward_model_correct_answer (reward_model ):
22- data_correct = [{
23- "system" : "Please answer this math problem..." ,
24- "query" : (
25- "The school now introduces a new color, silver, for the flag design. "
26- "Crestview's school colors are now purple, gold, and silver. "
27- "The students are designing a flag using three solid-colored horizontal stripes. "
28- "Using one, two, or all three of the school colors, how many different flags "
29- "are possible if adjacent stripes may be the same color?"
30- ),
31- "response" : (
32- "Crestview's school colors—purple, gold, and silver—can be used to design "
33- "a flag with three horizontal stripes, where each stripe can be any of the "
34- "three colors and adjacent stripes may be the same. Since each of the three "
35- "stripes has three independent color choices, the total number of possible "
36- "flag designs is 27"
37- ),
38- "answer" : r"27"
39- }]
23+ data_correct = [
24+ {
25+ "system" : "Please answer this math problem..." ,
26+ "query" : (
27+ "The school now introduces a new color, silver, for the flag design. "
28+ "Crestview's school colors are now purple, gold, and silver. "
29+ "The students are designing a flag using three solid-colored horizontal stripes. "
30+ "Using one, two, or all three of the school colors, how many different flags "
31+ "are possible if adjacent stripes may be the same color?"
32+ ),
33+ "response" : (
34+ "Crestview's school colors—purple, gold, and silver—can be used to design "
35+ "a flag with three horizontal stripes, where each stripe can be any of the "
36+ "three colors and adjacent stripes may be the same. Since each of the three "
37+ "stripes has three independent color choices, the total number of possible "
38+ "flag designs is 27"
39+ ),
40+ "answer" : r"27"
41+ }
42+ ]
4043
4144 # Test the case with correct answer
4245 rewards = reward_model .estimate (data_correct )
@@ -48,26 +51,28 @@ def test_math_rule_reward_model_correct_answer(reward_model):
4851
4952@pytest .mark .envtest
5053def test_math_rule_reward_model_wrong_answer (reward_model ):
51- data_wrong = [{
52- "system" : "Please answer this math problem..." ,
53- "query" : (
54- "The school now introduces a new color, silver, for the flag design. "
55- "Crestview's school colors are now purple, gold, and silver. "
56- "The students are designing a flag using three solid-colored horizontal stripes. "
57- "Using one, two, or all three of the school colors, how many different flags "
58- "are possible if adjacent stripes may be the same color?"
59- ),
60- "response" : (
61- r"The given point \(\left(\frac{\sqrt{3}}{2}, -\frac{1}{2}\right)\) lies on "
62- r"the unit circle, meaning its coordinates correspond to \((\cos \alpha, "
63- r"\sin \alpha)\). Since \(\cos \alpha = \frac{\sqrt{3}}{2}\) and "
64- r"\(\sin \alpha = -\frac{1}{2}\), the angle \(\alpha\) is in the "
65- r"**fourth quadrant**, where the reference angle is \(\frac{\pi}{6}\). "
66- r"Therefore, the smallest positive value of \(\alpha\) is "
67- r"\(2\pi - \frac{\pi}{6} = \frac{17\pi}{6}\)."
68- ),
69- "answer" : r"\frac{11\pi}{6}"
70- }]
54+ data_wrong = [
55+ {
56+ "system" : "Please answer this math problem..." ,
57+ "query" : (
58+ "The school now introduces a new color, silver, for the flag design. "
59+ "Crestview's school colors are now purple, gold, and silver. "
60+ "The students are designing a flag using three solid-colored horizontal stripes. "
61+ "Using one, two, or all three of the school colors, how many different flags "
62+ "are possible if adjacent stripes may be the same color?"
63+ ),
64+ "response" : (
65+ r"The given point \(\left(\frac{\sqrt{3}}{2}, -\frac{1}{2}\right)\) lies on "
66+ r"the unit circle, meaning its coordinates correspond to \((\cos \alpha, "
67+ r"\sin \alpha)\). Since \(\cos \alpha = \frac{\sqrt{3}}{2}\) and "
68+ r"\(\sin \alpha = -\frac{1}{2}\), the angle \(\alpha\) is in the "
69+ r"**fourth quadrant**, where the reference angle is \(\frac{\pi}{6}\). "
70+ r"Therefore, the smallest positive value of \(\alpha\) is "
71+ r"\(2\pi - \frac{\pi}{6} = \frac{17\pi}{6}\)."
72+ ),
73+ "answer" : r"\frac{11\pi}{6}"
74+ }
75+ ]
7176
7277 # Test the case with wrong answer
7378 rewards = reward_model .estimate (data_wrong )
@@ -79,12 +84,14 @@ def test_math_rule_reward_model_wrong_answer(reward_model):
7984
8085@pytest .mark .envtest
8186def test_math_rule_reward_model_format_error (reward_model ):
82- data_format_error = [{
83- "system" : "Please answer this math problem..." ,
84- "query" : "What is 2+2?" ,
85- "response" : "The answer is four." ,
86- "answer" : r"4"
87- }]
87+ data_format_error = [
88+ {
89+ "system" : "Please answer this math problem..." ,
90+ "query" : "What is 2+2?" ,
91+ "response" : "The answer is four." ,
92+ "answer" : r"4"
93+ }
94+ ]
8895 rewards_format = reward_model .estimate (data_format_error )
8996 assert len (rewards_format ) == len (data_format_error )
9097 # This should be a format error because "four" cannot be processed as a numerical value
@@ -99,13 +106,11 @@ def test_math_rule_reward_model_special_expressions(reward_model):
99106 "query" : "What is 1/2?" ,
100107 "response" : r"The answer is \frac{1}{2}." ,
101108 "answer" : r"0.5"
102- },
103- {
109+ }, {
104110 "query" : "What is 50%?" ,
105111 "response" : "The answer is 50%." ,
106112 "answer" : r"0.5"
107- },
108- {
113+ }, {
109114 "query" : "What is sqrt(4)?" ,
110115 "response" : r"The answer is \sqrt{4} = 2." ,
111116 "answer" : r"2"
0 commit comments