@@ -52,18 +52,24 @@ end # module
5252 @test ! occursin (" 10 - 4" , description)
5353
5454 # Test grading with correct answers
55- result = Base. invokelatest (mod. grade, workdir, " 8" , " math1" )
55+ result = Base. invokelatest (mod. grade, workdir, " <answer>8</answer>" , " math1" )
56+ if result[" score" ] != 1.0
57+ @info " Grade result for math1 with answer 8" result
58+ end
5659 @test result[" score" ] == 1.0
5760
58- result = Base. invokelatest (mod. grade, workdir, " 6" , " math2" )
61+ result = Base. invokelatest (mod. grade, workdir, " <answer>6</answer>" , " math2" )
62+ if result[" score" ] != 1.0
63+ @info " Grade result for math2 with answer 6" result
64+ end
5965 @test result[" score" ] == 1.0
6066
6167 # Test grading with incorrect answer
62- result = Base. invokelatest (mod. grade, workdir, " 7 " , " math1" )
68+ result = Base. invokelatest (mod. grade, workdir, " <answer>7</answer> " , " math1" )
6369 @test result[" score" ] == 0.0
6470
6571 # Test grading with empty problem_id (should return error)
66- result = Base. invokelatest (mod. grade, workdir, " 8 " , " " )
72+ result = Base. invokelatest (mod. grade, workdir, " <answer>8</answer> " , " " )
6773 @test result[" score" ] == 0.0
6874 @test occursin (" problem_id is required" , result[" details" ])
6975 end
@@ -102,25 +108,49 @@ end # module
102108 @test occursin (" math1" , response[" result" ][" content" ][1 ][" text" ])
103109 @test occursin (" math2" , response[" result" ][" content" ][1 ][" text" ])
104110
105- # Test grading through MCP
111+ # Test grading through MCP for math1
106112 request = Dict (
107113 " jsonrpc" => " 2.0" ,
108114 " id" => 2 ,
109115 " method" => " tools/call" ,
110116 " params" => Dict (
111117 " name" => " grade_problem" ,
112- " arguments" => Dict (" transcript" => " 8" ) # Answer to first problem
118+ " arguments" => Dict (
119+ " transcript" => " <answer>8</answer>" ,
120+ " problem_id" => " math1"
121+ )
113122 )
114123 )
115124
116125 response = ClaudeMCPTools. handle_request (server, request)
117126 grade_result = JSON. parse (response[" result" ][" content" ][1 ][" text" ])
118127
119- # Should have graded both problems
128+ # math1 answer should have been graded as correct
120129 @test haskey (grade_result, " subscores" )
121130 @test grade_result[" subscores" ][" math1" ] == 1.0 # Correct
131+ @test grade_result[" score" ] == 1.0
132+
133+ # Test grading through MCP for math2
134+ request = Dict (
135+ " jsonrpc" => " 2.0" ,
136+ " id" => 3 ,
137+ " method" => " tools/call" ,
138+ " params" => Dict (
139+ " name" => " grade_problem" ,
140+ " arguments" => Dict (
141+ " transcript" => " <answer>5</answer>" , # Wrong answer
142+ " problem_id" => " math2"
143+ )
144+ )
145+ )
146+
147+ response = ClaudeMCPTools. handle_request (server, request)
148+ grade_result = JSON. parse (response[" result" ][" content" ][1 ][" text" ])
149+
150+ # math2 answer should have been graded as incorrect
151+ @test haskey (grade_result, " subscores" )
122152 @test grade_result[" subscores" ][" math2" ] == 0.0 # Incorrect
153+ @test grade_result[" score" ] == 0.0
123154 end
124155 end
125- end
126156end
0 commit comments