Skip to content

Commit 51724f9

Browse files
committed
Use redirect_stdout with pipes to capture Test.finish output
- Switch from manual test output formatting to using Test.finish.
- Use redirect_stdout() to create pipes for capturing stdout.
- Test.finish provides the actual Test module formatted output.
- This preserves all Test module formatting, including nested testsets.
- Properly handle test failures by catching exceptions from finish.
- The captured output now matches exactly what the Test module would print.
1 parent aafd494 commit 51724f9

File tree

1 file changed

+16
-35
lines changed

1 file changed

+16
-35
lines changed

src/tools/grade_problem.jl

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Grade Problem Tool for LLM Benchmark
33
"""
44

5-
import Test: DefaultTestSet
5+
import Test: DefaultTestSet, finish
66

77
mutable struct GradeProblemTool <: ClaudeMCPTools.MCPTool
88
grade_fn::Function
@@ -57,43 +57,24 @@ function ClaudeMCPTools.execute(tool::GradeProblemTool, params::Dict)
5757
Test.pop_testset()
5858
end
5959

60-
# Format the testset output as a string similar to how Test module would display it
61-
test_output = IOBuffer()
60+
# Capture the testset output using redirect_stdout
61+
# Create a Pipe for capturing stdout
62+
old_stdout = stdout
63+
rd, wr = redirect_stdout()
6264

63-
# Write the summary line
64-
n_pass = ts.n_passed
65-
n_fail = count(r -> isa(r, Test.Fail), ts.results)
66-
n_error = count(r -> isa(r, Test.Error), ts.results)
67-
n_broken = count(r -> isa(r, Test.Broken), ts.results)
68-
n_total = n_pass + n_fail + n_error + n_broken
69-
70-
println(test_output, "Test Summary: | Pass Fail Error Broken Total")
71-
println(test_output, "$(ts.description) | $(n_pass) $(n_fail) $(n_error) $(n_broken) $(n_total)")
72-
73-
# Add details about nested testsets and failures
74-
for result in ts.results
75-
if isa(result, DefaultTestSet)
76-
# Nested testset
77-
n_pass_nested = result.n_passed
78-
n_fail_nested = count(r -> isa(r, Test.Fail), result.results)
79-
n_error_nested = count(r -> isa(r, Test.Error), result.results)
80-
println(test_output, " $(result.description) | $(n_pass_nested) $(n_fail_nested) $(n_error_nested)")
81-
elseif isa(result, Test.Fail)
82-
# Test failure details
83-
println(test_output, "\nTest Failed:")
84-
println(test_output, " Expression: $(result.orig_expr)")
85-
if result.data !== nothing
86-
println(test_output, " Evaluated: $(result.data)")
87-
end
88-
elseif isa(result, Test.Error)
89-
# Test error details
90-
println(test_output, "\nTest Error:")
91-
println(test_output, " Expression: $(result.orig_expr)")
92-
println(test_output, " Exception: $(result.value)")
93-
end
65+
try
66+
Test.finish(ts)
67+
catch e
68+
# finish throws an error if tests fail, but we still want the output
9469
end
9570

96-
test_output_str = String(take!(test_output))
71+
# Restore stdout and close the write end
72+
redirect_stdout(old_stdout)
73+
close(wr)
74+
75+
# Read the captured output
76+
test_output_str = read(rd, String)
77+
close(rd)
9778

9879
# Check if any tests failed (including in nested testsets)
9980
function has_failures(testset)

0 commit comments

Comments
 (0)