|
2 | 2 | Grade Problem Tool for LLM Benchmark |
3 | 3 | """ |
4 | 4 |
|
5 | | -import Test: DefaultTestSet |
| 5 | +import Test: DefaultTestSet, finish |
6 | 6 |
|
7 | 7 | mutable struct GradeProblemTool <: ClaudeMCPTools.MCPTool |
8 | 8 | grade_fn::Function |
@@ -57,43 +57,24 @@ function ClaudeMCPTools.execute(tool::GradeProblemTool, params::Dict) |
57 | 57 | Test.pop_testset() |
58 | 58 | end |
59 | 59 |
|
60 | | - # Format the testset output as a string similar to how Test module would display it |
61 | | - test_output = IOBuffer() |
| 60 | + # Capture the testset output using redirect_stdout |
| 61 | + # Create a Pipe for capturing stdout |
| 62 | + old_stdout = stdout |
| 63 | + rd, wr = redirect_stdout() |
62 | 64 |
|
63 | | - # Write the summary line |
64 | | - n_pass = ts.n_passed |
65 | | - n_fail = count(r -> isa(r, Test.Fail), ts.results) |
66 | | - n_error = count(r -> isa(r, Test.Error), ts.results) |
67 | | - n_broken = count(r -> isa(r, Test.Broken), ts.results) |
68 | | - n_total = n_pass + n_fail + n_error + n_broken |
69 | | - |
70 | | - println(test_output, "Test Summary: | Pass Fail Error Broken Total") |
71 | | - println(test_output, "$(ts.description) | $(n_pass) $(n_fail) $(n_error) $(n_broken) $(n_total)") |
72 | | - |
73 | | - # Add details about nested testsets and failures |
74 | | - for result in ts.results |
75 | | - if isa(result, DefaultTestSet) |
76 | | - # Nested testset |
77 | | - n_pass_nested = result.n_passed |
78 | | - n_fail_nested = count(r -> isa(r, Test.Fail), result.results) |
79 | | - n_error_nested = count(r -> isa(r, Test.Error), result.results) |
80 | | - println(test_output, " $(result.description) | $(n_pass_nested) $(n_fail_nested) $(n_error_nested)") |
81 | | - elseif isa(result, Test.Fail) |
82 | | - # Test failure details |
83 | | - println(test_output, "\nTest Failed:") |
84 | | - println(test_output, " Expression: $(result.orig_expr)") |
85 | | - if result.data !== nothing |
86 | | - println(test_output, " Evaluated: $(result.data)") |
87 | | - end |
88 | | - elseif isa(result, Test.Error) |
89 | | - # Test error details |
90 | | - println(test_output, "\nTest Error:") |
91 | | - println(test_output, " Expression: $(result.orig_expr)") |
92 | | - println(test_output, " Exception: $(result.value)") |
93 | | - end |
| 65 | + try |
| 66 | + Test.finish(ts) |
| 67 | + catch e |
| 68 | + # finish throws an error if tests fail, but we still want the output |
94 | 69 | end |
95 | 70 |
|
96 | | - test_output_str = String(take!(test_output)) |
| 71 | + # Restore stdout and close the write end |
| 72 | + redirect_stdout(old_stdout) |
| 73 | + close(wr) |
| 74 | + |
| 75 | + # Read the captured output |
| 76 | + test_output_str = read(rd, String) |
| 77 | + close(rd) |
97 | 78 |
|
98 | 79 | # Check if any tests failed (including in nested testsets) |
99 | 80 | function has_failures(testset) |
|
0 commit comments