Skip to content

Commit 3b1c908

Browse files
committed
Enhance PR comment with averaged test results
Update prepare_comment.py to generate a more detailed and informative GitHub PR comment. The comment now includes averaged results from Inspect AI and Pytest, as well as an overall quality gate status, providing clearer feedback on test quality and execution across multiple attempts.
1 parent 4d44a1c commit 3b1c908

File tree

1 file changed

+65
-12
lines changed

1 file changed

+65
-12
lines changed

tests/inspect-ai/scripts/prepare_comment.py

Lines changed: 65 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@
77

88
def prepare_comment(summary_path: Union[str, Path]) -> int:
99
"""
10-
Reads summary.json and creates a formatted comment for GitHub PR.
10+
Reads summary.json and other result files to create a formatted comment for GitHub PR
11+
showing averaged results across multiple attempts.
1112
1213
Args:
1314
summary_path: Path to the summary.json file
@@ -20,17 +21,69 @@ def prepare_comment(summary_path: Union[str, Path]) -> int:
2021
if not summary_path.exists():
2122
raise FileNotFoundError(f"Summary file not found at {summary_path}")
2223

24+
# Read the inspect-ai averaged summary
2325
with open(summary_path, "r") as f:
24-
results = json.load(f)
25-
26-
comment = f"""## Inspect AI Evaluation Results
27-
28-
- **Tests Passed**: {results['passed']}/{results['total']}
29-
- **Quality Gate**: {'✅ PASSED' if results['quality_gate_passed'] else '❌ FAILED'}
30-
31-
### Details
32-
{results['details']}
33-
"""
26+
inspect_results = json.load(f)
27+
28+
# Try to read the pytest averaged summary
29+
pytest_results = None
30+
pytest_summary_path = summary_path.parent / "pytest_summary.json"
31+
if pytest_summary_path.exists():
32+
with open(pytest_summary_path, "r") as f:
33+
pytest_results = json.load(f)
34+
35+
# Try to read the combined summary for overall gate status
36+
combined_results = None
37+
combined_summary_path = summary_path.parent / "combined_summary.json"
38+
if combined_summary_path.exists():
39+
with open(combined_summary_path, "r") as f:
40+
combined_results = json.load(f)
41+
42+
# Build the comment
43+
comment_parts = [
44+
"## Test Generation Evaluation Results (Averaged across 3 attempts)\n"
45+
]
46+
47+
# Inspect AI section
48+
inspect_passing = inspect_results["passed"] + inspect_results["partial"]
49+
comment_parts.append("### 🔍 Inspect AI Test Quality Evaluation")
50+
comment_parts.append(f"- **Complete (C)**: {inspect_results['passed']:.1f}")
51+
comment_parts.append(f"- **Partial (P)**: {inspect_results['partial']:.1f}")
52+
comment_parts.append(f"- **Incomplete (I)**: {inspect_results['failed']:.1f}")
53+
comment_parts.append(
54+
f"- **Passing Rate**: {inspect_passing:.1f}/{inspect_results['total']:.1f} ({inspect_results['pass_rate']:.1f}%)"
55+
)
56+
comment_parts.append(
57+
f"- **Quality Gate**: {'✅ PASSED' if inspect_results['quality_gate_passed'] else '❌ FAILED'} (≥80% required)\n"
58+
)
59+
60+
# Pytest section
61+
if pytest_results:
62+
comment_parts.append("### 🧪 Pytest Execution Results")
63+
comment_parts.append(f"- **Passed**: {pytest_results['passed']:.1f}")
64+
comment_parts.append(f"- **Failed**: {pytest_results['failed']:.1f}")
65+
comment_parts.append(f"- **Errors**: {pytest_results['errors']:.1f}")
66+
comment_parts.append(f"- **Skipped**: {pytest_results['skipped']:.1f}")
67+
comment_parts.append(
68+
f"- **Pass Rate**: {pytest_results['passed']:.1f}/{pytest_results['total']:.1f} ({pytest_results['pass_rate']:.1f}%)\n"
69+
)
70+
71+
# Overall status
72+
if combined_results:
73+
overall_passed = combined_results.get("overall_quality_gate_passed", False)
74+
comment_parts.append("### 🎯 Overall Result")
75+
comment_parts.append(
76+
f"**{'✅ PASSED' if overall_passed else '❌ FAILED'}** - Combined quality gate"
77+
)
78+
if pytest_results:
79+
comment_parts.append("(Requires: Inspect AI ≥80% + Pytest ≥85%)")
80+
81+
comment_parts.append("\n---")
82+
comment_parts.append(
83+
"*Results are averaged across 3 evaluation attempts for improved reliability.*"
84+
)
85+
86+
comment = "\n".join(comment_parts)
3487

3588
with open("comment_body.txt", "w") as f:
3689
f.write(comment)
@@ -41,7 +94,7 @@ def prepare_comment(summary_path: Union[str, Path]) -> int:
4194
except Exception as e:
4295
print(f"Error reading summary file: {e}")
4396

44-
comment = """## Inspect AI Evaluation Results
97+
comment = """## Test Generation Evaluation Results
4598
4699
❌ **Error**: Could not read evaluation results summary file.
47100

0 commit comments

Comments (0)