77
88def prepare_comment (summary_path : Union [str , Path ]) -> int :
99 """
10- Reads summary.json and creates a formatted comment for GitHub PR.
10+ Reads summary.json and other result files to create a formatted comment for GitHub PR
11+ showing averaged results across multiple attempts.
1112
1213 Args:
1314 summary_path: Path to the summary.json file
@@ -20,17 +21,69 @@ def prepare_comment(summary_path: Union[str, Path]) -> int:
2021 if not summary_path .exists ():
2122 raise FileNotFoundError (f"Summary file not found at { summary_path } " )
2223
24+ # Read the inspect-ai averaged summary
2325 with open (summary_path , "r" ) as f :
24- results = json .load (f )
25-
26- comment = f"""## Inspect AI Evaluation Results
27-
28- - **Tests Passed**: { results ['passed' ]} /{ results ['total' ]}
29- - **Quality Gate**: { '✅ PASSED' if results ['quality_gate_passed' ] else '❌ FAILED' }
30-
31- ### Details
32- { results ['details' ]}
33- """
26+ inspect_results = json .load (f )
27+
28+ # Try to read the pytest averaged summary
29+ pytest_results = None
30+ pytest_summary_path = summary_path .parent / "pytest_summary.json"
31+ if pytest_summary_path .exists ():
32+ with open (pytest_summary_path , "r" ) as f :
33+ pytest_results = json .load (f )
34+
35+ # Try to read the combined summary for overall gate status
36+ combined_results = None
37+ combined_summary_path = summary_path .parent / "combined_summary.json"
38+ if combined_summary_path .exists ():
39+ with open (combined_summary_path , "r" ) as f :
40+ combined_results = json .load (f )
41+
42+ # Build the comment
43+ comment_parts = [
44+ "## Test Generation Evaluation Results (Averaged across 3 attempts)\n "
45+ ]
46+
47+ # Inspect AI section
48+ inspect_passing = inspect_results ["passed" ] + inspect_results ["partial" ]
49+ comment_parts .append ("### 🔍 Inspect AI Test Quality Evaluation" )
50+ comment_parts .append (f"- **Complete (C)**: { inspect_results ['passed' ]:.1f} " )
51+ comment_parts .append (f"- **Partial (P)**: { inspect_results ['partial' ]:.1f} " )
52+ comment_parts .append (f"- **Incomplete (I)**: { inspect_results ['failed' ]:.1f} " )
53+ comment_parts .append (
54+ f"- **Passing Rate**: { inspect_passing :.1f} /{ inspect_results ['total' ]:.1f} ({ inspect_results ['pass_rate' ]:.1f} %)"
55+ )
56+ comment_parts .append (
57+ f"- **Quality Gate**: { '✅ PASSED' if inspect_results ['quality_gate_passed' ] else '❌ FAILED' } (≥80% required)\n "
58+ )
59+
60+ # Pytest section
61+ if pytest_results :
62+ comment_parts .append ("### 🧪 Pytest Execution Results" )
63+ comment_parts .append (f"- **Passed**: { pytest_results ['passed' ]:.1f} " )
64+ comment_parts .append (f"- **Failed**: { pytest_results ['failed' ]:.1f} " )
65+ comment_parts .append (f"- **Errors**: { pytest_results ['errors' ]:.1f} " )
66+ comment_parts .append (f"- **Skipped**: { pytest_results ['skipped' ]:.1f} " )
67+ comment_parts .append (
68+ f"- **Pass Rate**: { pytest_results ['passed' ]:.1f} /{ pytest_results ['total' ]:.1f} ({ pytest_results ['pass_rate' ]:.1f} %)\n "
69+ )
70+
71+ # Overall status
72+ if combined_results :
73+ overall_passed = combined_results .get ("overall_quality_gate_passed" , False )
74+ comment_parts .append ("### 🎯 Overall Result" )
75+ comment_parts .append (
76+ f"**{ '✅ PASSED' if overall_passed else '❌ FAILED' } ** - Combined quality gate"
77+ )
78+ if pytest_results :
79+ comment_parts .append ("(Requires: Inspect AI ≥80% + Pytest ≥85%)" )
80+
81+ comment_parts .append ("\n ---" )
82+ comment_parts .append (
83+ "*Results are averaged across 3 evaluation attempts for improved reliability.*"
84+ )
85+
86+ comment = "\n " .join (comment_parts )
3487
3588 with open ("comment_body.txt" , "w" ) as f :
3689 f .write (comment )
@@ -41,7 +94,7 @@ def prepare_comment(summary_path: Union[str, Path]) -> int:
4194 except Exception as e :
4295 print (f"Error reading summary file: { e } " )
4396
44- comment = """## Inspect AI Evaluation Results
97+ comment = """## Test Generation Evaluation Results
4598
4699❌ **Error**: Could not read evaluation results summary file.
47100
0 commit comments