Skip to content

Commit a03ae11

Browse files
committed
Include partial grades in pass rate calculation
Updated process_results.py to count both 'Complete' and 'Partial' grades towards the pass rate and improved summary output. Enhanced quality_gate.py with clearer documentation and output messages to reflect the new pass rate calculation method.
1 parent f196107 commit a03ae11

File tree

2 files changed

+25
-8
lines changed

2 files changed

+25
-8
lines changed

tests/inspect-ai/utils/scripts/process_results.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,9 @@ def process_inspect_results(result_file_path: Union[str, Path]) -> None:
5151
if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "I"
5252
)
5353

54-
pass_rate = (passed_tests / total_tests) * 100 if total_tests > 0 else 0
54+
# Calculate pass rate including both Complete and Partial grades
55+
passing_tests = passed_tests + partial_tests
56+
pass_rate = (passing_tests / total_tests) * 100 if total_tests > 0 else 0
5557

5658
# Generate summary dictionary
5759
summary = {
@@ -63,7 +65,7 @@ def process_inspect_results(result_file_path: Union[str, Path]) -> None:
6365
"quality_gate_passed": pass_rate >= 80, # 80% threshold
6466
"details": (
6567
f"Complete: {passed_tests}, Partial: {partial_tests}, "
66-
f"Incomplete: {failed_tests}"
68+
f"Incomplete: {failed_tests}, Passing: {passing_tests}/{total_tests}"
6769
),
6870
}
6971

@@ -74,10 +76,10 @@ def process_inspect_results(result_file_path: Union[str, Path]) -> None:
7476

7577
print(f"\nSummary saved to: {summary_file_path}")
7678
print(
77-
f"Processed {total_tests} tests: {passed_tests} passed, "
78-
f"{partial_tests} partial, {failed_tests} failed"
79+
f"Processed {total_tests} tests: {passed_tests} complete, "
80+
f"{partial_tests} partial, {failed_tests} incomplete"
7981
)
80-
print(f"Pass rate: {pass_rate:.1f}%")
82+
print(f"Pass rate (Complete + Partial): {pass_rate:.1f}%")
8183

8284

8385
if __name__ == "__main__":

tests/inspect-ai/utils/scripts/quality_gate.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,18 @@
55

66

77
def check_quality_gate(results_dir: Union[str, Path], threshold: float = 80) -> None:
8-
"""Check if evaluation results meet quality gate"""
8+
"""
9+
Check if evaluation results meet quality gate.
10+
11+
The quality gate is based on the pass_rate from the summary.json file.
12+
Pass rate includes both 'Complete' (C) and 'Partial' (P) grades.
13+
Tests with 'Incomplete' (I) grade do not count towards the pass rate.
14+
15+
Args:
16+
results_dir: Directory containing the summary.json file
17+
threshold: Minimum pass rate percentage required (default: 80%)
18+
"""
19+
920
summary_path = Path(results_dir) / "summary.json"
1021

1122
if not summary_path.exists():
@@ -18,10 +29,14 @@ def check_quality_gate(results_dir: Union[str, Path], threshold: float = 80) ->
1829
pass_rate = summary.get("pass_rate", 0)
1930

2031
if pass_rate >= threshold:
21-
print(f"✅ Quality gate PASSED: {pass_rate:.1f}% >= {threshold}%")
32+
print(
33+
f"✅ Quality gate PASSED: {pass_rate:.1f}% >= {threshold}% (Complete + Partial grades)"
34+
)
2235
sys.exit(0)
2336
else:
24-
print(f"❌ Quality gate FAILED: {pass_rate:.1f}% < {threshold}%")
37+
print(
38+
f"❌ Quality gate FAILED: {pass_rate:.1f}% < {threshold}% (Complete + Partial grades)"
39+
)
2540
sys.exit(1)
2641

2742

0 commit comments

Comments
 (0)