Skip to content

Commit 06b3706

Browse files
committed
Add debug logging to test evaluation scripts
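Introduced additional print statements in prepare_comment.py and average_results.py to aid in debugging file discovery and directory contents. Also updated run-test-evaluation.sh to initialize the results directory structure once, before the attempt loop, instead of at the start of every attempt; previously the per-attempt `rm -rf results/` removed the results/attempts/attempt_N/ directories created by earlier attempts, so this change keeps every attempt's output in place and makes the setup clearer.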
1 parent 3b1c908 commit 06b3706

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

tests/inspect-ai/scripts/prepare_comment.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,15 @@ def prepare_comment(summary_path: Union[str, Path]) -> int:
2828
# Try to read the pytest averaged summary
2929
pytest_results = None
3030
pytest_summary_path = summary_path.parent / "pytest_summary.json"
31+
print(f"Looking for pytest summary at: {pytest_summary_path}")
3132
if pytest_summary_path.exists():
3233
with open(pytest_summary_path, "r") as f:
3334
pytest_results = json.load(f)
35+
print(f"Found pytest results: {pytest_results}")
36+
else:
37+
print(
38+
f"Pytest summary not found. Directory contents: {list(summary_path.parent.iterdir())}"
39+
)
3440

3541
# Try to read the combined summary for overall gate status
3642
combined_results = None

tests/inspect-ai/scripts/run-test-evaluation.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ cleanup_processes() {
2424

2525
trap cleanup_processes EXIT
2626

27+
# Initialize results directory structure once
28+
rm -rf results/
29+
mkdir -p results/
30+
2731
for i in $(seq 1 "$ATTEMPTS"); do
2832
log_with_timestamp "Starting attempt $i of $ATTEMPTS"
2933

30-
rm -rf results/
31-
mkdir -p results/
3234
mkdir -p results/attempts/attempt_$i/
3335
rm -f test-results.xml
3436

tests/inspect-ai/utils/scripts/average_results.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,15 @@ def process_inspect_ai_results(attempts_dir: Path) -> Dict[str, Any]:
3232

3333
if not attempt_dirs:
3434
print("No attempt directories found")
35+
print(f"Looking in: {attempts_dir}")
36+
print(
37+
f"Directory contents: {list(attempts_dir.iterdir()) if attempts_dir.exists() else 'Directory does not exist'}"
38+
)
3539
return {}
3640

37-
print(f"Found {len(attempt_dirs)} attempts to average")
41+
print(
42+
f"Found {len(attempt_dirs)} attempts to average: {[d.name for d in attempt_dirs]}"
43+
)
3844

3945
all_summaries: List[Dict[str, Union[int, float, bool]]] = []
4046

@@ -146,14 +152,22 @@ def process_pytest_results(attempts_dir: Path) -> Dict[str, Any]:
146152

147153
if not attempt_dirs:
148154
print("No attempt directories found for pytest results")
155+
print(f"Looking in: {attempts_dir}")
156+
print(
157+
f"Directory contents: {list(attempts_dir.iterdir()) if attempts_dir.exists() else 'Directory does not exist'}"
158+
)
149159
return {}
150160

151161
all_pytest_summaries: List[Dict[str, Union[int, float]]] = []
152162

153163
for attempt_dir in attempt_dirs:
154164
xml_file = attempt_dir / "test-results.xml"
165+
print(f"Looking for XML file: {xml_file}")
155166
if not xml_file.exists():
156167
print(f"Warning: No test-results.xml found in {attempt_dir}")
168+
print(
169+
f"Directory contents: {list(attempt_dir.iterdir()) if attempt_dir.exists() else 'Directory does not exist'}"
170+
)
157171
continue
158172

159173
try:

0 commit comments

Comments
 (0)