
Commit 038a350

Refactor CI test evaluation and update Python version

Moved the test evaluation logic to a reusable shell script and updated workflows to use Python 3.13. Improved the testing-docs workflow to check for documentation sync and provide clearer PR comments. Added 'openai' to the chatlas test dependencies in pyproject.toml.

1 parent ea096ec · commit 038a350
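For reviewers who want to exercise the extracted script locally, a minimal sketch (assuming a checkout with the project's dev/test extras and Playwright browsers installed; the API key value is a placeholder):

```bash
# Hypothetical local run of the extracted evaluation script.
# Assumes Python 3.13 and the dev/test extras are already installed.
export ANTHROPIC_API_KEY="sk-ant-..."  # placeholder, not a real key
chmod +x tests/inspect-ai/scripts/run-test-evaluation.sh
./tests/inspect-ai/scripts/run-test-evaluation.sh
```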

File tree: 4 files changed (+125 −89 lines)

.github/workflows/verify-test-generation-prompts.yaml
Lines changed: 2 additions & 75 deletions

```diff
@@ -10,7 +10,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  PYTHON_VERSION: "3.12"
+  PYTHON_VERSION: "3.13"
   ATTEMPTS: 3
   PYTHONUNBUFFERED: 1
 
@@ -70,80 +70,7 @@ jobs:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           PYTHONUNBUFFERED: 1
         timeout-minutes: 25
-        run: |
-          set -e  # Exit immediately if a command fails
-
-          # Function to log with timestamp
-          log_with_timestamp() {
-            echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
-          }
-
-          # Function to cleanup hanging processes
-          cleanup_processes() {
-            log_with_timestamp "Cleaning up any hanging processes..."
-            pkill -f "playwright" || true
-            pkill -f "chromium" || true
-            pkill -f "pytest" || true
-          }
-
-          # Set up trap to cleanup on exit
-          trap cleanup_processes EXIT
-
-          for i in {1..3}
-          do
-            log_with_timestamp "Starting Attempt $i of 3"
-
-            # Clean up results from previous attempt to ensure a clean slate
-            rm -rf results/
-            mkdir -p results/
-            rm -f test-results.xml
-
-            log_with_timestamp "[Attempt $i] Creating test metadata..."
-            python tests/inspect-ai/scripts/create_test_metadata.py
-
-            log_with_timestamp "[Attempt $i] Running Inspect AI evaluation..."
-            inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
-              --log-dir results/ \
-              --log-format json
-
-            log_with_timestamp "[Attempt $i] Running Tests..."
-            test_exit_code=0
-            # Disable exit on error just for the pytest command to check the exit code
-            set +e
-            timeout 15m pytest tests/inspect-ai/apps \
-              --tb=short \
-              --disable-warnings \
-              --maxfail=2 \
-              --junit-xml=test-results.xml \
-              --durations=10 \
-              --timeout=300 \
-              --timeout-method=thread \
-              -v || test_exit_code=$?
-            # Re-enable exit on error immediately
-            set -e
-
-            # Check if timeout occurred
-            if [ "${test_exit_code:-0}" -eq 124 ]; then
-              log_with_timestamp "Tests timed out on attempt $i - this may indicate hanging tests"
-              cleanup_processes
-              exit 1
-            fi
-
-            # Check if tests failed and how many failures occurred
-            if [ "${test_exit_code:-0}" -ne 0 ]; then
-              failure_count=$(grep -o 'failures="[0-9]*"' test-results.xml | grep -o '[0-9]*' || echo "0")
-              log_with_timestamp "Found $failure_count test failures on attempt $i"
-
-              # Fail the workflow if more than 1 test failed
-              if [ "$failure_count" -gt 1 ]; then
-                log_with_timestamp "More than 1 test failed on attempt $i - failing CI"
-                exit 1
-              fi
-            fi
-            log_with_timestamp "Attempt $i of 3 Succeeded"
-          done
-
-          log_with_timestamp "All 3 evaluation and test runs passed successfully."
+        run: ./tests/inspect-ai/scripts/run-test-evaluation.sh
 
       - name: Upload test results
         if: always()
```
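One practical note, not shown in this diff: for the `run: ./tests/inspect-ai/scripts/run-test-evaluation.sh` step to work on the runner, the script's executable bit must be tracked by git. If the commit did not already record the mode bit, a standard way to set it is:

```bash
# Mark the new script executable in the git index so the workflow's
# `run:` step can invoke it directly (assumes the bit wasn't committed).
git update-index --chmod=+x tests/inspect-ai/scripts/run-test-evaluation.sh
git commit -m "chore: make run-test-evaluation.sh executable"
```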

.github/workflows/verify-testing-docs-on-change.yml
Lines changed: 47 additions & 13 deletions

````diff
@@ -33,40 +33,74 @@ jobs:
             echo "No changes detected in shiny/playwright/controller directory"
           fi
 
-      - name: Comment on PR about testing docs update
+      - name: Set up Python
+        if: steps.check-controller.outputs.controller_changed == 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install uv
+        if: steps.check-controller.outputs.controller_changed == 'true'
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        if: steps.check-controller.outputs.controller_changed == 'true'
+        run: |
+          uv pip install --system --upgrade pip
+          uv pip install --system -e ".[dev,test]"
+
+      - name: Update testing docs and check for changes
         if: steps.check-controller.outputs.controller_changed == 'true'
+        id: check-docs-changes
+        run: |
+          # Store the current state of the documentation file
+          cp shiny/pytest/generate/data/docs/documentation_testing.json documentation_testing_before.json
+
+          # Run the make command to update testing docs
+          make update-testing-docs
+
+          # Check if the documentation file has changed
+          if ! diff -q documentation_testing_before.json shiny/pytest/generate/data/docs/documentation_testing.json > /dev/null 2>&1; then
+            echo "docs_changed=true" >> $GITHUB_OUTPUT
+            echo "Documentation file has changed after running make update-testing-docs"
+            echo "The generated documentation is out of sync with the current controller changes."
+            exit 1
+          else
+            echo "docs_changed=false" >> $GITHUB_OUTPUT
+            echo "Documentation file is up to date"
+          fi
+
+      - name: Comment on PR about testing docs update
+        if: steps.check-docs-changes.outputs.docs_changed == 'true'
         uses: marocchino/sticky-pull-request-comment@v2
         with:
           header: testing-docs-update
           message: |
-            🤖 **Testing Documentation Update Required**
+            🚨 **Testing Documentation Out of Sync**
 
-            We detected changes in the `shiny/playwright/controller` directory. These changes may affect the testing documentation used by the `shiny add test` command.
+            We detected changes in the `shiny/playwright/controller` directory that affect the testing documentation used by the `shiny add test` command.
 
-            **Please run the following command to update the testing documentation:**
+            **The generated documentation is out of sync with your controller changes. Please run:**
 
             ```bash
             make update-testing-docs
             ```
 
+            **Then commit the updated `shiny/pytest/generate/data/docs/documentation_testing.json` file.**
+
             <details><summary>Additional details</summary>
 
-            This command will:
-            1. Install repomix if not already installed
-            2. Build the latest documentation with quartodoc
-            3. Generate repomix output for testing docs
-            4. Process the output to update the AI test generator documentation
-            5. Clean up temporary files
+            The updated documentation file ensures that the AI test generator has access to the latest controller API documentation.
 
             </details>
 
-            This will ensure that the AI test generator has access to the latest controller API documentation.
+            ❌ **This check will fail until the documentation is updated and committed.**
 
             ---
             *This comment was automatically generated by the validate_testing_docs workflow.*
 
-      - name: Remove comment when no controller changes
-        if: steps.check-controller.outputs.controller_changed == 'false'
+      - name: Remove comment when no controller changes or docs are up to date
+        if: steps.check-controller.outputs.controller_changed == 'false' || (steps.check-controller.outputs.controller_changed == 'true' && steps.check-docs-changes.outputs.docs_changed == 'false')
         uses: marocchino/sticky-pull-request-comment@v2
         with:
           header: testing-docs-update
````
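The new check-docs-changes step can be reproduced before pushing; a rough local equivalent using the same paths as the workflow (the `/tmp` location is an arbitrary choice):

```bash
# Rough local equivalent of the CI docs-sync check above.
# File paths come from the workflow; /tmp/docs_before.json is arbitrary.
cp shiny/pytest/generate/data/docs/documentation_testing.json /tmp/docs_before.json
make update-testing-docs
if ! diff -q /tmp/docs_before.json \
     shiny/pytest/generate/data/docs/documentation_testing.json > /dev/null 2>&1; then
  echo "Docs out of sync: commit the regenerated documentation_testing.json"
  exit 1
fi
echo "Documentation file is up to date"
```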

pyproject.toml
Lines changed: 1 addition & 1 deletion

```diff
@@ -91,7 +91,7 @@ test = [
     "dask[dataframe]",
     "pyarrow",
     "pyarrow-stubs",
-    "chatlas[anthropic]",
+    "chatlas[anthropic,openai]",
     "chatlas[openai]",
 ]
 dev = [
```
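To pick up the widened chatlas extra locally, reinstalling the test extras is enough; for example, mirroring the uv invocation used in the workflow (omit `--system` when working in a local virtualenv):

```bash
# Reinstall the project's test extras so chatlas[anthropic,openai]
# and its OpenAI client dependency are available locally.
uv pip install -e ".[dev,test]"
```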
tests/inspect-ai/scripts/run-test-evaluation.sh
Lines changed: 75 additions & 0 deletions

```diff
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -e  # Exit immediately if a command fails
+
+# Function to log with timestamp
+log_with_timestamp() {
+  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
+}
+
+# Function to cleanup hanging processes
+cleanup_processes() {
+  log_with_timestamp "Cleaning up any hanging processes..."
+  pkill -f "playwright" || true
+  pkill -f "chromium" || true
+  pkill -f "pytest" || true
+}
+
+# Set up trap to cleanup on exit
+trap cleanup_processes EXIT
+
+for i in {1..3}
+do
+  log_with_timestamp "Starting Attempt $i of 3"
+
+  # Clean up results from previous attempt to ensure a clean slate
+  rm -rf results/
+  mkdir -p results/
+  rm -f test-results.xml
+
+  log_with_timestamp "[Attempt $i] Creating test metadata..."
+  python tests/inspect-ai/scripts/create_test_metadata.py
+
+  log_with_timestamp "[Attempt $i] Running Inspect AI evaluation..."
+  inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
+    --log-dir results/ \
+    --log-format json
+
+  log_with_timestamp "[Attempt $i] Running Tests..."
+  test_exit_code=0
+  # Disable exit on error just for the pytest command to check the exit code
+  set +e
+  timeout 15m pytest tests/inspect-ai/apps \
+    --tb=short \
+    --disable-warnings \
+    --maxfail=2 \
+    --junit-xml=test-results.xml \
+    --durations=10 \
+    --timeout=300 \
+    --timeout-method=thread \
+    -v || test_exit_code=$?
+  # Re-enable exit on error immediately
+  set -e
+
+  # Check if timeout occurred
+  if [ "${test_exit_code:-0}" -eq 124 ]; then
+    log_with_timestamp "Tests timed out on attempt $i - this may indicate hanging tests"
+    cleanup_processes
+    exit 1
+  fi
+
+  # Check if tests failed and how many failures occurred
+  if [ "${test_exit_code:-0}" -ne 0 ]; then
+    failure_count=$(grep -o 'failures="[0-9]*"' test-results.xml | grep -o '[0-9]*' || echo "0")
+    log_with_timestamp "Found $failure_count test failures on attempt $i"
+
+    # Fail the workflow if more than 1 test failed
+    if [ "$failure_count" -gt 1 ]; then
+      log_with_timestamp "More than 1 test failed on attempt $i - failing CI"
+      exit 1
+    fi
+  fi
+  log_with_timestamp "Attempt $i of 3 Succeeded"
+done
+
+log_with_timestamp "All 3 evaluation and test runs passed successfully."
```
