feat(cli): Add AI support to shiny add test #8

name: Validate Test Generation Prompts

on:
  push:
    paths:
      - 'shiny/pytest/generate/**'
  pull_request:
    paths:
      - 'shiny/pytest/generate/**'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  PYTHON_VERSION: '3.12'
  ATTEMPTS: 3
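  # ATTEMPTS above is read by the retry loop in the "Run Evaluation and Tests 3 Times" step.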

jobs:
  validate-prompts:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
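
      # Note: cache: 'pip' on setup-python already caches pip downloads, so the
      # explicit ~/.cache/pip cache below is belt-and-braces rather than required.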
      - name: Cache Python dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', 'setup.py', 'pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e ".[dev,test]"
          pip install inspect-ai
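
      # Playwright browsers are cached by OS + dependency hash; on a cache hit only
      # the system-level dependencies are reinstalled, on a miss the browsers are
      # downloaded as well (see the two conditional install steps below).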
      - name: Cache Playwright browsers
        uses: actions/cache@v4
        id: playwright-cache
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-${{ hashFiles('**/requirements*.txt', 'setup.py') }}

      - name: Install Playwright browsers
        if: steps.playwright-cache.outputs.cache-hit != 'true'
        run: playwright install --with-deps

      - name: Install Playwright dependencies only
        if: steps.playwright-cache.outputs.cache-hit == 'true'
        run: playwright install-deps
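
      # Run the full metadata -> evaluation -> pytest cycle $ATTEMPTS times;
      # any attempt with more than one pytest failure fails the whole job.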
      - name: Run Evaluation and Tests 3 Times
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          set -e  # Exit immediately if a command fails
          for i in $(seq 1 "$ATTEMPTS")
          do
            echo "--- Starting Attempt $i of $ATTEMPTS ---"

            # Clean up results from the previous attempt to ensure a clean slate
            rm -rf results/
            mkdir -p results/
            rm -f test-results.xml

            echo "[Attempt $i] Creating test metadata..."
            python tests/inspect-ai/scripts/create_test_metadata.py

            echo "[Attempt $i] Running Inspect AI evaluation..."
            inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
              --log-dir results/ \
              --log-format json

            echo "[Attempt $i] Running Tests..."
            test_exit_code=0
            # Disable exit-on-error just for the pytest command so its exit code can be inspected
            set +e
            pytest tests/inspect-ai/apps --tb=short --disable-warnings -n auto --maxfail=2 --junit-xml=test-results.xml || test_exit_code=$?
            # Re-enable exit-on-error immediately
            set -e

            # Check whether tests failed and how many failures were recorded
            if [ "${test_exit_code:-0}" -ne 0 ]; then
              failure_count=$(grep -o 'failures="[0-9]*"' test-results.xml | grep -o '[0-9]*' | head -1)
              failure_count=${failure_count:-0}
              echo "Found $failure_count test failures on attempt $i"
              # Fail the workflow if more than one test failed
              if [ "$failure_count" -gt 1 ]; then
                echo "More than 1 test failed on attempt $i - failing CI"
                exit 1
              fi
            fi
            echo "--- Attempt $i of $ATTEMPTS Succeeded ---"
          done
          echo "All $ATTEMPTS evaluation and test runs passed successfully."
      - name: Process Results
        run: |
          # Find the latest evaluation result file and process it
          latest_result=$(ls -t results/*.json | head -1)
          if [ -f "$latest_result" ]; then
            echo "Processing results from: $latest_result"
            python tests/inspect-ai/utils/scripts/process_results.py "$latest_result"
          else
            echo "No result files found in results/ directory"
            exit 1
          fi
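
      # quality_gate.py receives the results directory and must exit non-zero
      # when the gate fails; that exit code is what fails this step.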
      - name: Check Quality Gate
        run: |
          if [ -f "results/summary.json" ]; then
            echo "Found summary file, checking quality gate..."
            python tests/inspect-ai/utils/scripts/quality_gate.py results/
          else
            echo "Summary file not found at results/summary.json"
            ls -la results/
            exit 1
          fi
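
      # Build the Markdown body for the PR comment; fall back to an error
      # notice if the summary file is missing or unreadable.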
      - name: Prepare Comment Body
        if: github.event_name == 'pull_request'
        run: |
          if [ -f "results/summary.json" ]; then
            python -c "
          import json
          try:
              with open('results/summary.json', 'r') as f:
                  results = json.load(f)
              comment = f'''## Inspect AI Evaluation Results
          - **Tests Passed**: {results['passed']}/{results['total']}
          - **Quality Gate**: {'✅ PASSED' if results['quality_gate_passed'] else '❌ FAILED'}

          ### Details
          {results['details']}
          '''
              with open('comment_body.txt', 'w') as f:
                  f.write(comment)
          except Exception as e:
              print(f'Error reading summary file: {e}')
              comment = '''## Inspect AI Evaluation Results
          ❌ **Error**: Could not read evaluation results summary file.
          Please check the workflow logs for details.'''
              with open('comment_body.txt', 'w') as f:
                  f.write(comment)
          "
          else
            echo "## Inspect AI Evaluation Results
          ❌ **Error**: Could not read evaluation results summary file.
          Please check the workflow logs for details." > comment_body.txt
          fi
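
      # sticky-pull-request-comment creates or updates a single PR comment
      # keyed by 'header', instead of posting a new comment on every run.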
      - name: Comment PR Results
        if: github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: inspect-ai-results
          path: comment_body.txt
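
Note for reviewers: quality_gate.py and process_results.py are referenced by the workflow but not shown in this diff. Going only by how the workflow invokes the gate (it receives the results/ directory and must exit non-zero to fail the job) and by the summary fields the comment step reads, a minimal sketch could look like the following; the JSON shape and all internals here are assumptions, not the actual implementation.

#!/usr/bin/env python3
"""Hypothetical sketch of quality_gate.py -- not the code in this PR.

Assumes results/summary.json matches the fields read by the
'Prepare Comment Body' step, e.g.:
    {"passed": 11, "total": 12, "quality_gate_passed": true, "details": "..."}
"""
import json
import sys
from pathlib import Path


def main() -> int:
    # The workflow invokes: python .../quality_gate.py results/
    results_dir = Path(sys.argv[1])
    summary = json.loads((results_dir / "summary.json").read_text())
    print(f"Tests passed: {summary['passed']}/{summary['total']}")
    if not summary["quality_gate_passed"]:
        print("Quality gate FAILED")
        return 1  # non-zero exit fails the 'Check Quality Gate' step
    print("Quality gate PASSED")
    return 0


if __name__ == "__main__":
    sys.exit(main())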