feat(cli): Add AI support to shiny add test
#8
Workflow file for this run
name: Validate Test Generation Prompts

on:
  push:
    paths:
      - 'shiny/pytest/generate/**'
  pull_request:
    paths:
      - 'shiny/pytest/generate/**'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  PYTHON_VERSION: '3.12'
  ATTEMPTS: 3
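
# Concurrency cancels superseded runs on the same ref so only the latest push
# is validated; ATTEMPTS drives the retry loop in the evaluation step below.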
jobs:
  validate-prompts:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Cache Python dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', 'setup.py', 'pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e ".[dev,test]"
          pip install inspect-ai
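
      # Browser binaries are cached separately from pip packages. On a cache
      # miss the full install (browsers plus system dependencies) runs; on a
      # hit only the system dependencies need to be reinstalled, which the two
      # conditional steps below handle.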
      - name: Cache Playwright browsers
        uses: actions/cache@v4
        id: playwright-cache
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-${{ hashFiles('**/requirements*.txt', 'setup.py') }}

      - name: Install Playwright browsers
        if: steps.playwright-cache.outputs.cache-hit != 'true'
        run: playwright install --with-deps

      - name: Install Playwright dependencies only
        if: steps.playwright-cache.outputs.cache-hit == 'true'
        run: playwright install-deps
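
      # Each attempt regenerates the test metadata, runs the Inspect AI
      # evaluation, then executes the generated tests with pytest. A single
      # test failure per attempt is tolerated; two or more fail the workflow.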
      - name: Run Evaluation and Tests 3 Times
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          set -e  # Exit immediately if a command fails
          for i in $(seq 1 "$ATTEMPTS")
          do
            echo "--- Starting Attempt $i of $ATTEMPTS ---"

            # Clean up results from the previous attempt to ensure a clean slate
            rm -rf results/
            mkdir -p results/
            rm -f test-results.xml

            echo "[Attempt $i] Creating test metadata..."
            python tests/inspect-ai/scripts/create_test_metadata.py

            echo "[Attempt $i] Running Inspect AI evaluation..."
            inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
              --log-dir results/ \
              --log-format json

            echo "[Attempt $i] Running Tests..."
            test_exit_code=0
            # Disable exit-on-error just for the pytest command so its exit code can be captured
            set +e
            pytest tests/inspect-ai/apps --tb=short --disable-warnings -n auto --maxfail=2 --junit-xml=test-results.xml || test_exit_code=$?
            # Re-enable exit-on-error immediately
            set -e

            # Check whether tests failed and how many failures occurred
            if [ "${test_exit_code:-0}" -ne 0 ]; then
              failure_count=$(grep -o 'failures="[0-9]*"' test-results.xml | head -n 1 | grep -o '[0-9]*' || echo "0")
              echo "Found $failure_count test failure(s) on attempt $i"
              # Fail the workflow if more than one test failed
              if [ "$failure_count" -gt 1 ]; then
                echo "More than 1 test failed on attempt $i - failing CI"
                exit 1
              fi
            fi
            echo "--- Attempt $i of $ATTEMPTS Succeeded ---"
          done
          echo "All $ATTEMPTS evaluation and test runs passed successfully."
      - name: Process Results
        run: |
          # Find the latest evaluation result file and process it
          # (errors are silenced so an empty results/ falls through to the else branch)
          latest_result=$(ls -t results/*.json 2>/dev/null | head -n 1 || true)
          if [ -f "$latest_result" ]; then
            echo "Processing results from: $latest_result"
            python tests/inspect-ai/utils/scripts/process_results.py "$latest_result"
          else
            echo "No result files found in results/ directory"
            exit 1
          fi

      - name: Check Quality Gate
        run: |
          if [ -f "results/summary.json" ]; then
            echo "Found summary file, checking quality gate..."
            python tests/inspect-ai/utils/scripts/quality_gate.py results/
          else
            echo "Summary file not found at results/summary.json"
            ls -la results/
            exit 1
          fi
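
      # The inline script below renders results/summary.json into a markdown
      # comment body; it assumes the keys passed, total, quality_gate_passed,
      # and details. The Python lines sit at the block's base indentation
      # because python -c rejects source with stray leading whitespace.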
      - name: Prepare Comment Body
        if: github.event_name == 'pull_request'
        run: |
          if [ -f "results/summary.json" ]; then
            python -c "
          import json
          try:
              with open('results/summary.json', 'r') as f:
                  results = json.load(f)
              comment = f'''## Inspect AI Evaluation Results
          - **Tests Passed**: {results['passed']}/{results['total']}
          - **Quality Gate**: {'✅ PASSED' if results['quality_gate_passed'] else '❌ FAILED'}
          ### Details
          {results['details']}
          '''
              with open('comment_body.txt', 'w') as f:
                  f.write(comment)
          except Exception as e:
              print(f'Error reading summary file: {e}')
              comment = '''## Inspect AI Evaluation Results
          ❌ **Error**: Could not read evaluation results summary file.
          Please check the workflow logs for details.'''
              with open('comment_body.txt', 'w') as f:
                  f.write(comment)
          "
          else
            echo "## Inspect AI Evaluation Results
          ❌ **Error**: Could not read evaluation results summary file.
          Please check the workflow logs for details." > comment_body.txt
          fi

      - name: Comment PR Results
        if: github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: inspect-ai-results
          path: comment_body.txt
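
For reference, a minimal sketch of the results/summary.json shape the comment step assumes. The key names (passed, total, quality_gate_passed, details) are taken from the inline script above; the values are illustrative only, and the real process_results.py may emit additional fields.

# Hypothetical example of writing and reading a summary.json with the
# fields the workflow's inline comment script expects.
import json

summary = {
    "passed": 11,                 # generated tests that passed
    "total": 12,                  # total tests pytest executed
    "quality_gate_passed": True,  # verdict later enforced by quality_gate.py
    "details": "Per-app breakdown available in the workflow logs.",
}

with open("summary.json", "w") as f:
    json.dump(summary, f, indent=2)

# Read it back the same way the workflow's inline script does.
with open("summary.json") as f:
    results = json.load(f)
print(f"Tests Passed: {results['passed']}/{results['total']}")
print("Quality Gate:", "PASSED" if results["quality_gate_passed"] else "FAILED")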