posit-dev · schloerke · Sep 6, 2025 · Jul 24, 2025 · Jul 24, 2025 · Jul 24, 2025
diff --git a/.github/workflows/conventional-commits.yaml → ...orkflows/verify-conventional-commits.yaml b/.github/workflows/conventional-commits.yaml → ...orkflows/verify-conventional-commits.yaml
diff --git a/.github/workflows/verify-test-generation-prompts.yaml b/.github/workflows/verify-test-generation-prompts.yaml
@@ -0,0 +1,97 @@
+name: Verify test generation prompts
+
+on:
+  pull_request:
+    paths:
+      - ".github/workflows/verify-test-generation-prompts.yaml"  # must match this workflow file's actual name (.yaml, not .yml)
+      - "shiny/pytest/_generate/**"
+  workflow_dispatch:
+
+concurrency:
+  group: "prompt-test-generation-${{ github.event.pull_request.number || 'dispatch' }}"  # one group per PR number; falls back to 'dispatch' when there is no PR number
+  cancel-in-progress: true  # a newer run in the same group cancels the run still in progress
+
+env:
+  PYTHON_VERSION: "3.13"
+  ATTEMPTS: 3
+  PYTHONUNBUFFERED: 1
+
+jobs:
+  verify-test-generation-prompts:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Setup py-shiny
+        id: install
+        uses: ./.github/py-shiny/setup
+
+      - name: Install Test Generator Dependencies
+        run: |
+          make ci-install-ai-deps
+
+      - name: Run Evaluation and Tests 3 Times
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          PYTHONUNBUFFERED: 1
+        timeout-minutes: 25
+        run: |
+          make run-test-ai-evaluation
+
+      - name: Upload test results
+        if: always()  # upload whatever results exist even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-results-${{ github.run_id }}  # run id keeps the artifact name unique per workflow run
+          path: |
+            test-results-inspect-ai/
+          retention-days: 7
+
+      - name: Process Results
+        timeout-minutes: 2
+        run: |
+          # Results are already averaged by the bash script, just verify they exist
+          if [ ! -f "test-results-inspect-ai/summary.json" ]; then
+            echo "No averaged summary found at test-results-inspect-ai/summary.json"
+            ls -la test-results-inspect-ai/
+            exit 1
+          else
+            echo "Using averaged results from all attempts"
+            cat test-results-inspect-ai/summary.json
+          fi
+
+      - name: Check Quality Gate
+        timeout-minutes: 2
+        run: |
+          if [ ! -f "test-results-inspect-ai/summary.json" ]; then
+            echo "Summary file not found at test-results-inspect-ai/summary.json"
+            ls -la test-results-inspect-ai/
+            exit 1
+          else
+            echo "Found summary file, checking quality gate..."
+            python tests/inspect-ai/utils/scripts/quality_gate.py test-results-inspect-ai/
+          fi
+
+      - name: Prepare Comment Body
+        if: github.event_name == 'pull_request'
+        timeout-minutes: 1
+        run: |
+          python tests/inspect-ai/scripts/prepare_comment.py test-results-inspect-ai/summary.json
+
+      - name: Comment PR Results
+        if: github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: inspect-ai-results
+          path: comment_body.txt
diff --git a/.github/workflows/verify-testing-docs-on-change.yml b/.github/workflows/verify-testing-docs-on-change.yml
@@ -0,0 +1,93 @@
+name: Verify testing documentation for changes
+
+on:
+  pull_request:
+    paths:
+      - ".github/workflows/verify-testing-docs-on-change.yml"
+      - "docs/_quartodoc-testing.yml"
+      - "shiny/playwright/controller/**"
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  verify-testing-docs:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup py-shiny
+        id: install
+        uses: ./.github/py-shiny/setup
+
+      - name: Install dependencies
+        run: |
+          make ci-install-docs
+
+      - name: Update testing docs and check for changes
+        id: check-docs-changes
+        run: |
+          # Snapshot the committed documentation file so it can be compared after regeneration
+          cp shiny/pytest/_generate/_data/testing-documentation.json testing-documentation-before.json
+
+          # Regenerate the testing documentation via the Makefile pipeline
+          make update-testing-docs
+
+          if [[ ! -f testing-documentation-before.json || ! -f shiny/pytest/_generate/_data/testing-documentation.json ]]; then
+            echo "One or both documentation files are missing."
+            exit 1
+          fi
+
+          # `diff -q` exits 0 when the files are identical, so a non-zero status means the docs changed
+          if ! diff -q testing-documentation-before.json shiny/pytest/_generate/_data/testing-documentation.json > /dev/null 2>&1; then
+            echo "docs_changed=true" >> "$GITHUB_OUTPUT"
+            echo "The generated documentation is out of sync with the current controller changes."
+            printf '\n\n'
+            diff -q testing-documentation-before.json shiny/pytest/_generate/_data/testing-documentation.json || true
+            printf '\n\n'
+          else
+            echo "docs_changed=false" >> "$GITHUB_OUTPUT"
+            echo "Documentation file is up to date"
+          fi
+
+      - name: Comment on PR about testing docs update
+        if: steps.check-docs-changes.outputs.docs_changed == 'true'
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: testing-docs-update
+          message: |
+            🚨 **Testing Documentation Out of Sync**
+
+            We detected changes in the `shiny/playwright/controller` directory that affect the testing documentation used by the `shiny add test` command.
+
+            **The generated documentation is out of sync with your controller changes. Please run:**
+
+            ```bash
+            make update-testing-docs
+            ```
+
+            **Then commit the updated `shiny/pytest/_generate/_data/testing-documentation.json` file.**
+
+            <details><summary>Additional details</summary>
+
+            The updated documentation file ensures that the AI test generator has access to the latest controller API documentation.
+
+            </details>
+
+            ❌ **This check will fail until the documentation is updated and committed.**
+
+            ---
+            *This comment was automatically generated by the `verify-testing-docs-on-change.yml` workflow.*
+
+      - name: Remove comment when no controller changes or docs are up to date
+        if: steps.check-docs-changes.outputs.docs_changed == 'false'
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: testing-docs-update
+          delete: true
diff --git a/.gitignore b/.gitignore
@@ -123,3 +123,10 @@ shiny_bookmarks/
 
 # setuptools_scm
 shiny/_version.py
+
+# Other
+tests/inspect-ai/apps/*/test_*.py
+test-results.xml
+results-inspect-ai/
+test-results-inspect-ai/
+tests/inspect-ai/scripts/test_metadata.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### New features
 
+* Added AI-powered test generator for Shiny applications. Use `shiny add test` to automatically generate comprehensive Playwright tests for your apps using AI models from Anthropic or OpenAI. (#2041)
+
 * `navset_card_*()` now has a `full_screen` option to support `card()`'s existing full-screen functionality. (#1451)
 
 * Added `ui.insert_nav_panel()`, `ui.remove_nav_panel()`, and `ui.update_nav_panel()` to support dynamic navigation. (#90)

diff --git a/Makefile b/Makefile
@@ -123,6 +123,35 @@ docs-quartodoc: FORCE
 	@echo "-------- Making quartodoc docs --------"
 	@cd docs && make quartodoc
 
+install-repomix: install-npm FORCE ## Install repomix if not already installed
+	@echo "-------- Installing repomix if needed --------"
+	@if ! command -v repomix > /dev/null 2>&1; then \
+		echo "Installing repomix..."; \
+		npm install -g repomix; \
+	else \
+		echo "repomix is already installed"; \
+	fi
+
+update-testing-docs-repomix: install-repomix FORCE ## Generate repomix output for testing docs
+	@echo "-------- Generating repomix output for testing docs --------"
+	repomix docs/api/testing -o tests/inspect-ai/utils/scripts/repomix-output-testing.xml
+
+update-testing-docs-process: FORCE ## Process repomix output to generate testing documentation JSON
+	@echo "-------- Processing testing documentation --------"
+	python tests/inspect-ai/utils/scripts/process_docs.py --input tests/inspect-ai/utils/scripts/repomix-output-testing.xml --output shiny/pytest/_generate/_data/testing-documentation.json
+	@echo "-------- Cleaning up temporary files --------"
+	rm -f tests/inspect-ai/utils/scripts/repomix-output-testing.xml
+
+update-testing-docs: docs update-testing-docs-repomix update-testing-docs-process FORCE ## Update testing documentation (full pipeline)
+	@echo "-------- Testing documentation update complete --------"
+
+ci-install-ai-deps: FORCE
+	uv pip install -e ".[dev,test,testgen]"
+	$(MAKE) install-playwright
+
+run-test-ai-evaluation: FORCE ## Run the AI evaluation script for tests
+	@echo "-------- Running AI evaluation for tests --------"
+	bash ./tests/inspect-ai/scripts/run-test-evaluation.sh
 
 install-npm: FORCE
 	$(if $(shell which npm), @echo -n, $(error Please install node.js and npm first. See https://nodejs.org/en/download/ for instructions.))

diff --git a/pyproject.toml b/pyproject.toml
@@ -124,6 +124,13 @@ doc = [
     "quartodoc>=0.8.1",
     "griffe>=1.3.2",
 ]
+testgen = [
+    "chatlas[anthropic,openai]",
+    "openai>=1.104.1",
+    "anthropic>=0.62.0",
+    "inspect-ai>=0.3.129",
+    "pytest-timeout",
+]
 
 
 [project.urls]

diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -10,7 +10,10 @@
     "docs",
     "tests/playwright/deploys/*/app.py",
     "shiny/templates",
-    "tests/playwright/ai_generated_apps",
+    "tests/playwright/ai_generated_apps/*/*/app*.py",
+    "tests/inspect-ai/apps/*/app*.py",
+    "shiny/pytest/_generate/_main.py",
+    "tests/inspect-ai/scripts/evaluation.py"
   ],
   "typeCheckingMode": "strict",
   "reportImportCycles": "none",

diff --git a/shiny/_main.py b/shiny/_main.py
@@ -533,11 +533,10 @@ def add() -> None:
 @add.command(
     help="""Add a test file for a specified Shiny app.
 
-Add an empty test file for a specified app. You will be prompted with a destination
-folder. If you don't provide a destination folder, it will be added in the current
-working directory based on the app name.
+Generate a comprehensive test file for a specified app using AI. The generator
+will analyze your app code and create appropriate test cases with assertions.
 
-After creating the shiny app file, you can use `pytest` to run the tests:
+After creating the test file, you can use `pytest` to run the tests:
 
         pytest TEST_FILE
 """
@@ -546,22 +545,37 @@ def add() -> None:
     "--app",
     "-a",
     type=str,
-    help="Please provide the path to the app file for which you want to create a test file.",
+    help="Path to the app file for which you want to generate a test file.",
 )
 @click.option(
     "--test-file",
     "-t",
     type=str,
-    help="Please provide the name of the test file you want to create. The basename of the test file should start with `test_` and be unique across all test files.",
+    help="Path for the generated test file. If not provided, will be auto-generated.",
+)
+@click.option(
+    "--provider",
+    type=click.Choice(["anthropic", "openai"]),
+    default="anthropic",
+    help="AI provider to use for test generation.",
+)
+@click.option(
+    "--model",
+    type=str,
+    help="Specific model to use (optional). Examples: haiku3.5, sonnet,  gpt-5, gpt-5-mini",
 )
 # Param for app.py, param for test_name
 def test(
-    app: Path | None,
-    test_file: Path | None,
+    app: str | None,
+    test_file: str | None,
+    provider: str,
+    model: str | None,
 ) -> None:
-    from ._main_add_test import add_test_file
+    from ._main_generate_test import generate_test_file
 
-    add_test_file(app_file=app, test_file=test_file)
+    generate_test_file(
+        app_file=app, output_file=test_file, provider=provider, model=model
+    )
 
 
 @main.command(