diff --git a/.github/workflows/conventional-commits.yaml b/.github/workflows/verify-conventional-commits.yaml
similarity index 100%
rename from .github/workflows/conventional-commits.yaml
rename to .github/workflows/verify-conventional-commits.yaml
diff --git a/.github/workflows/verify-test-generation-prompts.yaml b/.github/workflows/verify-test-generation-prompts.yaml
new file mode 100644
index 000000000..2e1301884
--- /dev/null
+++ b/.github/workflows/verify-test-generation-prompts.yaml
@@ -0,0 +1,97 @@
+name: Verify test generation prompts
+
+on:
+ pull_request:
+ paths:
+      - ".github/workflows/verify-test-generation-prompts.yaml"
+ - "shiny/pytest/_generate/**"
+ workflow_dispatch:
+
+concurrency:
+ group: "prompt-test-generation-${{ github.event.pull_request.number || 'dispatch' }}"
+ cancel-in-progress: true
+
+env:
+ PYTHON_VERSION: "3.13"
+ ATTEMPTS: 3
+ PYTHONUNBUFFERED: 1
+
+jobs:
+ verify-test-generation-prompts:
+ runs-on: ubuntu-latest
+ timeout-minutes: 30
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Setup py-shiny
+ id: install
+ uses: ./.github/py-shiny/setup
+
+ - name: Install Test Generator Dependencies
+ run: |
+ make ci-install-ai-deps
+
+ - name: Run Evaluation and Tests 3 Times
+ env:
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ PYTHONUNBUFFERED: 1
+ timeout-minutes: 25
+ run: |
+ make run-test-ai-evaluation
+
+ - name: Upload test results
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: test-results-${{ github.run_id }}
+ path: |
+ test-results-inspect-ai/
+ retention-days: 7
+
+ - name: Process Results
+ timeout-minutes: 2
+ run: |
+          # Results are already averaged by the bash script; just verify that they exist
+ if [ ! -f "test-results-inspect-ai/summary.json" ]; then
+ echo "No averaged summary found at test-results-inspect-ai/summary.json"
+ ls -la test-results-inspect-ai/
+ exit 1
+ else
+ echo "Using averaged results from all attempts"
+ cat test-results-inspect-ai/summary.json
+ fi
+
+ - name: Check Quality Gate
+ timeout-minutes: 2
+ run: |
+ if [ ! -f "test-results-inspect-ai/summary.json" ]; then
+ echo "Summary file not found at test-results-inspect-ai/summary.json"
+ ls -la test-results-inspect-ai/
+ exit 1
+ else
+ echo "Found summary file, checking quality gate..."
+ python tests/inspect-ai/utils/scripts/quality_gate.py test-results-inspect-ai/
+ fi
+
+ - name: Prepare Comment Body
+ if: github.event_name == 'pull_request'
+ timeout-minutes: 1
+ run: |
+ python tests/inspect-ai/scripts/prepare_comment.py test-results-inspect-ai/summary.json
+
+ - name: Comment PR Results
+ if: github.event_name == 'pull_request'
+ uses: marocchino/sticky-pull-request-comment@v2
+ with:
+ header: inspect-ai-results
+ path: comment_body.txt
diff --git a/.github/workflows/verify-testing-docs-on-change.yml b/.github/workflows/verify-testing-docs-on-change.yml
new file mode 100644
index 000000000..7667a581b
--- /dev/null
+++ b/.github/workflows/verify-testing-docs-on-change.yml
@@ -0,0 +1,93 @@
+name: Verify testing documentation for changes
+
+on:
+ pull_request:
+ paths:
+ - ".github/workflows/verify-testing-docs-on-change.yml"
+ - "docs/_quartodoc-testing.yml"
+ - "shiny/playwright/controller/**"
+
+permissions:
+ contents: write
+ pull-requests: write
+
+jobs:
+ verify-testing-docs:
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request'
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Setup py-shiny
+ id: install
+ uses: ./.github/py-shiny/setup
+
+ - name: Install dependencies
+ run: |
+ make ci-install-docs
+
+ - name: Update testing docs and check for changes
+ id: check-docs-changes
+ run: |
+ # Store the current state of the documentation file
+ cp shiny/pytest/_generate/_data/testing-documentation.json testing-documentation-before.json
+
+ # Run the make command to update testing docs
+ make update-testing-docs
+
+ if [[ ! -f testing-documentation-before.json || ! -f shiny/pytest/_generate/_data/testing-documentation.json ]]; then
+ echo "One or both documentation files are missing."
+ exit 1
+ fi
+
+          # Check whether the documentation file has changed
+          # (`diff -q` exits 0 when the files are identical)
+          if diff -q testing-documentation-before.json shiny/pytest/_generate/_data/testing-documentation.json > /dev/null 2>&1; then
+            echo "docs_changed=false" >> $GITHUB_OUTPUT
+            echo "Documentation file is up to date"
+          else
+            echo "docs_changed=true" >> $GITHUB_OUTPUT
+            echo "The generated documentation is out of sync with the current controller changes."
+            printf '\n\n'
+            diff testing-documentation-before.json shiny/pytest/_generate/_data/testing-documentation.json || true
+            printf '\n\n'
+          fi
+
+ - name: Comment on PR about testing docs update
+ if: steps.check-docs-changes.outputs.docs_changed == 'true'
+ uses: marocchino/sticky-pull-request-comment@v2
+ with:
+ header: testing-docs-update
+ message: |
+ 🚨 **Testing Documentation Out of Sync**
+
+ We detected changes in the `shiny/playwright/controller` directory that affect the testing documentation used by the `shiny add test` command.
+
+ **The generated documentation is out of sync with your controller changes. Please run:**
+
+ ```bash
+ make update-testing-docs
+ ```
+
+ **Then commit the updated `shiny/pytest/_generate/_data/testing-documentation.json` file.**
+
+            **Additional details**
+
+            The updated documentation file ensures that the AI test generator has access to the latest controller API documentation.
+
+            ❌ **This check will fail until the documentation is updated and committed.**
+
+ ---
+ *This comment was automatically generated by the `verify-testing-docs-on-change.yml` workflow.*
+
+ - name: Remove comment when no controller changes or docs are up to date
+ if: steps.check-docs-changes.outputs.docs_changed == 'false'
+ uses: marocchino/sticky-pull-request-comment@v2
+ with:
+ header: testing-docs-update
+ delete: true
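+
+    # A minimal sketch of a failing step (step name illustrative): without it,
+    # the job stays green even when the docs are stale, contradicting the
+    # "This check will fail" promise in the sticky comment above.
+    - name: Fail if testing docs are out of sync
+      if: steps.check-docs-changes.outputs.docs_changed == 'true'
+      run: |
+        echo "Testing documentation is out of sync. Run 'make update-testing-docs' and commit the result."
+        exit 1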
diff --git a/.gitignore b/.gitignore
index 3982f5270..84e4e9e33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -123,3 +123,10 @@ shiny_bookmarks/
# setuptools_scm
shiny/_version.py
+
+# Other
+tests/inspect-ai/apps/*/test_*.py
+test-results.xml
+results-inspect-ai/
+test-results-inspect-ai/
+tests/inspect-ai/scripts/test_metadata.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f710cf6d6..822b0af31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### New features
+* Added AI-powered test generator for Shiny applications. Use `shiny add test` to automatically generate comprehensive Playwright tests for your apps using AI models from Anthropic or OpenAI. (#2041)
+
* `navset_card_*()` now has a `full_screen` option to support `card()`'s existing full-screen functionality. (#1451)
* Added `ui.insert_nav_panel()`, `ui.remove_nav_panel()`, and `ui.update_nav_panel()` to support dynamic navigation. (#90)
diff --git a/Makefile b/Makefile
index ea830e17e..0ed82d6ec 100644
--- a/Makefile
+++ b/Makefile
@@ -123,6 +123,35 @@ docs-quartodoc: FORCE
@echo "-------- Making quartodoc docs --------"
@cd docs && make quartodoc
+install-repomix: install-npm FORCE ## Install repomix if not already installed
+ @echo "-------- Installing repomix if needed --------"
+ @if ! command -v repomix > /dev/null 2>&1; then \
+ echo "Installing repomix..."; \
+ npm install -g repomix; \
+ else \
+ echo "repomix is already installed"; \
+ fi
+
+update-testing-docs-repomix: install-repomix FORCE ## Generate repomix output for testing docs
+ @echo "-------- Generating repomix output for testing docs --------"
+ repomix docs/api/testing -o tests/inspect-ai/utils/scripts/repomix-output-testing.xml
+
+update-testing-docs-process: FORCE ## Process repomix output to generate testing documentation JSON
+ @echo "-------- Processing testing documentation --------"
+ python tests/inspect-ai/utils/scripts/process_docs.py --input tests/inspect-ai/utils/scripts/repomix-output-testing.xml --output shiny/pytest/_generate/_data/testing-documentation.json
+ @echo "-------- Cleaning up temporary files --------"
+ rm -f tests/inspect-ai/utils/scripts/repomix-output-testing.xml
+
+update-testing-docs: docs update-testing-docs-repomix update-testing-docs-process FORCE ## Update testing documentation (full pipeline)
+ @echo "-------- Testing documentation update complete --------"
+
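+# Typical flow (mirrors the verify-testing-docs-on-change workflow): after
+# changing files under shiny/playwright/controller/, run `make update-testing-docs`
+# and commit the regenerated testing-documentation.json.
+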
+ci-install-ai-deps: FORCE
+ uv pip install -e ".[dev,test,testgen]"
+ $(MAKE) install-playwright
+
+run-test-ai-evaluation: FORCE ## Run the AI evaluation script for tests
+ @echo "-------- Running AI evaluation for tests --------"
+ bash ./tests/inspect-ai/scripts/run-test-evaluation.sh
install-npm: FORCE
$(if $(shell which npm), @echo -n, $(error Please install node.js and npm first. See https://nodejs.org/en/download/ for instructions.))
diff --git a/pyproject.toml b/pyproject.toml
index f406ec805..c5e39fc0e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,6 +124,13 @@ doc = [
"quartodoc>=0.8.1",
"griffe>=1.3.2",
]
+testgen = [
+ "chatlas[anthropic,openai]",
+ "openai>=1.104.1",
+ "anthropic>=0.62.0",
+ "inspect-ai>=0.3.129",
+ "pytest-timeout",
+]
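+# Installed together with the dev/test extras in CI, e.g.
+# `uv pip install -e ".[dev,test,testgen]"` (see `make ci-install-ai-deps`).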
[project.urls]
diff --git a/pyrightconfig.json b/pyrightconfig.json
index 236aed7fc..722bdae8f 100644
--- a/pyrightconfig.json
+++ b/pyrightconfig.json
@@ -10,7 +10,10 @@
"docs",
"tests/playwright/deploys/*/app.py",
"shiny/templates",
- "tests/playwright/ai_generated_apps",
+ "tests/playwright/ai_generated_apps/*/*/app*.py",
+ "tests/inspect-ai/apps/*/app*.py",
+ "shiny/pytest/_generate/_main.py",
+ "tests/inspect-ai/scripts/evaluation.py"
],
"typeCheckingMode": "strict",
"reportImportCycles": "none",
diff --git a/shiny/_main.py b/shiny/_main.py
index 65b083139..f43421a71 100644
--- a/shiny/_main.py
+++ b/shiny/_main.py
@@ -533,11 +533,10 @@ def add() -> None:
@add.command(
help="""Add a test file for a specified Shiny app.
-Add an empty test file for a specified app. You will be prompted with a destination
-folder. If you don't provide a destination folder, it will be added in the current
-working directory based on the app name.
+Generate a comprehensive test file for a specified app using AI. The generator
+will analyze your app code and create appropriate test cases with assertions.
-After creating the shiny app file, you can use `pytest` to run the tests:
+After creating the test file, you can use `pytest` to run the tests:
pytest TEST_FILE
"""
@@ -546,22 +545,37 @@ def add() -> None:
"--app",
"-a",
type=str,
- help="Please provide the path to the app file for which you want to create a test file.",
+ help="Path to the app file for which you want to generate a test file.",
)
@click.option(
"--test-file",
"-t",
type=str,
- help="Please provide the name of the test file you want to create. The basename of the test file should start with `test_` and be unique across all test files.",
+ help="Path for the generated test file. If not provided, will be auto-generated.",
+)
+@click.option(
+ "--provider",
+ type=click.Choice(["anthropic", "openai"]),
+ default="anthropic",
+ help="AI provider to use for test generation.",
+)
+@click.option(
+ "--model",
+ type=str,
+ help="Specific model to use (optional). Examples: haiku3.5, sonnet, gpt-5, gpt-5-mini",
)
# Param for app.py, param for test_name
def test(
- app: Path | None,
- test_file: Path | None,
+ app: str | None,
+ test_file: str | None,
+ provider: str,
+ model: str | None,
) -> None:
- from ._main_add_test import add_test_file
+ from ._main_generate_test import generate_test_file
- add_test_file(app_file=app, test_file=test_file)
+ generate_test_file(
+ app_file=app, output_file=test_file, provider=provider, model=model
+ )
@main.command(
diff --git a/shiny/_main_add_test.py b/shiny/_main_add_test.py
deleted file mode 100644
index 7393054d0..000000000
--- a/shiny/_main_add_test.py
+++ /dev/null
@@ -1,103 +0,0 @@
-from __future__ import annotations
-
-import os
-import sys
-from pathlib import Path
-
-import click
-import questionary
-
-from ._main_utils import cli_action, cli_bold, cli_code, path_rel_wd
-
-
-def add_test_file(
- *,
- app_file: Path | None,
- test_file: Path | None,
-):
- if app_file is None:
-
- def path_exists(x: Path) -> bool | str:
- if not isinstance(x, (str, Path)):
- return False
- if Path(x).is_dir():
- return "Please provide a file path to your Shiny app"
- return Path(x).exists() or f"Shiny app file can not be found: {x}"
-
- app_file_val = questionary.path(
- "Enter the path to the app file:",
- default=path_rel_wd("app.py"),
- validate=path_exists,
- ).ask()
- else:
- app_file_val = app_file
- # User quit early
- if app_file_val is None:
- sys.exit(1)
- app_file = Path(app_file_val)
-
- if test_file is None:
-
- def path_does_not_exist(x: Path) -> bool | str:
- if not isinstance(x, (str, Path)):
- return False
- if Path(x).is_dir():
- return "Please provide a file path for your test file."
- if Path(x).exists():
- return "Test file already exists. Please provide a new file name."
- if not Path(x).name.startswith("test_"):
- return "Test file must start with 'test_'"
- return True
-
- test_file_val = questionary.path(
- "Enter the path to the test file:",
- default=path_rel_wd(
- os.path.relpath(app_file.parent / "tests" / "test_app.py", ".")
- ),
- validate=path_does_not_exist,
- ).ask()
- else:
- test_file_val = test_file
-
- # User quit early
- if test_file_val is None:
- sys.exit(1)
- test_file = Path(test_file_val)
-
- # Make sure app file exists
- if not app_file.exists():
- raise FileExistsError("App file does not exist: ", test_file)
- # Make sure output test file doesn't exist
- if test_file.exists():
- raise FileExistsError("Test file already exists: ", test_file)
- if not test_file.name.startswith("test_"):
- return "Test file must start with 'test_'"
-
- test_name = test_file.name.replace(".py", "")
- rel_path = os.path.relpath(app_file, test_file.parent)
-
- template = f"""\
-from playwright.sync_api import Page
-
-from shiny.playwright import controller
-from shiny.pytest import create_app_fixture
-from shiny.run import ShinyAppProc
-
-app = create_app_fixture("{rel_path}")
-
-
-def {test_name}(page: Page, app: ShinyAppProc):
-
- page.goto(app.url)
- # Add test code here
-"""
- # Make sure test file directory exists
- test_file.parent.mkdir(parents=True, exist_ok=True)
-
- # Write template to test file
- test_file.write_text(template)
-
- # next steps
- click.echo()
- click.echo(cli_action(cli_bold("Next steps:")))
- click.echo(f"- Run {cli_code('pytest')} in your terminal to run all the tests")
diff --git a/shiny/_main_generate_test.py b/shiny/_main_generate_test.py
new file mode 100644
index 000000000..bdca96a7b
--- /dev/null
+++ b/shiny/_main_generate_test.py
@@ -0,0 +1,164 @@
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+from typing import Callable
+
+import click
+import questionary
+
+from ._main_utils import cli_action, cli_bold, cli_code, path_rel_wd
+
+
+class ValidationError(Exception):
+ pass
+
+
+def create_file_validator(
+ file_type: str,
+ must_exist: bool = True,
+ prefix_required: str | None = None,
+ must_not_exist: bool = False,
+) -> Callable[[str], bool | str]:
+ def validator(path_str: str) -> bool | str:
+ if not isinstance(path_str, (str, Path)):
+ return False
+
+ path = Path(path_str)
+
+ if path.is_dir():
+ return f"Please provide a file path for your {file_type}"
+
+ if must_exist and not path.exists():
+ return f"{file_type.title()} file not found: {path_str}"
+
+ if must_not_exist and path.exists():
+ return f"{file_type.title()} file already exists. Please provide a new file name."
+
+ if prefix_required and not path.name.startswith(prefix_required):
+ return f"{file_type.title()} file must start with '{prefix_required}'"
+
+ return True
+
+ return validator
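+
+
+# Illustrative usage (hypothetical path): a validator for new test files that
+# rejects existing paths and enforces the `test_` prefix:
+#
+#   check = create_file_validator(
+#       "test", must_exist=False, prefix_required="test_", must_not_exist=True
+#   )
+#   check("tests/test_app.py")  # True when the path is new and well-named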
+
+
+def validate_api_key(provider: str) -> None:
+ api_configs = {
+ "anthropic": {
+ "env_var": "ANTHROPIC_API_KEY",
+ "url": "https://console.anthropic.com/",
+ },
+ "openai": {
+ "env_var": "OPENAI_API_KEY",
+ "url": "https://platform.openai.com/api-keys",
+ },
+ }
+
+ if provider not in api_configs:
+ raise ValidationError(f"Unsupported provider: {provider}")
+
+ config = api_configs[provider]
+ if not os.getenv(config["env_var"]):
+ raise ValidationError(
+ f"{config['env_var']} environment variable is not set.\n"
+ f"Please set your {provider.title()} API key:\n"
+ f" export {config['env_var']}='your-api-key-here'\n\n"
+ f"Get your API key from: {config['url']}"
+ )
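+
+
+# For example, validate_api_key("anthropic") raises ValidationError unless the
+# ANTHROPIC_API_KEY environment variable is set.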
+
+
+def get_app_file_path(app_file: str | None) -> Path:
+ if app_file is not None:
+ app_path = Path(app_file)
+ if not app_path.exists():
+ raise ValidationError(f"App file does not exist: {app_path}")
+ return app_path
+ # Interactive mode
+ app_file_val = questionary.path(
+ "Enter the path to the app file:",
+ default=path_rel_wd("app.py"),
+ validate=create_file_validator("Shiny app", must_exist=True),
+ ).ask()
+
+ if app_file_val is None:
+ sys.exit(1)
+
+ return Path(app_file_val)
+
+
+def get_output_file_path(output_file: str | None, app_path: Path) -> Path:
+ if output_file is not None:
+ output_path = Path(output_file)
+ if output_path.exists():
+ raise ValidationError(f"Test file already exists: {output_path}")
+ if not output_path.name.startswith("test_"):
+ raise ValidationError("Test file must start with 'test_'")
+ return output_path
+ # Interactive mode
+ suggested_output = app_path.parent / f"test_{app_path.stem}.py"
+
+ output_file_val = questionary.path(
+ "Enter the path for the generated test file:",
+ default=str(suggested_output),
+ validate=create_file_validator(
+ "test", must_exist=False, prefix_required="test_", must_not_exist=True
+ ),
+ ).ask()
+
+ if output_file_val is None:
+ sys.exit(1)
+
+ return Path(output_file_val)
+
+
+def generate_test_file(
+ *,
+ app_file: str | None,
+ output_file: str | None,
+ provider: str,
+ model: str | None,
+) -> None:
+
+ try:
+ validate_api_key(provider)
+
+ app_path = get_app_file_path(app_file)
+ output_path = get_output_file_path(output_file, app_path)
+
+ try:
+ from .pytest._generate import ShinyTestGenerator
+ except ImportError as e:
+ raise ValidationError(
+ f"Could not import ShinyTestGenerator: {e}\n"
+ "Make sure the shiny testing dependencies are installed."
+            ) from e
+
+ click.echo(f"🤖 Generating test using {provider} provider...")
+ if model:
+ click.echo(f"📝 Using model: {model}")
+
+ generator = ShinyTestGenerator(provider=provider, setup_logging=False) # type: ignore
+ _, test_file_path = generator.generate_test_from_file(
+ app_file_path=str(app_path),
+ model=model,
+ output_file=str(output_path),
+ )
+
+        # Fall back to the absolute path when the test file sits outside the
+        # current working directory (Path.relative_to raises ValueError there).
+        try:
+            relative_test_file_path = test_file_path.relative_to(Path.cwd())
+        except ValueError:
+            relative_test_file_path = test_file_path
+
+ click.echo(f"✅ Test file generated successfully: {relative_test_file_path}")
+ click.echo()
+ click.echo(cli_action(cli_bold("Next steps:")))
+ click.echo(
+ f"- Run {cli_code('pytest ' + str(relative_test_file_path))} to run the generated test"
+ )
+ click.echo("- Review and customize the test as needed")
+
+ except ValidationError as e:
+ click.echo(f"❌ Error: {e}")
+ sys.exit(1)
+ except Exception as e:
+ click.echo(f"❌ Error generating test: {e}")
+ sys.exit(1)
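+
+
+# Invoked from the CLI via `shiny add test` (see shiny/_main.py), e.g.:
+#   shiny add test --app app.py --test-file tests/test_app.py --provider openai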
diff --git a/shiny/pytest/_generate/__init__.py b/shiny/pytest/_generate/__init__.py
new file mode 100644
index 000000000..0e544db3f
--- /dev/null
+++ b/shiny/pytest/_generate/__init__.py
@@ -0,0 +1,7 @@
+"""
+This module is internal; public-facing imports should not rely on its location.
+"""
+
+from ._main import ShinyTestGenerator
+
+__all__ = ["ShinyTestGenerator"]
diff --git a/shiny/pytest/_generate/_data/testing-SYSTEM_PROMPT.md b/shiny/pytest/_generate/_data/testing-SYSTEM_PROMPT.md
new file mode 100644
index 000000000..22fc74297
--- /dev/null
+++ b/shiny/pytest/_generate/_data/testing-SYSTEM_PROMPT.md
@@ -0,0 +1,211 @@
+# Shiny for Python Playwright Testing Expert
+
+Generate comprehensive Playwright smoke tests for Shiny for Python applications.
+
+## Framework Check
+For non-Shiny Python code, respond: "This framework is for Shiny for Python only. For [Framework], use the appropriate testing framework (e.g., shinytest2 for Shiny for R)."
+
+## Core Rules
+
+1. **Dynamic App File**: When generating code that uses `create_app_fixture`, follow these rules:
+ - Use the exact filename provided in the prompt.
+ - ALWAYS make paths relative from the test file directory to the app file.
+ - For tests in `app_dir/tests` and app in `app_dir/app.py`:
+ - ✅ `app = create_app_fixture(["../app.py"])`
+ - ❌ `app = create_app_fixture(["app.py"])`
+ - For tests in `tests/subdir` and app in `apps/subdir/app.py`:
+ - ✅ `app = create_app_fixture(["../../apps/subdir/app.py"])`
+ - NEVER use absolute paths.
+ - Calculate the correct relative path based on the test file location and app file location.
+
+2. **Controller Classes Only**: Always use official controllers, never `page.locator()`
+ - ✅ `controller.InputSlider(page, "my_slider")`
+ - ❌ `page.locator("#my_slider")`
+
+3. **String Values**: All assertions use strings
+ - ✅ `expect_max("15")`
+ - ❌ `expect_max(15)`
+
+4. **Test Pattern**: Assert → Act → Assert
+ - Assert initial state (value, label, linked outputs)
+ - Act (set, click, etc.)
+ - Assert final state (re-check input + outputs)
+
+5. **Scope**: Only test Shiny components with unique IDs.
+
+6. **Selectize Clear**: Use `set([])` to clear all values in Selectize inputs.
+ - ✅ `selectize.set([])`
+ - ❌ `selectize.set("")`
+
+7. **Skip icons**: Do not test icon functionality, e.g. with assertions like `expect_icon("icon_name")`.
+ - ❌ `btn2.expect_icon("fa-solid fa-shield-halved")`
+
+8. **Skip plots**: Do not test any OutputPlot content or functionality, e.g. via the `OutputPlot` controller.
+    - ❌ `plot1 = controller.OutputPlot(page, "my_plot_module-plot1")`
+    - ❌ `plot1.expect_title("Random Scatter Plot")`
+
+9. **Keyword-Only Args**: Always pass every argument as a keyword for every controller method.
+ - ✅ `expect_cell(value="0", row=1, col=2)`
+ - ❌ `expect_cell("0", 1, 2)`
+
+10. **Newline at End**: Always end files with a newline.
+
+11. **DataFrames**: `OutputDataFrame` tests use **zero-based indexing**, so `data_grid.expect_cell(value="Action Button", row=0, col=0)` verifies the cell in the first row and first column, not the headers. See the Data Frame example below.
+
+## Examples
+
+### Checkbox Group
+```python
+# apps/app_checkbox.py
+from shiny.express import input, ui, render
+ui.input_checkbox_group("basic", "Choose:", ["A", "B"], selected=["A"])
+@render.text
+def output(): return f"Selected: {input.basic()}"
+
+# apps/test_app_checkbox.py
+
+from playwright.sync_api import Page
+from shiny.playwright import controller
+from shiny.pytest import create_app_fixture
+from shiny.run import ShinyAppProc
+
+app = create_app_fixture(["app_checkbox.py"])
+
+def test_checkbox(page: Page, app: ShinyAppProc) -> None:
+ page.goto(app.url)
+ basic = controller.InputCheckboxGroup(page, "basic")
+ output = controller.OutputText(page, "output")
+
+ # Assert initial
+ basic.expect_selected(["A"])
+ output.expect_value("Selected: ('A',)")
+
+ # Act
+ basic.set(["A", "B"])
+
+ # Assert final
+ basic.expect_selected(["A", "B"])
+ output.expect_value("Selected: ('A', 'B')")
+```
+
+### Date Input
+```python
+# app_date.py
+from shiny.express import input, ui
+ui.input_date("date1", "Date:", value="2024-01-01")
+
+# tests/test_app_date.py
+from playwright.sync_api import Page
+from shiny.playwright import controller
+from shiny.pytest import create_app_fixture
+from shiny.run import ShinyAppProc
+
+app = create_app_fixture(["../app_date.py"])
+
+
+def test_date(page: Page, app: ShinyAppProc) -> None:
+ page.goto(app.url)
+ date1 = controller.InputDate(page, "date1")
+
+ date1.expect_value("2024-01-01")
+ date1.set("2024-02-01")
+ date1.expect_value("2024-02-01")
+```
+
+### Selectize with Updates
+```python
+# app_selectize.py
+from shiny import reactive
+from shiny.express import input, ui, render
+ui.input_selectize("select1", "State:", {"NY": "New York", "CA": "California"})
+ui.input_action_button("update_btn", "Update")
+@render.text
+def output(): return f"Selected: {input.select1()}"
+
+@reactive.effect
+@reactive.event(input.update_btn)
+def _(): ui.update_selectize("select1", selected="CA")
+
+# test_app_selectize.py
+from playwright.sync_api import Page
+from shiny.playwright import controller
+from shiny.pytest import create_app_fixture
+from shiny.run import ShinyAppProc
+
+app = create_app_fixture(["app_selectize.py"])
+
+
+def test_selectize(page: Page, app: ShinyAppProc) -> None:
+ page.goto(app.url)
+ select1 = controller.InputSelectize(page, "select1")
+ output = controller.OutputText(page, "output")
+ btn = controller.InputActionButton(page, "update_btn")
+
+ # Initial state
+ select1.expect_selected(["NY"])
+ output.expect_value("Selected: NY")
+
+ # Act
+ btn.click()
+
+ # Final state
+ select1.expect_selected(["CA"])
+ output.expect_value("Selected: CA")
+```
+
+### Navset Card Pill Navigation
+```python
+# app_express.py
+from shiny.express import input, render, ui
+
+with ui.navset_card_pill(id="selected_navset_card_pill"):
+ with ui.nav_panel("A"):
+ "Panel A content"
+
+ with ui.nav_panel("B"):
+ "Panel B content"
+
+ with ui.nav_panel("C"):
+ "Panel C content"
+
+ui.h5("Selected:")
+
+
+@render.text
+def _():
+ return input.selected_navset_card_pill()
+
+# test_app_express.py
+from playwright.sync_api import Page
+from shiny.playwright import controller
+from shiny.pytest import create_app_fixture
+from shiny.run import ShinyAppProc
+
+app = create_app_fixture(["app_express.py"])
+
+
+def test_navset_card_pill(page: Page, app: ShinyAppProc) -> None:
+ page.goto(app.url)
+ navset = controller.NavsetCardPill(page, "selected_navset_card_pill")
+ output_text = controller.OutputText(page, "_")
+
+ # Assert initial state - first panel should be active
+ navset.expect_value("A")
+ output_text.expect_value("A")
+
+ # Act - navigate to panel B
+ navset.set("B")
+
+ # Assert final state
+ navset.expect_value("B")
+ output_text.expect_value("B")
+
+ # Act - navigate to panel C
+ navset.set("C")
+
+ # Assert final state
+ navset.expect_value("C")
+ output_text.expect_value("C")
+```
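+
+### Data Frame
+A minimal sketch for Rule 11; file names such as `app_df.py` are illustrative.
+```python
+# app_df.py
+import pandas as pd
+from shiny.express import render
+
+@render.data_frame
+def grid():
+    return pd.DataFrame({"name": ["Action Button"], "id": ["btn1"]})
+
+# tests/test_app_df.py
+from playwright.sync_api import Page
+from shiny.playwright import controller
+from shiny.pytest import create_app_fixture
+from shiny.run import ShinyAppProc
+
+app = create_app_fixture(["../app_df.py"])
+
+
+def test_grid(page: Page, app: ShinyAppProc) -> None:
+    page.goto(app.url)
+    grid = controller.OutputDataFrame(page, "grid")
+
+    # Zero-based: row 0 / col 0 is the first data cell, not the header row
+    grid.expect_cell(value="Action Button", row=0, col=0)
+```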
diff --git a/shiny/pytest/_generate/_data/testing-documentation.json b/shiny/pytest/_generate/_data/testing-documentation.json
new file mode 100644
index 000000000..68ed58fbd
--- /dev/null
+++ b/shiny/pytest/_generate/_data/testing-documentation.json
@@ -0,0 +1,2127 @@
+[
+ {
+ "controller_name": "playwright.controller.Accordion",
+ "methods": [
+ {
+ "name": "accordion_panel",
+        "description": "Returns the accordion panel (`AccordionPanel`) with the specified data value.",
+ "parameters": "data_value (str)"
+ },
+ {
+ "name": "expect_class",
+ "description": "Expects the accordion to have the specified class.",
+ "parameters": "class_name (str), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Expects the accordion to have the specified height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_multiple",
+ "description": "Expects the accordion to be multiple or not.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_panels",
+ "description": "Expects the accordion to have the specified panels.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expects the accordion to have the specified width.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the accordion panel.",
+ "parameters": "open (str \\), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.AccordionPanel",
+ "methods": [
+ {
+ "name": "expect_body",
+ "description": "Expects the accordion panel body to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expects the accordion panel icon to exist or not.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expects the accordion panel label to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_open",
+ "description": "Expects the accordion panel to be open or closed.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.Card",
+ "methods": [
+ {
+ "name": "expect_body",
+ "description": "Expect the card body element to have the specified text.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_footer",
+ "description": "Expects the card footer to have a specific text.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_full_screen",
+ "description": "Verifies if the full screen mode is currently open.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_full_screen_available",
+ "description": "Expects whether full screen mode is available for the element.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_header",
+ "description": "Expects the card header to have a specific text.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Expects the card to have a specific height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max_height",
+ "description": "Expects the card to have a specific maximum height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min_height",
+ "description": "Expects the card to have a specific minimum height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set_full_screen",
+ "description": "Sets the element to full screen mode or exits full screen mode.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.Chat",
+ "methods": [
+ {
+ "name": "expect_latest_message",
+ "description": "Expects the last message in the chat.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_messages",
+ "description": "Expects the chat messages.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_user_input",
+ "description": "Expects the user message in the chat.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "send_user_input",
+ "description": "Sends the user message in the chat.",
+ "parameters": "method (Literal\\['enter', 'click'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "set_user_input",
+ "description": "Sets the user message in the chat.",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.DownloadButton",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.DownloadLink",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputActionButton",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_disabled",
+ "description": "Expect the input action button to be disabled.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputActionLink",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputBookmarkButton",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_disabled",
+ "description": "Expect the input bookmark button to be disabled.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputCheckbox",
+ "methods": [
+ {
+ "name": "expect_checked",
+ "description": "Expect the input checkbox to be checked.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the input checkbox.",
+ "parameters": "value (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputCheckboxGroup",
+ "methods": [
+ {
+ "name": "expect_choice_labels",
+ "description": "Expect the labels of the choices.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choices",
+ "description": "Expect the checkbox choices.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Expect the input to be inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected",
+ "description": "Expect the selected checkboxes.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Set the selected checkboxes.",
+ "parameters": "selected (ListOrTuple\\[str\\]), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputDarkMode",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input dark mode.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_attribute",
+ "description": "Expect the attribute named `attribute` of the input dark mode to have a specific value.",
+ "parameters": "value (str), timeout (Timeout)"
+ },
+ {
+ "name": "expect_mode",
+ "description": "Expect the `mode` attribute of the input dark mode to have a specific value.",
+ "parameters": "value (str), timeout (Timeout)"
+ },
+ {
+ "name": "expect_page_mode",
+ "description": "Expect the page to have a specific dark mode value.",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputDate",
+ "methods": [
+ {
+ "name": "expect_autoclose",
+ "description": "Asserts that the input element has the expected `data-date-autoclose` attribute value.",
+ "parameters": "value (Literal\\['true', 'false'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_datesdisabled",
+ "description": "Asserts that the input element has the expected `data-date-dates-disabled` attribute value.",
+ "parameters": "value (list\\[str\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_daysofweekdisabled",
+ "description": "Asserts that the input element has the expected `data-date-days-of-week-disabled` attribute value.",
+ "parameters": "value (list\\[int\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_format",
+ "description": "Asserts that the input element has the expected `data-date-format` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_language",
+ "description": "Asserts that the input element has the expected `data-date-language` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max_date",
+ "description": "Asserts that the input element has the expected `data-max-date` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min_date",
+ "description": "Asserts that the input element has the expected `data-min-date` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_startview",
+ "description": "Asserts that the input element has the expected `data-date-start-view` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the input element has the expected value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_weekstart",
+ "description": "Asserts that the input element has the expected `data-date-week-start` attribute value.",
+ "parameters": "value (int \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the text value",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputDateRange",
+ "methods": [
+ {
+ "name": "expect_autoclose",
+ "description": "Asserts that the input element has the expected autoclose value.",
+ "parameters": "value (Literal\\['true', 'false'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_format",
+ "description": "Asserts that the input element has the expected format.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_language",
+ "description": "Asserts that the input element has the expected language.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max_date",
+ "description": "Asserts that the input element has the expected maximum date.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min_date",
+ "description": "Asserts that the input element has the expected minimum date.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_separator",
+ "description": "Asserts that the input element has the expected separator.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_startview",
+ "description": "Asserts that the input element has the expected start view.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the input element has the expected value.",
+ "parameters": "value (typing.Tuple\\[PatternOrStr, PatternOrStr\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_weekstart",
+ "description": "Asserts that the input element has the expected week start.",
+ "parameters": "value (int \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the value of the input element.",
+ "parameters": "value (typing.Tuple\\[str \\), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputFile",
+ "methods": [
+ {
+ "name": "expect_accept",
+ "description": "Expect the `accept` attribute to have a specific value.",
+ "parameters": "value (list\\[str\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_button_label",
+ "description": "Expect the button label to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_capture",
+ "description": "Expect the `capture` attribute to have a specific value.",
+ "parameters": "value (Literal\\['environment', 'user'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_complete",
+ "description": "Expect the file upload to be complete.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_multiple",
+ "description": "Expect the `multiple` attribute to have a specific value.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the width of the input file to have a specific value.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Set the file upload.",
+ "parameters": "file_path (str \\), timeout (Timeout), expect_complete_timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputNumeric",
+ "methods": [
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max",
+ "description": "Expect the maximum numeric value to be a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min",
+ "description": "Expect the minimum numeric value to be a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_step",
+ "description": "Expect step value when incrementing/decrementing the numeric input.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expect the value of the text input to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the text value",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputPassword",
+ "methods": [
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placeholder",
+ "description": "Expect the `placeholder` attribute of the input to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expect the value of the text input to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of the input password to have a specific value.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the text value",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputRadioButtons",
+ "methods": [
+ {
+ "name": "expect_choice_labels",
+ "description": "Expect the labels of the choices.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choices",
+ "description": "Expect the radio button choices.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Expect the input to be inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected",
+ "description": "Expect the selected radio button.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Set the selected radio button.",
+ "parameters": "selected (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputSelect",
+ "methods": [
+ {
+ "name": "expect_choice_groups",
+ "description": "Expect the choice groups of the input select to be an exact match.",
+ "parameters": "choice_groups (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choice_labels",
+ "description": "Expect the choice labels of the input select to be an exact match.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choices",
+ "description": "Expect the available options of the input select to be an exact match.",
+ "parameters": "choices (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_multiple",
+ "description": "Expect the input selectize to allow multiple selections.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected",
+ "description": "Expect the selected option(s) of the input select to be an exact match.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_size",
+ "description": "Expect the size attribute of the input select to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the selected option(s) of the input select.",
+ "parameters": "selected (str \\), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputSelectize",
+ "methods": [
+ {
+ "name": "expect_choice_groups",
+ "description": "Expect the choice groups of the input select to be an exact match.",
+ "parameters": "choice_groups (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choice_labels",
+ "description": "Expect the choice labels of the input selectize to be an exact match.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_choices",
+ "description": "Expect the available options of the input selectize to be an exact match.",
+ "parameters": "choices (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_multiple",
+ "description": "Expect the input selectize to allow multiple selections.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected",
+ "description": "Expect the selected option(s) of the input select to be an exact match.",
+ "parameters": "value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the selected option(s) of the input selectize.",
+ "parameters": "selected (str \\), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputSlider",
+ "methods": [
+ {
+ "name": "click_pause",
+ "description": "Click the pause button.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "click_play",
+ "description": "Click the play button.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_animate",
+ "description": "Expect the animate button to exist.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_drag_range",
+ "description": "Asserts that the input element has the expected `data-drag-range` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max",
+ "description": "Expect the input element to have the expected `max` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min",
+ "description": "Expect the input element to have the expected `min` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_post",
+ "description": "Expect the input element to have the expected `data-post` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_pre",
+ "description": "Expect the input element to have the expected `data-pre` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sep",
+ "description": "Expect the input element to have the expected `data-sep` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_step",
+ "description": "Expect the input element to have the expected `step` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_tick_labels",
+ "description": "Expect the tick labels of the input slider.",
+ "parameters": "value (ListPatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_ticks",
+ "description": "Expect the input element to have the expected `data-ticks` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_time_format",
+ "description": "Asserts that the input element has the expected `data-time-format` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_timezone",
+ "description": "Asserts that the input element has the expected `data-timezone` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the input element has the expected value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Set the value of the slider.",
+ "parameters": "value (str), max_err_values (int), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputSliderRange",
+ "methods": [
+ {
+ "name": "click_pause",
+ "description": "Click the pause button.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "click_play",
+ "description": "Click the play button.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_animate",
+ "description": "Expect the animate button to exist.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_drag_range",
+ "description": "Asserts that the input element has the expected `data-drag-range` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max",
+ "description": "Expect the input element to have the expected `max` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_min",
+ "description": "Expect the input element to have the expected `min` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_post",
+ "description": "Expect the input element to have the expected `data-post` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_pre",
+ "description": "Expect the input element to have the expected `data-pre` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sep",
+ "description": "Expect the input element to have the expected `data-sep` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_step",
+ "description": "Expect the input element to have the expected `step` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_tick_labels",
+ "description": "Expect the tick labels of the input slider.",
+ "parameters": "value (ListPatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_ticks",
+ "description": "Expect the input element to have the expected `data-ticks` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_time_format",
+ "description": "Asserts that the input element has the expected `data-time-format` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_timezone",
+ "description": "Asserts that the input element has the expected `data-timezone` attribute value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the input element has the expected value.",
+ "parameters": "value (typing.Tuple\\[PatternOrStr, PatternOrStr\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Set the value of the slider.",
+ "parameters": "value (typing.Tuple\\[str, str\\] \\), max_err_values (int), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputSwitch",
+ "methods": [
+ {
+ "name": "expect_checked",
+ "description": "Expect the input checkbox to be checked.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the input checkbox.",
+ "parameters": "value (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputTaskButton",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the input action.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_auto_reset",
+ "description": "Expect the `auto-reset` attribute of the input task button to have a specific value.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_icon",
+ "description": "Expect the icon of the input button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input task button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label_busy",
+ "description": "Expect the label of a busy input task button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label_ready",
+ "description": "Expect the label of a ready input task button to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label_state",
+ "description": "Expect the label of the input task button to have a specific value in a specific state.",
+ "parameters": "state (str), value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_state",
+ "description": "Expect the state of the input task button to have a specific value.",
+ "parameters": "value (Literal\\['ready', 'busy'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputText",
+ "methods": [
+ {
+ "name": "expect_autocomplete",
+ "description": "Expect the `autocomplete` attribute of the input to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placeholder",
+ "description": "Expect the `placeholder` attribute of the input to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_spellcheck",
+ "description": "Expect the `spellcheck` attribute of the input to have a specific value.",
+ "parameters": "value (Literal\\['true', 'false'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expect the value of the text input to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the input element to have a specific width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the text value",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.InputTextArea",
+ "methods": [
+ {
+ "name": "expect_autocomplete",
+ "description": "Expect the `autocomplete` attribute of the input to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_autoresize",
+ "description": "Expect the `autoresize` attribute of the input text area to have a specific value.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_cols",
+ "description": "Expect the `cols` attribute of the input text area to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Expect the `height` attribute of the input text area to have a specific value.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_label",
+ "description": "Expect the label of the input to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placeholder",
+ "description": "Expect the `placeholder` attribute of the input to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_resize",
+ "description": "Expect the `resize` attribute of the input text area to have a specific value.",
+ "parameters": "value (Resize \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_rows",
+ "description": "Expect the `rows` attribute of the input text area to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_spellcheck",
+ "description": "Expect the `spellcheck` attribute of the input to have a specific value.",
+ "parameters": "value (Literal\\['true', 'false'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expect the value of the text input to have a specific value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of the input text area to have a specific value.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the text value",
+ "parameters": "value (str), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavItem",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the nav item.",
+ "parameters": "timeout"
+ },
+ {
+ "name": "expect_active",
+ "description": "Expects the nav item to be active or inactive.",
+ "parameters": "value"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavPanel",
+ "methods": [
+ {
+ "name": "click",
+ "description": "Clicks the nav panel.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "expect_active",
+ "description": "Expects the nav panel to be active or inactive.",
+ "parameters": "value (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetBar",
+ "methods": [
+ {
+ "name": "expect_bg",
+ "description": "Expects the navset bar to have the specified background color.",
+ "parameters": "bg (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_fluid",
+ "description": "Expects the navset bar to have a fluid or fixed layout.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_gap",
+ "description": "Expects the navset bar to have the specified gap.",
+ "parameters": "gap (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inverse",
+ "description": "Expects the navset bar to be light text color if inverse is True",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_position",
+ "description": "Expects the navset bar to have the specified position.",
+ "parameters": "position (Literal\\['fixed-top', 'fixed-bottom', 'static-top', 'sticky-top'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sidebar",
+ "description": "Assert whether or not the sidebar exists within the navset.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the navset title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetCardPill",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placement",
+ "description": "Expects the navset to have the specified placement.",
+ "parameters": "location (Literal\\['above', 'below'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sidebar",
+ "description": "Assert whether or not the sidebar exists within the navset.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the navset title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetCardTab",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sidebar",
+ "description": "Assert whether or not the sidebar exists within the navset.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the navset title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetCardUnderline",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placement",
+ "description": "Expects the navset to have the specified placement.",
+ "parameters": "location (Literal\\['above', 'below'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sidebar",
+ "description": "Assert whether or not the sidebar exists within the navset.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the navset title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetHidden",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetPill",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetPillList",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_well",
+ "description": "Expects the navset pill list to have a well.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_widths",
+ "description": "Expects the navset pill list to have the specified widths.",
+ "parameters": "value (ListOrTuple\\[int\\]), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetTab",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.NavsetUnderline",
+ "methods": [
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputCode",
+ "methods": [
+ {
+ "name": "expect_has_placeholder",
+ "description": "Asserts that the code output has the expected placeholder.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the output has the expected value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputDataFrame",
+ "methods": [
+ {
+ "name": "cell_locator",
+ "description": "Returns the locator for a specific cell in the data frame.",
+ "parameters": "row (int), col (int)"
+ },
+ {
+ "name": "expect_cell",
+ "description": "Expects the cell in the data frame to have the specified text.",
+ "parameters": "value (PatternOrStr), row (int), col (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_cell_class",
+ "description": "Expects the class of the cell",
+ "parameters": "value (str), row (int), col (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_cell_title",
+ "description": "Expects the validation message of the cell in the data frame, which will be in the `title` attribute of the element.",
+ "parameters": "value (str), row (int), col (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_class_state",
+ "description": "Expects the state of the class in the data frame.",
+ "parameters": "value (str), row (int), col (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_column_labels",
+ "description": "Expects the column labels in the data frame.",
+ "parameters": "value (ListPatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_ncol",
+ "description": "Expects the number of columns in the data frame.",
+ "parameters": "value (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nrow",
+ "description": "Expects the number of rows in the data frame.",
+ "parameters": "value (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected_num_rows",
+ "description": "Expects the number of selected rows in the data frame.",
+ "parameters": "value (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_selected_rows",
+ "description": "Expects the specified rows to be selected.",
+ "parameters": "rows (list\\[int\\]), timeout (Timeout)"
+ },
+ {
+ "name": "select_rows",
+ "description": "Selects the rows in the data frame.",
+ "parameters": "value (list\\[int\\]), timeout (Timeout)"
+ },
+ {
+ "name": "set_cell",
+ "description": "Saves the value of the cell in the data frame.",
+ "parameters": "text (str), row (int), col (int), finish_key (Literal\\['Enter', 'Shift+Enter', 'Tab', 'Shift+Tab', 'Escape'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "set_filter",
+ "description": "Set or reset filters for columns in a table or grid component. This method allows setting string filters, numeric range filters, or clearing all filters.",
+ "parameters": "filter (ColumnFilter \\), timeout (Timeout)"
+ },
+ {
+ "name": "set_sort",
+ "description": "Set or modify the sorting of columns in a table or grid component. This method allows setting single or multiple column sorts, or resetting the sort order.",
+ "parameters": "sort (int \\), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputImage",
+ "methods": [
+ {
+ "name": "expect_container_tag",
+ "description": "Asserts that the output has the expected container tag.",
+ "parameters": "value (Literal\\['span', 'div'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Asserts that the image has the expected height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_alt",
+ "description": "Asserts that the image has the expected alt text.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_height",
+ "description": "Asserts that the image has the expected height.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_src",
+ "description": "Asserts that the image has the expected src.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_width",
+ "description": "Asserts that the image has the expected width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Asserts that the output is inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Asserts that the image has the expected width.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputPlot",
+ "methods": [
+ {
+ "name": "expect_container_tag",
+ "description": "Asserts that the output has the expected container tag.",
+ "parameters": "value (Literal\\['span', 'div'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Asserts that the image has the expected height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_alt",
+ "description": "Asserts that the image has the expected alt text.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_height",
+ "description": "Asserts that the image has the expected height.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_src",
+ "description": "Asserts that the image has the expected src.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_img_width",
+ "description": "Asserts that the image has the expected width.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Asserts that the output is inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Asserts that the image has the expected width.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputTable",
+ "methods": [
+ {
+ "name": "expect_cell",
+ "description": "Asserts that the table cell has the expected text.",
+ "parameters": "value (PatternOrStr), row (int), col (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_column_labels",
+ "description": "Asserts that the table has the expected column labels.",
+ "parameters": "value (ListPatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_column_text",
+ "description": "Asserts that the column has the expected text.",
+ "parameters": "col (int), value (ListPatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_ncol",
+ "description": "Asserts that the table has the expected number of columns.",
+ "parameters": "value (int), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nrow",
+ "description": "Asserts that the table has the expected number of rows.",
+ "parameters": "value (int), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputText",
+ "methods": [
+ {
+ "name": "expect_container_tag",
+ "description": "Asserts that the output has the expected container tag.",
+ "parameters": "value (Literal\\['span', 'div'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Asserts that the output is inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the output has the expected value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_value",
+ "description": "Gets the text value of the output.",
+ "parameters": "timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputTextVerbatim",
+ "methods": [
+ {
+ "name": "expect_has_placeholder",
+ "description": "Asserts that the verbatim text output has the expected placeholder.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Asserts that the output has the expected value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.OutputUi",
+ "methods": [
+ {
+ "name": "expect_container_tag",
+ "description": "Asserts that the output has the expected container tag.",
+ "parameters": "value (Literal\\['span', 'div'\\] \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_empty",
+ "description": "Asserts that the output is empty.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inline",
+ "description": "Asserts that the output is inline.",
+ "parameters": "value (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.PageNavbar",
+ "methods": [
+ {
+ "name": "expect_bg",
+ "description": "Expects the navset bar to have the specified background color.",
+ "parameters": "bg (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_fillable",
+ "description": "Expects the main content area to be considered a fillable (i.e., flexbox) container",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_fillable_mobile",
+ "description": "Expects the main content area to be considered a fillable (i.e., flexbox) container on mobile This method will always call `.expect_fillable(True)` first to ensure the fillable property is set",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_fluid",
+ "description": "Expects the navset bar to have a fluid or fixed layout.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_gap",
+ "description": "Expects the navset bar to have the specified gap.",
+ "parameters": "gap (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_inverse",
+ "description": "Expects the navset bar to be light text color if inverse is True",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_lang",
+ "description": "Expects the HTML tag to have the specified language.",
+ "parameters": "lang (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_titles",
+ "description": "Expects the control to have the specified nav titles.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_nav_values",
+ "description": "Expects the control to have the specified nav values.",
+ "parameters": "value (list\\[PatternOrStr\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_position",
+ "description": "Expects the navset bar to have the specified position.",
+ "parameters": "position (Literal\\['fixed-top', 'fixed-bottom', 'static-top', 'sticky-top'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_sidebar",
+ "description": "Assert whether or not the sidebar exists within the navset.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the navset title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the control to have the specified value.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_window_title",
+ "description": "Expects the window title to have the specified text.",
+ "parameters": "title (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_active_content",
+ "description": "Returns the locator for the active content.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "nav_panel",
+ "description": "Returns the nav panel ([](:class:`~shiny.playwright.controls.NavPanel`)) with the specified value.",
+ "parameters": "value (str)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the control to open or closed.",
+ "parameters": "value (str)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.Popover",
+ "methods": [
+ {
+ "name": "expect_active",
+ "description": "Expects the overlay to be active or inactive.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_body",
+ "description": "Expects the overlay body to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placement",
+ "description": "Expects the overlay to have the specified placement.",
+ "parameters": "value (str), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the popover title to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_overlay_container",
+ "description": "Returns the locator for the overlay container.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the popover.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.Sidebar",
+ "methods": [
+ {
+ "name": "expect_bg_color",
+ "description": "Asserts that the sidebar has the expected background color.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_class",
+ "description": "Asserts that the sidebar has or does not have a CSS class.",
+ "parameters": "class_name (str), has_class (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_desktop_state",
+ "description": "Asserts that the sidebar has the expected state on desktop.",
+ "parameters": "value (Literal\\['open', 'closed', 'always'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_gap",
+ "description": "Asserts that the sidebar has the expected gap.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_handle",
+ "description": "Asserts that the sidebar handle exists or does not exist.",
+ "parameters": "exists (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_mobile_max_height",
+ "description": "Asserts that the sidebar has the expected maximum height on mobile.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_mobile_state",
+ "description": "Asserts that the sidebar has the expected state on mobile.",
+ "parameters": "value (Literal\\['open', 'closed', 'always'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_open",
+ "description": "Expect the sidebar to be open or closed.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_padding",
+ "description": "Asserts that the sidebar has the expected padding.",
+ "parameters": "value (str \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_position",
+ "description": "Asserts that the sidebar is in the expected position.",
+ "parameters": "value (Literal\\['left', 'right'\\]), timeout (Timeout)"
+ },
+ {
+ "name": "expect_text",
+ "description": "Asserts that the sidebar has the expected text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Asserts that the sidebar has the expected title.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Asserts that the sidebar has the expected width.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the sidebar to be open or closed.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.Tooltip",
+ "methods": [
+ {
+ "name": "expect_active",
+ "description": "Expects the overlay to be active or inactive.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_body",
+ "description": "Expects the overlay body to have the specified text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_placement",
+ "description": "Expects the overlay to have the specified placement.",
+ "parameters": "value (str), timeout (Timeout)"
+ },
+ {
+ "name": "get_loc_overlay_container",
+ "description": "Returns the locator for the overlay container.",
+ "parameters": "timeout (Timeout)"
+ },
+ {
+ "name": "set",
+ "description": "Sets the state of the tooltip.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "playwright.controller.ValueBox",
+ "methods": [
+ {
+ "name": "expect_body",
+ "description": "Expects the value box body to have specific text.",
+ "parameters": "value (PatternOrStr \\), timeout (Timeout)"
+ },
+ {
+ "name": "expect_full_screen",
+ "description": "Verifies if the full screen mode is currently open.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_full_screen_available",
+ "description": "Expects whether full screen mode is available for the element.",
+ "parameters": "value (bool), timeout (Timeout)"
+ },
+ {
+ "name": "expect_height",
+ "description": "Expects the value box to have a specific height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_max_height",
+ "description": "Expects the value box to have a specific maximum height.",
+ "parameters": "value (StyleValue), timeout (Timeout)"
+ },
+ {
+ "name": "expect_title",
+ "description": "Expects the value box title to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_value",
+ "description": "Expects the value box value to have a specific text.",
+ "parameters": "value (PatternOrStr), timeout (Timeout)"
+ },
+ {
+ "name": "expect_width",
+ "description": "Expect the `width` attribute of a DOM element to have a specific value.",
+ "parameters": "value (AttrValue), timeout (Timeout)"
+ },
+ {
+ "name": "set_full_screen",
+ "description": "Sets the element to full screen mode or exits full screen mode.",
+ "parameters": "open (bool), timeout (Timeout)"
+ }
+ ]
+ },
+ {
+ "controller_name": "run.ShinyAppProc",
+ "methods": [
+ {
+ "name": "close",
+ "description": "Closes the connection and terminates the process.",
+ "parameters": ""
+ },
+ {
+ "name": "wait_until_ready",
+ "description": "Waits until the shiny app is ready to serve requests.",
+ "parameters": "timeout_secs (float)"
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/shiny/pytest/_generate/_main.py b/shiny/pytest/_generate/_main.py
new file mode 100644
index 000000000..8bf75de8c
--- /dev/null
+++ b/shiny/pytest/_generate/_main.py
@@ -0,0 +1,613 @@
+import importlib.resources
+import logging
+import os
+import re
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal, Optional, Tuple, Union
+
+from chatlas import ChatAnthropic, ChatOpenAI, token_usage
+from dotenv import load_dotenv
+
+__all__ = [
+ "ShinyTestGenerator",
+]
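+
+# Typical usage (a sketch; assumes ANTHROPIC_API_KEY or OPENAI_API_KEY is set
+# in the environment):
+#
+# generator = ShinyTestGenerator()
+# test_code, test_path = generator.generate_test_from_file("app.py")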
+
+
+@dataclass
+class Config:
+ """Configuration class for ShinyTestGenerator"""
+
+ # Model aliases for both providers
+ MODEL_ALIASES = {
+ # Anthropic models
+ "haiku3.5": "claude-3-5-haiku-20241022",
+ "sonnet": "claude-sonnet-4-20250514",
+ # OpenAI models
+ "gpt-5": "gpt-5-2025-08-07",
+ "gpt-5-mini": "gpt-5-mini-2025-08-07",
+ }
+
+ DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-20250514"
+ DEFAULT_OPENAI_MODEL = "gpt-5-mini-2025-08-07"
+ DEFAULT_PROVIDER = "anthropic"
+
+ MAX_TOKENS = 8092
+ LOG_FILE = "llm_test_generator.log"
+ COMMON_APP_PATTERNS = ["app.py", "app_*.py"]
+
+ # OpenAI pricing per million tokens: (input, output, cached)
+ OPENAI_PRICING = {
+ "gpt-5-2025-08-07": (1.250, 10.000, 0.125),
+ "gpt-5-mini-2025-08-07": (0.250, 2.000, 0.025),
+ }
+
+
+class ShinyTestGenerator:
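+ # Matches the first ```python ... ``` fenced code block in an LLM response.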
+ CODE_PATTERN = re.compile(r"```python(.*?)```", re.DOTALL)
+
+ def __init__(
+ self,
+ provider: Literal["anthropic", "openai"] = Config.DEFAULT_PROVIDER,
+ api_key: Optional[str] = None,
+ log_file: str = Config.LOG_FILE,
+ setup_logging: bool = True,
+ ):
+ """
+ Initialize the ShinyTestGenerator.
+ """
+ self.provider = provider
+ self._client = None
+ self._documentation = None
+ self._system_prompt = None
+ self.api_key = api_key
+ self.log_file = log_file
+
+ try:
+ load_dotenv(override=False)
+ except Exception:
+ pass
+
+ if setup_logging:
+ self.setup_logging()
+
+ @property
+ def client(self) -> Union[ChatAnthropic, ChatOpenAI]:
+ """Lazy-loaded chat client based on provider"""
+ if self._client is None:
+ if not self.api_key:
+ env_var = (
+ "ANTHROPIC_API_KEY"
+ if self.provider == "anthropic"
+ else "OPENAI_API_KEY"
+ )
+ self.api_key = os.getenv(env_var)
+ if not self.api_key:
+ raise ValueError(
+ f"Missing API key for provider '{self.provider}'. Set the environment variable "
+ f"{'ANTHROPIC_API_KEY' if self.provider == 'anthropic' else 'OPENAI_API_KEY'} or pass api_key explicitly."
+ )
+ if self.provider == "anthropic":
+ self._client = ChatAnthropic(api_key=self.api_key)
+ elif self.provider == "openai":
+ self._client = ChatOpenAI(api_key=self.api_key)
+ else:
+ raise ValueError(f"Unsupported provider: {self.provider}")
+ return self._client
+
+ @property
+ def documentation(self) -> str:
+ """Lazy-loaded documentation"""
+ if self._documentation is None:
+ self._documentation = self._load_documentation()
+ return self._documentation
+
+ @property
+ def system_prompt(self) -> str:
+ """Lazy-loaded system prompt"""
+ if self._system_prompt is None:
+ self._system_prompt = self._read_system_prompt()
+ return self._system_prompt
+
+ @property
+ def default_model(self) -> str:
+ """Get default model for current provider"""
+ if self.provider == "anthropic":
+ return Config.DEFAULT_ANTHROPIC_MODEL
+ elif self.provider == "openai":
+ return Config.DEFAULT_OPENAI_MODEL
+ else:
+ raise ValueError(f"Unsupported provider: {self.provider}")
+
+ @staticmethod
+ def setup_logging():
+ load_dotenv()
+ logging.basicConfig(
+ filename=Config.LOG_FILE,
+ level=logging.DEBUG,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+ )
+
+ def _load_documentation(self) -> str:
+ """Load documentation from package resources"""
+ try:
+ doc_path = (
+ importlib.resources.files("shiny.pytest._generate")
+ / "_data"
+ / "testing-documentation.json"
+ )
+ with doc_path.open("r") as f:
+ return f.read()
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ "Documentation file not found for app type: testing"
+ )
+
+ def _read_system_prompt(self) -> str:
+ """Read and combine system prompt with documentation"""
+ try:
+ prompt_path = (
+ importlib.resources.files("shiny.pytest._generate")
+ / "_data"
+ / "testing-SYSTEM_PROMPT.md"
+ )
+ with prompt_path.open("r") as f:
+ system_prompt_file = f.read()
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ "System prompt file not found for app type: testing"
+ )
+
+ return f"{system_prompt_file}\n\nHere is the function reference documentation for Shiny for Python: {self.documentation}"
+
+ def _resolve_model(self, model: str) -> str:
+ """Resolve model alias to actual model name"""
+ return Config.MODEL_ALIASES.get(model, model)
+
+ def _validate_model_for_provider(self, model: str) -> str:
+ """Validate that the model is compatible with the current provider"""
+ resolved_model = self._resolve_model(model)
+
+ if self.provider == "anthropic":
+ if resolved_model.startswith("gpt-") or resolved_model.startswith("o1-"):
+ raise ValueError(
+ f"Model '{model}' is an OpenAI model but provider is set to 'anthropic'. "
+ f"Either use an Anthropic model or switch provider to 'openai'."
+ )
+ elif self.provider == "openai":
+ if resolved_model.startswith("claude-"):
+ raise ValueError(
+ f"Model '{model}' is an Anthropic model but provider is set to 'openai'. "
+ f"Either use an OpenAI model or switch provider to 'anthropic'."
+ )
+
+ return resolved_model
+
+ def get_llm_response(self, prompt: str, model: Optional[str] = None) -> str:
+ """Get response from LLM using the configured provider"""
+ if model is None:
+ model = self.default_model
+ else:
+ model = self._validate_model_for_provider(model)
+
+ try:
+ if not self.api_key:
+ env_var = (
+ "ANTHROPIC_API_KEY"
+ if self.provider == "anthropic"
+ else "OPENAI_API_KEY"
+ )
+ self.api_key = os.getenv(env_var)
+ if not self.api_key:
+ raise ValueError(
+ f"Missing API key for provider '{self.provider}'. Set the environment variable "
+ f"{'ANTHROPIC_API_KEY' if self.provider == 'anthropic' else 'OPENAI_API_KEY'} or pass api_key."
+ )
+ # Create chat client with the specified model
+ if self.provider == "anthropic":
+ chat = ChatAnthropic(
+ model=model,
+ system_prompt=self.system_prompt,
+ max_tokens=Config.MAX_TOKENS,
+ api_key=self.api_key,
+ )
+ elif self.provider == "openai":
+ chat = ChatOpenAI(
+ model=model,
+ system_prompt=self.system_prompt,
+ api_key=self.api_key,
+ )
+ else:
+ raise ValueError(f"Unsupported provider: {self.provider}")
+
+ start_time = time.perf_counter()
+ response = chat.chat(prompt)
+ elapsed = time.perf_counter() - start_time
+ usage = token_usage()
+ # For Anthropic, token_usage() includes costs. For OpenAI, use chat.get_cost with model pricing.
+ token_price = None
+ if self.provider == "openai":
+ token_price = Config.OPENAI_PRICING.get(model)
+ try:
+ # Call to compute and cache costs internally; per-entry cost is computed below
+ _ = chat.get_cost(options="all", token_price=token_price)
+ except Exception:
+ # If cost computation fails, continue without it
+ pass
+
+ try:
+
+ def _fmt_tokens(n):
+ try:
+ n_int = int(n)
+ except Exception:
+ return str(n)
+ if n_int >= 1_000_000:
+ return f"{n_int / 1_000_000:.1f}M"
+ if n_int >= 1_000:
+ return f"{n_int / 1_000:.1f}k"
+ return str(n_int)
+
+ entries = usage
+ if isinstance(entries, dict):
+ entries = [entries]
+
+ if isinstance(entries, (list, tuple)) and entries:
+ print("LLM token usage and cost:")
+ for e in entries:
+ name = e.get("name", "N/A")
+ model_name = e.get("model", "N/A")
+ input_tokens = int(e.get("input", 0) or 0)
+ output_tokens = int(e.get("output", 0) or 0)
+ if self.provider == "openai":
+ cached_tokens = 0
+ for ck in ("cached", "cache", "cache_read", "cached_read"):
+ if ck in e and e.get(ck) is not None:
+ try:
+ cached_tokens = int(e.get(ck) or 0)
+ except Exception:
+ cached_tokens = 0
+ break
+ entry_cost = None
+ if token_price is not None:
+ try:
+ in_p, out_p, cached_p = token_price
+ entry_cost = (
+ (input_tokens * in_p)
+ + (output_tokens * out_p)
+ + (cached_tokens * cached_p)
+ ) / 1_000_000.0
+ except Exception:
+ entry_cost = None
+ cost_str = (
+ f"${entry_cost:.4f}"
+ if isinstance(entry_cost, (int, float))
+ else "$0.0000"
+ )
+ print(
+ f"OpenAI ({model_name}): {_fmt_tokens(input_tokens)} input, {_fmt_tokens(output_tokens)} output | Cost {cost_str} | Time taken: {elapsed:.2f}s\n"
+ )
+ else:
+ cost = round(float(e.get("cost", 0.0) or 0.0), 4)
+ print(
+ f"{name} ({model_name}): {_fmt_tokens(input_tokens)} input, {_fmt_tokens(output_tokens)} output | Cost ${cost:.4f} | Time taken: {elapsed:.2f}s\n"
+ )
+
+ else:
+ print(f"Token usage: {usage}\n")
+ print(f"Time taken: {elapsed:.2f}s")
+ except Exception:
+ print(f"Token usage: {usage}")
+ print(f"Time taken: {elapsed:.2f}s")
+
+ if hasattr(response, "content"):
+ return response.content
+ elif hasattr(response, "text"):
+ return response.text
+ else:
+ return str(response)
+ except Exception as e:
+ logging.error(f"Error getting LLM response from {self.provider}: {e}")
+ raise
+
+ def extract_test(self, response: str) -> str:
+ """Extract test code using pre-compiled regex pattern"""
+ match = self.CODE_PATTERN.search(response)
+ return match.group(1).strip() if match else ""
+
+ def _compute_relative_app_path(
+ self, app_file_path: Path, test_file_path: Path
+ ) -> str:
+ """Compute POSIX-style relative path from the test file directory to the app file."""
+ app_file_abs = app_file_path.resolve()
+ test_file_abs = test_file_path.resolve()
+
+ rel = os.path.relpath(str(app_file_abs), start=str(test_file_abs.parent))
+ return Path(rel).as_posix()
+
+ def _rewrite_fixture_path(self, test_code: str, relative_app_path: str) -> str:
+ """Rewrite create_app_fixture path to be relative to the test file directory.
+
+ Handles common patterns like:
+ - create_app_fixture(["app.py"]) -> create_app_fixture(["../app.py"]) (or appropriate)
+ - create_app_fixture("app.py") -> create_app_fixture("../app.py")
+ Keeps other arguments intact if present.
+ """
+ logging.debug(f"Rewriting fixture path to: {relative_app_path}")
+
+ if "create_app_fixture" not in test_code:
+ logging.warning("No create_app_fixture found in generated test code")
+ return test_code
+
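+ # List form: create_app_fixture(["app.py", ...]); group 3 is the quoted path.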
+ pattern_list = re.compile(
+ r"(create_app_fixture\(\s*\[\s*)(['\"])([^'\"]+)(\2)(\s*)([,\]])",
+ re.DOTALL,
+ )
+
+ def repl_list(m: re.Match) -> str:
+ logging.debug(
+ f"Replacing list form: '{m.group(3)}' with '{relative_app_path}'"
+ )
+ return f"{m.group(1)}{m.group(2)}{relative_app_path}{m.group(2)}{m.group(5)}{m.group(6)}"
+
+ new_code, list_count = pattern_list.subn(repl_list, test_code)
+
+ if list_count > 0:
+ logging.debug(f"Replaced {list_count} list-form fixture path(s)")
+
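+ # String form: create_app_fixture("app.py", ...); same capture layout as above.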
+ pattern_str = re.compile(
+ r"(create_app_fixture\(\s*)(['\"])([^'\"]+)(\2)(\s*)([,\)])",
+ re.DOTALL,
+ )
+
+ def repl_str(m: re.Match) -> str:
+ logging.debug(
+ f"Replacing string form: '{m.group(3)}' with '{relative_app_path}'"
+ )
+ return f"{m.group(1)}{m.group(2)}{relative_app_path}{m.group(2)}{m.group(5)}{m.group(6)}"
+
+ new_code2, str_count = pattern_str.subn(repl_str, new_code)
+
+ if str_count > 0:
+ logging.debug(f"Replaced {str_count} string-form fixture path(s)")
+
+ if list_count == 0 and str_count == 0:
+ logging.warning(
+ f"Found create_app_fixture but couldn't replace path. Code snippet: {test_code[:200]}..."
+ )
+
+ # Last-resort fallback: rewrite the first quoted argument inside the
+ # call. It runs only when neither targeted pattern matched, and on
+ # new_code2 so any earlier replacement is preserved.
+ fallback_pattern = re.compile(
+ r"(create_app_fixture\([^\)]*?['\"])([^'\"]+)(['\"][^\)]*?\))",
+ re.DOTALL,
+ )
+
+ def fallback_repl(m: re.Match) -> str:
+ logging.debug(
+ f"Fallback replacement: '{m.group(2)}' with '{relative_app_path}'"
+ )
+ return f"{m.group(1)}{relative_app_path}{m.group(3)}"
+
+ new_code2, fallback_count = fallback_pattern.subn(fallback_repl, new_code2)
+
+ if fallback_count > 0:
+ logging.debug(f"Fallback replaced {fallback_count} fixture path(s)")
+
+ return new_code2
+
+ def _create_test_prompt(self, app_text: str, app_file_name: str) -> str:
+ """Create test generation prompt with app file name"""
+ return (
+ f"Given this Shiny for Python app code from file '{app_file_name}':\n{app_text}\n"
+ "Please only add controllers for components that already have an ID in the shiny app.\n"
+ "Do not add tests for ones that do not have an existing ids since controllers need IDs to locate elements.\n"
+ "and server functionality of this app. Include appropriate assertions \\n"
+ "and test cases to verify the app's behavior.\n\n"
+ "CRITICAL: In the create_app_fixture call, you MUST pass a RELATIVE path from the test file's directory to the app file.\n"
+ "For example:\n"
+ "- If test is in 'tests/test_app.py' and app is in 'app.py', use: '../app.py'\n"
+ "- If test is in 'tests/subdir/test_app.py' and app is in 'apps/subdir/app.py', use: '../../apps/subdir/app.py'\n"
+ "- Always compute the correct relative path from the test file to the app file\n"
+ "- NEVER use absolute paths or paths that aren't relative from the test location\n\n"
+ "IMPORTANT: Only output the Python test code in a single code block. Do not include any explanation, justification, or extra text."
+ )
+
+ def _infer_app_file_path(
+ self, app_code: Optional[str] = None, app_file_path: Optional[str] = None
+ ) -> Path:
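+ """Resolve the app file: use the explicit path if given, else the first app.py / app_*.py in the current directory, else a placeholder path for raw code."""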
+ if app_file_path:
+ return Path(app_file_path).resolve()
+
+ current_dir = Path.cwd()
+
+ found_files = []
+ for pattern in Config.COMMON_APP_PATTERNS:
+ found_files.extend(current_dir.glob(pattern))
+
+ if found_files:
+ return found_files[0].resolve()
+
+ if app_code:
+ return Path("inferred_app.py").resolve()
+
+ raise FileNotFoundError(
+ "Could not infer app file path. Please provide app_file_path parameter."
+ )
+
+ def _generate_test_file_path(
+ self, app_file_path: Path, output_dir: Optional[Path] = None
+ ) -> Path:
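+ """Derive the test file path (test_<app stem>.py), placed next to the app file unless output_dir is given."""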
+ output_dir = output_dir or app_file_path.parent
+ test_file_name = f"test_{app_file_path.stem}.py"
+ return (output_dir / test_file_name).resolve()
+
+ def _generate_test(
+ self,
+ app_code: Optional[str] = None,
+ app_file_path: Optional[str] = None,
+ app_name: str = "app",
+ model: Optional[str] = None,
+ output_file: Optional[str] = None,
+ output_dir: Optional[str] = None,
+ ) -> Tuple[str, Path]:
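+ """Prompt the LLM, extract the generated test code, rewrite the fixture path to be relative, and write the test file."""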
+ if app_code and not app_file_path:
+ inferred_app_path = Path(f"{app_name}.py")
+ else:
+ inferred_app_path = self._infer_app_file_path(app_code, app_file_path)
+
+ if app_code is None:
+ if not inferred_app_path.exists():
+ raise FileNotFoundError(f"App file not found: {inferred_app_path}")
+ app_code = inferred_app_path.read_text(encoding="utf-8")
+
+ user_prompt = self._create_test_prompt(app_code, inferred_app_path.name)
+ response = self.get_llm_response(user_prompt, model)
+ test_code = self.extract_test(response)
+
+ if output_file:
+ test_file_path = Path(output_file).resolve()
+ else:
+ output_dir_path = Path(output_dir) if output_dir else None
+ test_file_path = self._generate_test_file_path(
+ inferred_app_path, output_dir_path
+ )
+
+ try:
+ logging.info(f"App file path: {inferred_app_path}")
+ logging.info(f"Test file path: {test_file_path}")
+
+ relative_app_path = self._compute_relative_app_path(
+ inferred_app_path, test_file_path
+ )
+
+ logging.info(f"Computed relative path: {relative_app_path}")
+
+ test_code = self._rewrite_fixture_path(test_code, relative_app_path)
+ except Exception as e:
+ logging.error(f"Error computing relative path: {e}")
+ try:
+ logging.warning("Falling back to using absolute path in test file")
+ test_code = self._rewrite_fixture_path(
+ test_code, str(inferred_app_path.resolve())
+ )
+ except Exception as e2:
+ logging.error(f"Error in fallback path handling: {e2}")
+
+ test_file_path.parent.mkdir(parents=True, exist_ok=True)
+ test_file_path.write_text(test_code, encoding="utf-8")
+
+ return test_code, test_file_path
+
+ def generate_test_from_file(
+ self,
+ app_file_path: str,
+ model: Optional[str] = None,
+ output_file: Optional[str] = None,
+ output_dir: Optional[str] = None,
+ ) -> Tuple[str, Path]:
+ return self._generate_test(
+ app_file_path=app_file_path,
+ model=model,
+ output_file=output_file,
+ output_dir=output_dir,
+ )
+
+ def generate_test_from_code(
+ self,
+ app_code: str,
+ app_name: str = "app",
+ model: Optional[str] = None,
+ output_file: Optional[str] = None,
+ output_dir: Optional[str] = None,
+ ) -> Tuple[str, Path]:
+ return self._generate_test(
+ app_code=app_code,
+ app_name=app_name,
+ model=model,
+ output_file=output_file,
+ output_dir=output_dir,
+ )
+
+ def switch_provider(
+ self, provider: Literal["anthropic", "openai"], api_key: Optional[str] = None
+ ):
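+ """Switch LLM providers at runtime; the cached chat client is cleared so it is rebuilt lazily."""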
+ self.provider = provider
+ if api_key:
+ self.api_key = api_key
+ self._client = None
+
+ @classmethod
+ def create_anthropic_generator(
+ cls, api_key: Optional[str] = None, **kwargs
+ ) -> "ShinyTestGenerator":
+ return cls(provider="anthropic", api_key=api_key, **kwargs)
+
+ @classmethod
+ def create_openai_generator(
+ cls, api_key: Optional[str] = None, **kwargs
+ ) -> "ShinyTestGenerator":
+ return cls(provider="openai", api_key=api_key, **kwargs)
+
+ def get_available_models(self) -> list[str]:
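+ """List the model aliases compatible with the current provider."""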
+ if self.provider == "anthropic":
+ return [
+ model
+ for model in Config.MODEL_ALIASES.keys()
+ if not (model.startswith("gpt-") or model.startswith("o1-"))
+ ]
+ elif self.provider == "openai":
+ return [
+ model
+ for model in Config.MODEL_ALIASES.keys()
+ if (model.startswith("gpt-") or model.startswith("o1-"))
+ ]
+ else:
+ return []
+
+
+def cli():
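+ """Command-line entry point for generating Shiny tests with an LLM."""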
+ import argparse
+
+ parser = argparse.ArgumentParser(description="Generate Shiny tests using LLM")
+ parser.add_argument("app_file", help="Path to the Shiny app file")
+ parser.add_argument(
+ "--provider",
+ choices=["anthropic", "openai"],
+ default=Config.DEFAULT_PROVIDER,
+ help="LLM provider to use",
+ )
+ parser.add_argument("--model", help="Model to use (optional)")
+ parser.add_argument("--output-dir", help="Output directory for test files")
+ parser.add_argument("--api-key", help="API key (optional, can use env vars)")
+
+ args = parser.parse_args()
+
+ app_file_path = Path(args.app_file)
+ if not app_file_path.is_file():
+ print(f"Error: File not found at {app_file_path}")
+ sys.exit(1)
+
+ try:
+ generator = ShinyTestGenerator(
+ provider=args.provider, api_key=args.api_key, setup_logging=False
+ )
+
+ test_code, test_file_path = generator.generate_test_from_file(
+ str(app_file_path),
+ model=args.model,
+ output_dir=args.output_dir,
+ )
+
+ print(f"✅ Test file generated successfully: {test_file_path}")
+ print(f"📝 Used provider: {args.provider}")
+ if args.model:
+ print(f"🤖 Used model: {args.model}")
+
+ except Exception as e:
+ print(f"❌ Error: {e}")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/tests/inspect-ai/__init__.py b/tests/inspect-ai/__init__.py
new file mode 100644
index 000000000..ca5ba6879
--- /dev/null
+++ b/tests/inspect-ai/__init__.py
@@ -0,0 +1,3 @@
+"""
+Contains evaluation apps, scripts, and results for testing the Shiny test generator.
+"""
diff --git a/tests/inspect-ai/apps/__init__.py b/tests/inspect-ai/apps/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_01_core_basic/__init__.py b/tests/inspect-ai/apps/app_01_core_basic/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_01_core_basic/app.py b/tests/inspect-ai/apps/app_01_core_basic/app.py
new file mode 100644
index 000000000..84e26dec3
--- /dev/null
+++ b/tests/inspect-ai/apps/app_01_core_basic/app.py
@@ -0,0 +1,58 @@
+from shiny import App, render, ui
+
+# Create the UI
+app_ui = ui.page_fluid(
+ # Add Font Awesome CSS in the head
+ ui.tags.head(
+ ui.tags.link(
+ rel="stylesheet",
+ href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css",
+ )
+ ),
+ # Main layout
+ ui.layout_column_wrap(
+ ui.card(
+ ui.card_header("Action Button Examples"),
+ # Basic button with width parameter
+ ui.input_action_button(id="btn1", label="Basic Button", width="200px"),
+ ui.br(), # Add spacing
+ # Button with icon and disabled state
+ ui.input_action_button(
+ id="btn2",
+ label="Disabled Button with Icon",
+ icon=ui.tags.i(class_="fa-solid fa-shield-halved"),
+ disabled=True,
+ ),
+ ui.br(), # Add spacing
+ # Button with custom class and style attributes
+ ui.input_action_button(
+ id="btn3",
+ label="Styled Button",
+ class_="btn-success",
+ style="margin-top: 20px;",
+ ),
+ ),
+ # Card for displaying results
+ ui.card(
+ ui.card_header("Click Counts"),
+ ui.output_text("click_counts"),
+ ),
+ width="100%",
+ ),
+)
+
+
+# Define the server
+def server(input, output, session):
+ @output
+ @render.text
+ def click_counts():
+ return (
+ f"Button 1 clicks: {input.btn1() or 0}\n"
+ f"Button 2 clicks: {input.btn2() or 0}\n"
+ f"Button 3 clicks: {input.btn3() or 0}"
+ )
+
+
+# Create and return the app
+app = App(app_ui, server)
diff --git a/tests/inspect-ai/apps/app_02_express_basic/__init__.py b/tests/inspect-ai/apps/app_02_express_basic/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_02_express_basic/app.py b/tests/inspect-ai/apps/app_02_express_basic/app.py
new file mode 100644
index 000000000..d65be4267
--- /dev/null
+++ b/tests/inspect-ai/apps/app_02_express_basic/app.py
@@ -0,0 +1,48 @@
+from shiny.express import input, render, ui
+
+# Add Font Awesome CSS for icons - this needs to be before any UI elements
+ui.head_content(
+ ui.HTML(
+ '<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css">'
+ )
+)
+
+# Create a layout with some spacing
+with ui.layout_column_wrap(width="100%"):
+ with ui.card():
+ ui.card_header("Action Button Examples")
+
+ # Basic button with width parameter
+ ui.input_action_button(id="btn1", label="Basic Button", width="200px")
+
+ ui.br() # Add some spacing
+
+ # Button with icon and disabled state
+ ui.input_action_button(
+ id="btn2",
+ label="Disabled Button with Icon",
+ icon=ui.tags.i(class_="fa-solid fa-shield-halved"),
+ disabled=True,
+ )
+
+ ui.br() # Add some spacing
+
+ # Button with custom class and style attributes
+ ui.input_action_button(
+ id="btn3",
+ label="Styled Button",
+ class_="btn-success",
+ style="margin-top: 20px;",
+ )
+
+ # Create another card for displaying results
+ with ui.card():
+ ui.card_header("Click Counts")
+
+ @render.text
+ def click_counts():
+ return (
+ f"Button 1 clicks: {input.btn1() or 0}\n"
+ f"Button 2 clicks: {input.btn2() or 0}\n"
+ f"Button 3 clicks: {input.btn3() or 0}"
+ )
diff --git a/tests/inspect-ai/apps/app_03_slider/__init__.py b/tests/inspect-ai/apps/app_03_slider/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_03_slider/app.py b/tests/inspect-ai/apps/app_03_slider/app.py
new file mode 100644
index 000000000..66f2329b8
--- /dev/null
+++ b/tests/inspect-ai/apps/app_03_slider/app.py
@@ -0,0 +1,12 @@
+from shiny.express import input, render, ui
+
+ui.page_opts(title="Slider Parameters Demo", full_width=True)
+
+with ui.layout_column_wrap(width="400px"):
+ with ui.card():
+ ui.card_header("Basic Numeric Slider")
+ ui.input_slider("slider1", "Min, max, value", min=0, max=100, value=50)
+
+ @render.text
+ def value1():
+ return f"Value: {input.slider1()}"
diff --git a/tests/inspect-ai/apps/app_04_custom_app_name/__init__.py b/tests/inspect-ai/apps/app_04_custom_app_name/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_04_custom_app_name/app_input_checkbox_group.py b/tests/inspect-ai/apps/app_04_custom_app_name/app_input_checkbox_group.py
new file mode 100644
index 000000000..a51840354
--- /dev/null
+++ b/tests/inspect-ai/apps/app_04_custom_app_name/app_input_checkbox_group.py
@@ -0,0 +1,31 @@
+from shiny.express import input, render, ui
+
+# Create sample choices with HTML formatting for demonstration
+choices = {
+ "red": ui.span("Red", style="color: #FF0000;"),
+ "green": ui.span("Green", style="color: #00AA00;"),
+ "blue": ui.span("Blue", style="color: #0000AA;"),
+}
+
+with ui.card():
+ ui.card_header("Color Selection Demo")
+
+ # Using input_checkbox_group with all its parameters
+ ui.input_checkbox_group(
+ id="colors", # Required: unique identifier
+ label="Choose colors", # Required: label text
+ choices=choices, # Required: choices as dict with HTML formatting
+ selected=["red", "blue"], # Optional: pre-selected values
+ inline=True, # Optional: display choices inline
+ width="300px", # Optional: CSS width
+ )
+
+ # Add some spacing
+ ui.hr()
+
+ # Simple output to show selected values
+ @render.text
+ def selected_colors():
+ if input.colors():
+ return f"You selected: {', '.join(input.colors())}"
+ return "No colors selected"
diff --git a/tests/inspect-ai/apps/app_05_streamlit/__init__.py b/tests/inspect-ai/apps/app_05_streamlit/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_05_streamlit/app.py b/tests/inspect-ai/apps/app_05_streamlit/app.py
new file mode 100644
index 000000000..c20a69e26
--- /dev/null
+++ b/tests/inspect-ai/apps/app_05_streamlit/app.py
@@ -0,0 +1,11 @@
+import streamlit as st
+
+st.title("My Simple Streamlit App")
+
+user_name = st.text_input("Enter your name", "Type your name here...")
+
+# Add a slider widget
+user_age = st.slider("Select your age", 0, 100, 25)
+
+# Display the user's input
+st.write(f"Hello, {user_name}! You are {user_age} years old.")
diff --git a/tests/inspect-ai/apps/app_06_R_shiny/__init__.py b/tests/inspect-ai/apps/app_06_R_shiny/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_06_R_shiny/app.R b/tests/inspect-ai/apps/app_06_R_shiny/app.R
new file mode 100644
index 000000000..e714ab86d
--- /dev/null
+++ b/tests/inspect-ai/apps/app_06_R_shiny/app.R
@@ -0,0 +1,30 @@
+library(shiny)
+
+ui <- fluidPage(
+ # Application title
+ titlePanel("My First Shiny App"),
+
+ sidebarLayout(
+ sidebarPanel(
+ sliderInput(
+ inputId = "num",
+ label = "Select a number:",
+ min = 1,
+ max = 1000,
+ value = 500
+ ) # Default value
+ ),
+
+ mainPanel(
+ textOutput("message")
+ )
+ )
+)
+
+server <- function(input, output) {
+ output$message <- renderText({
+ paste("You selected:", input$num)
+ })
+}
+
+shinyApp(ui = ui, server = server)
diff --git a/tests/inspect-ai/apps/app_07_modules/__init__.py b/tests/inspect-ai/apps/app_07_modules/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_07_modules/app.py b/tests/inspect-ai/apps/app_07_modules/app.py
new file mode 100644
index 000000000..2a4886d37
--- /dev/null
+++ b/tests/inspect-ai/apps/app_07_modules/app.py
@@ -0,0 +1,31 @@
+from shiny import App, module, render, ui
+
+
+@module.ui
+def my_module_ui():
+ """Defines the UI elements for the module with multiple text inputs."""
+ return ui.div(
+ ui.h2("My Module"),
+ ui.input_text("text_input_1", "Enter the first text:"),
+ ui.input_text("text_input_2", "Enter the second text:"), # Second text input
+ ui.output_text("text_output"),
+ )
+
+
+@module.server
+def my_module_server(input, output, session):
+ """Defines the server logic for the module."""
+
+ @render.text
+ def text_output():
+ return f"You entered: {input.text_input_1()} and {input.text_input_2()}" # Accessing both inputs
+
+
+app_ui = ui.page_fluid(ui.h1("Main Application"), my_module_ui("module_instance_1"))
+
+
+def server(input, output, session):
+ my_module_server("module_instance_1")
+
+
+app = App(app_ui, server)
diff --git a/tests/inspect-ai/apps/app_08_navigation/__init__.py b/tests/inspect-ai/apps/app_08_navigation/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_08_navigation/app.py b/tests/inspect-ai/apps/app_08_navigation/app.py
new file mode 100644
index 000000000..374f6c5a7
--- /dev/null
+++ b/tests/inspect-ai/apps/app_08_navigation/app.py
@@ -0,0 +1,67 @@
+# app.py
+from shiny import App, render, ui
+
+# Define the main app UI
+app_ui = ui.page_fluid(
+ ui.h1("Shiny App with Tabs"),
+ ui.navset_tab(
+ ui.nav_panel(
+ "Tab 1: Input & Output", # Tab title
+ ui.h3("Input and Text Output"),
+ ui.input_text(
+ "text_input", "Enter some text:", "Hello Shiny!"
+ ), # Text input component
+ ui.output_text("output_text"),
+ ),
+ ui.nav_panel(
+ "Tab 2: Slider & Plot", # Tab title
+ ui.h3("Slider and Plot Output"),
+ ui.input_slider(
+ "n_points", "Number of points:", min=10, max=100, value=50
+ ), # Slider input component
+ ui.output_plot("output_plot"),
+ ),
+ ui.nav_panel(
+ "Tab 3: Button & Message", # Tab title
+ ui.h3("Action Button and Message Output"),
+ ui.input_action_button(
+ "action_button", "Click me!"
+ ), # Action button component
+ ui.output_text("output_message"),
+ ),
+ id="navset_Tab",
+ ),
+)
+
+
+# Define the main app server function
+def server(input, output, session):
+
+ @render.text # Decorator for verbatim text output
+ def output_text():
+ return f"You entered: {input.text_input()}" # Accessing the text input value
+
+ @render.plot # Decorator for rendering plots
+ def output_plot():
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ # Generate some data based on the slider input
+ x = np.linspace(0, 10, input.n_points())
+ y = np.sin(x)
+
+ fig, ax = plt.subplots()
+ ax.plot(x, y)
+ ax.set_title("Dynamic Sine Wave")
+ return fig
+
+ @render.text # Decorator for rendering simple text
+ def output_message():
+ # Respond when the action button is clicked
+ if input.action_button() > 0:
+ return "Button clicked!"
+ return "Click the button."
+
+
+# Create the Shiny app instance
+app = App(app_ui, server)
diff --git a/tests/inspect-ai/apps/app_09_plots/__init__.py b/tests/inspect-ai/apps/app_09_plots/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_09_plots/app.py b/tests/inspect-ai/apps/app_09_plots/app.py
new file mode 100644
index 000000000..615048dc0
--- /dev/null
+++ b/tests/inspect-ai/apps/app_09_plots/app.py
@@ -0,0 +1,67 @@
+# app.py
+import matplotlib.pyplot as plt
+import numpy as np
+
+from shiny import App, module, render, ui
+
+
+# Define the module UI function
+@module.ui
+def plot_module_ui():
+ """Defines a module with two plots and inputs to control them."""
+ return ui.div(
+ ui.h3("Plot Module"),
+ ui.input_slider(
+ "n_points", "Number of points:", min=10, max=100, value=50
+ ), # Slider for points
+ ui.row( # Use ui.row to arrange plots side-by-side
+ ui.column(6, ui.output_plot("plot1")), # First plot in a column
+ ui.column(6, ui.output_plot("plot2")), # Second plot in a column
+ ),
+ )
+
+
+# Define the module server function
+@module.server
+def plot_module_server(input, output, session):
+ """Defines the server logic for the plot module."""
+
+ @output
+ @render.plot # Decorator for rendering plots
+ def plot1():
+ # Generate some data for the first plot
+ x = np.random.rand(input.n_points())
+ y = np.random.rand(input.n_points())
+
+ fig, ax = plt.subplots()
+ ax.scatter(x, y)
+ ax.set_title("Random Scatter Plot")
+ return fig
+
+ @output
+ @render.plot # Decorator for rendering plots
+ def plot2():
+ # Generate some data for the second plot
+ x = np.linspace(0, 10, input.n_points())
+ y = np.sin(x)
+
+ fig, ax = plt.subplots()
+ ax.plot(x, y)
+ ax.set_title("Sine Wave Plot")
+ return fig
+
+
+# Define the main app UI
+app_ui = ui.page_fluid(
+ ui.h1("Main Application with Plot Module"),
+ plot_module_ui("my_plot_module"), # Instantiate the module UI
+)
+
+
+# Define the main app server function
+def server(input, output, session):
+ plot_module_server("my_plot_module") # Instantiate the module server
+
+
+# Create the Shiny app instance
+app = App(app_ui, server)
diff --git a/tests/inspect-ai/apps/app_10_complex_layout/__init__.py b/tests/inspect-ai/apps/app_10_complex_layout/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/apps/app_10_complex_layout/app.py b/tests/inspect-ai/apps/app_10_complex_layout/app.py
new file mode 100644
index 000000000..a5ebad554
--- /dev/null
+++ b/tests/inspect-ai/apps/app_10_complex_layout/app.py
@@ -0,0 +1,70 @@
+import pandas as pd
+
+from shiny import App, render, ui
+
+app_ui = ui.page_fluid(
+ ui.h2("Shiny for Python Demo with Multiple Inputs and Data Grid"),
+ ui.layout_sidebar(
+ ui.sidebar( # Use ui.sidebar()
+ ui.input_action_button("action_button", "Click me!"),
+ ui.input_checkbox("checkbox", "Check this box"),
+ ui.input_date("date_selector", "Select a date"),
+ ui.input_numeric("numeric_input", "Enter a number", 10),
+ ui.input_radio_buttons(
+ "radio_buttons", "Choose one:", ["Option A", "Option B", "Option C"]
+ ),
+ ui.input_switch("switch", "Turn on/off"),
+ ),
+ ui.h3("Output Values"),
+ ui.output_text("action_button_value"),
+ ui.output_text("checkbox_value"),
+ ui.output_text("date_selector_value"),
+ ui.output_text("numeric_input_value"),
+ ui.output_text("radio_buttons_value"),
+ ui.output_text("switch_value"),
+ ui.h3("Data Grid Output"),
+ ui.output_data_frame("data_grid"),
+ ),
+)
+
+
+def server(input, output, session):
+ @render.text
+ def action_button_value():
+ return f"Action Button clicked: {input.action_button()}"
+
+ @render.text
+ def checkbox_value():
+ return f"Checkbox checked: {input.checkbox()}"
+
+ @render.text
+ def date_selector_value():
+ return f"Selected date: {input.date_selector()}"
+
+ @render.text
+ def numeric_input_value():
+ return f"Numeric Input value: {input.numeric_input()}"
+
+ @render.text
+ def radio_buttons_value():
+ return f"Selected Radio Button: {input.radio_buttons()}"
+
+ @render.text
+ def switch_value():
+ return f"Switch status: {input.switch()}"
+
+ @render.data_frame
+ def data_grid():
+ data = {
+ "Input": [
+ "Action Button",
+ ],
+ "Value": [
+ input.action_button(),
+ ],
+ }
+ df = pd.DataFrame(data)
+ return render.DataGrid(df)
+
+
+app = App(app_ui, server)
diff --git a/tests/inspect-ai/scripts/README.md b/tests/inspect-ai/scripts/README.md
new file mode 100644
index 000000000..8223e5176
--- /dev/null
+++ b/tests/inspect-ai/scripts/README.md
@@ -0,0 +1,7 @@
+# Evaluation Scripts
+
+This directory contains the scripts that generate, evaluate, and report on tests produced by the Shiny test generator.
+
+- `create_test_metadata.py`: Generates a test for each sample app and records metadata for the test cases. This metadata is used by the evaluation script to understand the context of each test.
+
+- `evaluation.py`: Runs the Inspect AI evaluation of the generated tests against the test cases, using the metadata produced by `create_test_metadata.py`.
+
+- `prepare_comment.py`: Formats the averaged evaluation results from `summary.json` into a comment body for the pull request.
+
+- `run-test-evaluation.sh`: Orchestrates the full pipeline: metadata generation, Inspect AI evaluation, pytest runs, and result averaging.
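+
+A typical local run of the evaluation step, mirroring what `run-test-evaluation.sh` does (paths relative to the repository root):
+
+```bash
+# Generate tests and metadata for the sample apps
+python tests/inspect-ai/scripts/create_test_metadata.py
+
+# Grade the generated tests with Inspect AI
+inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
+ --log-dir test-results-inspect-ai/attempts/attempt_1/ \
+ --log-format json
+```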
diff --git a/tests/inspect-ai/scripts/__init__.py b/tests/inspect-ai/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/inspect-ai/scripts/create_test_metadata.py b/tests/inspect-ai/scripts/create_test_metadata.py
new file mode 100644
index 000000000..c23753e39
--- /dev/null
+++ b/tests/inspect-ai/scripts/create_test_metadata.py
@@ -0,0 +1,81 @@
+import json
+from itertools import islice
+from pathlib import Path
+from typing import Any, Dict, List, Union, cast
+
+from shiny.pytest._generate import ShinyTestGenerator
+
+
+def generate_shiny_test_metadata(
+ apps_dir: Union[str, Path] = "tests/inspect-ai/apps", max_tests: int = 10
+) -> Dict[str, Dict[str, Union[str, Path]]]:
+ """
+ Generate Shiny tests and metadata for apps in the specified directory.
+
+ Args:
+ apps_dir: Directory containing Shiny apps
+ max_tests: Maximum number of tests to generate
+
+ Returns:
+ Dictionary mapping test names to test metadata including code and app info
+ """
+ generator = ShinyTestGenerator()
+ apps_dir = Path(apps_dir)
+
+ if not apps_dir.exists() and apps_dir.is_relative_to("."):
+ script_dir = Path(__file__).parent
+ apps_dir = script_dir.parent / "apps"
+ if not apps_dir.exists():
+ apps_dir = script_dir.parent.parent.parent / "tests" / "inspect-ai" / "apps"
+
+ app_files = islice(apps_dir.glob("*/app*.py"), max_tests)
+
+ test_data: Dict[str, Dict[str, Union[str, Path]]] = {}
+
+ for app_path in app_files:
+ try:
+ test_code, test_file_path = generator.generate_test_from_file(str(app_path))
+
+ test_name = f"test_{app_path.parent.name}_{app_path.stem}"
+ app_code = app_path.read_text(encoding="utf-8")
+
+ test_data[test_name] = {
+ "test_code": test_code,
+ "app_code": app_code,
+ "app_path": str(app_path),
+ "test_file_path": test_file_path,
+ "app_name": app_path.parent.name,
+ }
+
+ except Exception as e:
+ print(f"Error generating test for {app_path}: {e}")
+ continue
+
+ return test_data
+
+
+if __name__ == "__main__":
+ test_data: Dict[str, Dict[str, Union[str, Path]]] = generate_shiny_test_metadata()
+
+ metadata_file = Path(__file__).parent / "test_metadata.json"
+
+ def convert_paths(obj: Any) -> Any:
+ """Convert Path objects to strings for JSON serialization."""
+ if isinstance(obj, dict):
+ # Cast to Dict[Any, Any] to avoid type errors
+ typed_dict = cast(Dict[Any, Any], obj)
+ return {str(k): convert_paths(v) for k, v in typed_dict.items()}
+ elif isinstance(obj, Path):
+ return str(obj)
+ elif isinstance(obj, list):
+ # Cast to List[Any] to avoid type errors
+ typed_list = cast(List[Any], obj)
+ return [convert_paths(item) for item in typed_list]
+ else:
+ return obj
+
+ serializable_test_data: Any = convert_paths(test_data)
+ with open(metadata_file, "w") as f:
+ json.dump(serializable_test_data, f, indent=2)
+
+ print(f"Saved test metadata to: {metadata_file}")
diff --git a/tests/inspect-ai/scripts/evaluation.py b/tests/inspect-ai/scripts/evaluation.py
new file mode 100644
index 000000000..a97071e3b
--- /dev/null
+++ b/tests/inspect-ai/scripts/evaluation.py
@@ -0,0 +1,320 @@
+import json
+import re
+from pathlib import Path
+
+from inspect_ai import Task, task
+from inspect_ai.dataset import Sample
+from inspect_ai.model import get_model
+from inspect_ai.scorer import model_graded_qa
+from inspect_ai.solver import generate
+
+
+def get_app_specific_instructions(app_name: str) -> str:
+ """
+ Get specific grading instructions for each app based on its unique characteristics.
+
+ Args:
+ app_name: Name of the Shiny app
+
+ Returns:
+ App-specific grading instructions
+ """
+ app_instructions = {
+ "app_09_plots": """
+ For this plot app's tests, focus on components that exist in the app code:
+ - Whether the test creates an instance of the InputSlider controller with id "my_plot_module-n_points"
+ - Ensure that the slider component is verified for its label, min, max, and value attributes.
+ - Ensure that the test checks by moving the slider to different values and verify the slider values accordingly
+
+ IMPORTANT: Only evaluate based on components and IDs that actually exist in the app code.
+ """,
+ "app_07_modules": """
+ For this module-based app, focus on components that exist in the app code:
+ - Whether the test creates instances of the InputText controller with ids "module_instance_1-text_input_1" and "module_instance_1-text_input_2"
+ - Whether the test creates an instance of the OutputText controller with id "module_instance_1-text_output"
+ - Ensure that the text inputs are verified for their labels and initial values.
+ - Ensure that the test checks the text output for correct concatenation of input values.
+ - Check that the test verifies the module's reactivity by changing input values and checking output
+
+ IMPORTANT: Only evaluate based on components and IDs that actually exist in the app code.
+ """,
+ "app_03_slider": """
+ For this slider app, focus on components that exist in the app code:
+ - Whether the test creates an instance of the InputSlider controller with id "slider1"
+ - Ensure that the slider component is verified for its label, min, max, and value attributes.
+ - Ensure that the test checks by moving the slider to different values and verify the slider values accordingly.
+
+ IMPORTANT: Only evaluate based on components and IDs that actually exist in the app code.
+ """,
+ "app_06_R_shiny": """
+ For this app, focus on:
+ - The test code should be empty since the app code was not a Shiny for Python app.
+ """,
+ "app_10_complex_layout": """
+ For this app, focus on the components that exist in the app code:
+ - Whether the test creates an instance of the InputActionButton controller with id "action_button"
+ - Ensure that the action button component is verified for its label and click functionality.
+ - Whether the test creates an instance of the InputCheckbox controller with id "checkbox"
+ - Ensure that the checkbox component is verified for its label and checked state.
+ - Ensure that the test checks the checkbox state changes and verifies the output text accordingly.
+ - Whether the test creates an instance of the InputDate controller with id "date_selector"
+ - Ensure that the date selector component is verified for its label and selected date.
+ - Ensure that the test checks the date selector state changes and verifies the output text accordingly.
+ - Whether the test creates an instance of the InputNumeric controller with id "numeric_input"
+ - Ensure that the numeric input component is verified for its label and value.
+ - Ensure that the test checks the numeric input state changes and verifies the output text accordingly.
+ - Whether the test creates an instance of the InputRadioButtons controller with id "radio_buttons"
+ - Ensure that the radio buttons component is verified for its label, choices, and selected value.
+ - Ensure that the test checks the radio buttons state changes and verifies the output text accordingly.
+ - Whether the test creates an instance of the InputSwitch controller with id "switch"
+ - Ensure that the switch component is verified for its label and state.
+ - Ensure that the test checks the switch state changes and verifies the output text accordingly.
+ - Whether the test creates an instance of the OutputText controller with ids "action_button_value", "checkbox_value", "date_selector_value", "numeric_input_value", "radio_buttons_value", and "switch_value"
+ - Ensure that the output text components are verified for their initial values and updated values based on user interactions.
+ - Whether the test creates an instance of the OutputDataFrame controller with id "data_grid"
+ - Ensure that the data grid component is verified for its initial state and updates correctly based on user interactions.
+
+ IMPORTANT: Only evaluate based on components and IDs that actually exist in the app code. The test should only test functionality that is actually present in the app.
+ """,
+ "app_02_express_basic": """
+ For this shiny express basic app, focus on:
+ - Ensure that the test creates an instance of the InputActionButton controller with id "btn1"
+ - Ensure that the action button component is verified for its label and click functionality.
+ - Ensure that the test checks the action button state changes and verifies the output text accordingly.
+ - Ensure that the test creates an instance of the OutputText controller with id "click_counts"
+ - Ensure that the output text component is verified for its initial value and updated values based on button clicks.
+ - Ensure that the test creates instances of the InputActionButton controller with ids "btn2" and "btn3"
+ - Ensure that the disabled button with icon is verified for its label and icon.
+ - Ensure that the styled button is verified for its label and custom styles.
+ - Ensure that the test checks the click counts for each button and verifies the output text accordingly
+ """,
+ "app_08_navigation": """
+ For this app, focus on:
+ - Whether the test creates an instance of the NavsetTab controller with id "navset_Tab"
+ - Ensure that the navset tab component is verified for its titles and active state.
+ - Ensure that the test checks the navigation between tabs and verifies the active state of each tab
+ - Ensure that the test verifies the content of each tab, including input components and output displays
+ - Ensure that the test checks the functionality of input components in each tab, such as text inputs, sliders, and action buttons
+ """,
+ "app_04_custom_app_name": """
+ For this app, focus on:
+ - Ensure that the create_app_fixture is called with the correct app file. In this case, it should be "app_input_checkbox_group.py"
+ - Ensure that the test creates an instance of the InputCheckboxGroup controller with id "colors"
+ - Ensure that the checkbox group component is verified for its label, choices, selected values, inline state, and width.
+ - Ensure that the test checks the checkbox group state changes and verifies the output text accordingly.
+ - Ensure that the test creates an instance of the OutputText controller with id "selected_colors"
+ - Ensure that the output text component is verified for its initial value and updated values based on checkbox selections.
+ """,
+ "app_01_core_basic": """
+ For this app, focus on:
+ - Ensure that the test creates an instance of the InputActionButton controller with id "btn1"
+ - Ensure that the action button component is verified for its label and click functionality.
+ - Ensure that the test checks the action button state changes and verifies the output text accordingly.
+ - Ensure that the test creates an instance of the OutputText controller with id "click_counts"
+ - Ensure that the test creates instances of the InputActionButton controller with ids "btn2" and "btn3"
+ """,
+ "app_05_streamlit": """
+ For this app, focus on:
+ - The test code should be empty since the app code was not a Shiny for Python app.
+ """,
+ }
+
+ return app_instructions.get(app_name, "")
+
+
+def extract_component_ids(app_code: str) -> dict[str, list[str]]:
+ """
+ Extract component IDs from Shiny app code to ensure evaluation focuses on existing components.
+
+ Args:
+ app_code: The Shiny app code to analyze
+
+ Returns:
+ Dictionary with component types as keys and lists of IDs as values
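+
+ Example (illustrative):
+ >>> extract_component_ids('ui.input_slider("n", "N:", 1, 10, 5)')
+ {'input': ['n'], 'output': []}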
+ """
+ input_ids = set()
+ output_ids = set()
+
+ # 1. Find input components (ui.input_*)
+ try:
+ input_matches = re.findall(
+ r'ui\.input_\w+\(\s*(?:id\s*=\s*)?["\']([^"\']+)["\']', app_code
+ )
+ input_ids.update(input_matches)
+ except re.error:
+ pass
+
+ # 2. Find output components (ui.output_*)
+ try:
+ output_matches = re.findall(
+ r'ui\.output_\w+\(\s*(?:id\s*=\s*)?["\']([^"\']+)["\']', app_code
+ )
+ output_ids.update(output_matches)
+ except re.error:
+ pass
+
+ # 3. Find input references (input.name())
+ try:
+ input_refs = re.findall(r"input\.([\w_]+)\(\)", app_code)
+ input_ids.update(input_refs)
+ except re.error:
+ pass
+
+ # 4. Find @render.* definitions
+ try:
+ render_defs = re.findall(r"@render\.\w+\s+def\s+([\w_]+)\s*\(", app_code)
+ output_ids.update(render_defs)
+ except re.error:
+ pass
+
+ # 5. Find @output wrapped definitions
+ try:
+ output_defs = re.findall(r"@output\s+def\s+([\w_]+)\s*\(", app_code)
+ output_ids.update(output_defs)
+ except re.error:
+ pass
+
+ # 6. Find module instantiations
+ try:
+ module_ids = re.findall(
+ r'\w+_\w+_(?:ui|server)\(\s*["\']([^"\']+)["\']', app_code
+ )
+ input_ids.update(module_ids)
+ output_ids.update(module_ids)
+ except re.error:
+ pass
+
+ # 7. Find navset components
+ try:
+ nav_ids = re.findall(
+ r'ui\.navset_\w+\(.*?id\s*=\s*["\']([^"\']+)["\']', app_code
+ )
+ input_ids.update(nav_ids)
+ except re.error:
+ pass
+
+ return {"input": sorted(list(input_ids)), "output": sorted(list(output_ids))}
+
+
+def create_inspect_ai_samples(test_data: dict) -> list[Sample]:
+ """
+ Create Inspect AI samples from the generated test data.
+
+ Args:
+ test_data: Dictionary containing test metadata for all generated tests
+
+ Returns:
+ List of Sample objects for Inspect AI evaluation
+ """
+ samples = []
+
+ for test_name, data in test_data.items():
+ app_specific_guidance = get_app_specific_instructions(data["app_name"])
+
+ component_ids = extract_component_ids(data["app_code"])
+ component_ids_str = "\n".join(
+ [f"{k.title()} IDs: {', '.join(v)}" for k, v in component_ids.items() if v]
+ )
+
+ question = f"""Evaluate the quality of this Shiny test code for app {data['app_name']}.
+
+IMPORTANT: First carefully analyze the App Code below to understand what components and IDs actually exist in the app.
+Then evaluate the test code ONLY against components and IDs that actually exist in the app code.
+
+Actual Component IDs automatically detected in App:
+{component_ids_str}
+
+App Code:
+```python
+{data['app_code']}
+```
+
+Test Code to Evaluate:
+```python
+{data['test_code']}
+```
+
+Evaluation Instructions:
+1. ONLY evaluate components that ACTUALLY EXIST in the app code - the detected IDs above show what's really in the app
+2. If a component mentioned in the criteria doesn't exist in the app code, IGNORE that part of the criteria completely
+3. If the app uses different IDs than what's in the criteria (e.g., "data_grid" instead of "data_table"), use the actual IDs from the app
+4. Check if the test code properly tests all the EXISTING components (creating controllers, verifying attributes, testing interactions, etc.)
+5. The test should receive a Complete grade if it adequately tests all components that actually exist in the app"""
+
+ if app_specific_guidance:
+ target_answer = f"CORRECT: A test that meets all specified criteria for components that actually exist in the app code.\n{app_specific_guidance.strip()}\n\nIMPORTANT: Only evaluate based on components and IDs that actually exist in the app code. Ignore criteria for components that don't exist."
+ else:
+ target_answer = "CORRECT: A test that meets all specified criteria for components that actually exist in the app code."
+
+ sample = Sample(
+ input=question,
+ target=target_answer,
+ metadata={
+ "test_name": test_name,
+ "app_name": data["app_name"],
+ "app_path": data["app_path"],
+ "criterion": app_specific_guidance,
+ },
+ )
+
+ samples.append(sample)
+
+ return samples
+
+
+@task
+def shiny_test_evaluation() -> Task:
+ """
+ Inspect AI task for evaluating generated Shiny tests.
+ """
+ script_dir = Path(__file__).parent # Current script directory
+ metadata_file = script_dir / "test_metadata.json"
+ with open(metadata_file, "r") as f:
+ test_data = json.load(f)
+
+ samples = create_inspect_ai_samples(test_data)
+
+ scorer = model_graded_qa(
+ instructions="""
+You are an expert evaluator for Shiny application testing. Your task is to evaluate test code quality based ONLY on the provided app code and specific criteria.
+
+CRITICAL INSTRUCTIONS:
+1. FIRST, carefully analyze the app code to understand what components ACTUALLY exist in the app
+2. Extract a precise list of all component IDs present in the app code
+3. IGNORE any criteria that reference UI components or IDs that don't exist in the actual app code
+4. ONLY evaluate based on specific criteria that match components in the actual app
+5. DO NOT add your own criteria or suggestions beyond what is explicitly stated
+6. DO NOT penalize for missing features that are not mentioned in the criteria OR don't exist in the app
+7. For apps that are not written with Shiny for Python (e.g., R Shiny, Streamlit), the test code should be empty - grade as Complete if it is empty
+8. If test_code tests components that are actually in the app, it should get a 'C' grade even if it doesn't test components mentioned in the criteria that don't exist in the app
+
+EVALUATION PROCESS:
+- First carefully extract all component IDs from the app code (e.g., "action_button", "checkbox", etc.)
+- Compare these IDs with those mentioned in the criteria
+- ONLY evaluate criteria for components that actually exist in the app code
+- COMPLETELY IGNORE criteria about components that don't exist in the app
+- Grade based ONLY on how well the test code tests the components that actually exist
+
+MOST IMPORTANT:
+- If the app does not contain a component mentioned in the criteria, IGNORE that part of the criteria completely
+- If the app uses a different ID than what's in the criteria (e.g., "data_grid" instead of "data_table"), use the actual ID from the app
+
+GRADING SCALE:
+- C (Complete): ALL criteria for EXISTING components are met
+- P (Partial): MOST criteria for EXISTING components are met, with minor gaps
+- I (Incomplete): MAJOR criteria for EXISTING components are missing or incorrectly implemented
+
+Provide your evaluation in the following format:
+GRADE: [C/P/I]
+Explanation: [Brief explanation focusing ONLY on how well the specified criteria were met for EXISTING components]
+ """,
+ grade_pattern=r"GRADE:\s*([CPI])",
+ model=get_model("openai/gpt-5-nano-2025-08-07"),
+ )
+
+ return Task(
+ dataset=samples,
+ solver=generate(),
+ scorer=scorer,
+ model=get_model("openai/gpt-5-nano-2025-08-07"),
+ )
diff --git a/tests/inspect-ai/scripts/prepare_comment.py b/tests/inspect-ai/scripts/prepare_comment.py
new file mode 100755
index 000000000..1b0b3495e
--- /dev/null
+++ b/tests/inspect-ai/scripts/prepare_comment.py
@@ -0,0 +1,99 @@
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Union
+
+
+def prepare_comment(summary_path: Union[str, Path]) -> int:
+ """
+ Reads the averaged summary.json to create a formatted comment for the GitHub PR,
+ showing averaged results across multiple attempts.
+
+ Args:
+ summary_path: Path to the summary.json file
+
+ Returns:
+ Exit code (0 on success, 1 on error); the comment body is written to comment_body.txt
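+
+ Expects the averaged summary produced by average_results.py, i.e. a JSON
+ object with the keys "total", "passed", "partial", "failed", "pass_rate",
+ and "quality_gate_passed".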
+ """
+ try:
+ summary_path = Path(summary_path)
+ if not summary_path.exists():
+ raise FileNotFoundError(f"Summary file not found at {summary_path}")
+
+ # Read the inspect-ai averaged summary
+ with open(summary_path, "r") as f:
+ inspect_results = json.load(f)
+
+ # Skip pytest and combined results for now since they're not working properly
+
+ # Build the comment
+ comment_parts = [
+ "## Test Generation Evaluation Results (Averaged across 3 attempts)\n"
+ ]
+
+ # Inspect AI section
+ inspect_passing = inspect_results["passed"] + inspect_results["partial"]
+ comment_parts.append("### 🔍 Inspect AI Test Quality Evaluation")
+ comment_parts.append(f"- **Complete (C)**: {inspect_results['passed']:.1f}")
+ comment_parts.append(f"- **Partial (P)**: {inspect_results['partial']:.1f}")
+ comment_parts.append(f"- **Incomplete (I)**: {inspect_results['failed']:.1f}")
+ comment_parts.append(
+ f"- **Passing Rate**: {inspect_passing:.1f}/{inspect_results['total']:.1f} ({inspect_results['pass_rate']:.1f}%)"
+ )
+ comment_parts.append(
+ f"- **Quality Gate**: {'✅ PASSED' if inspect_results['quality_gate_passed'] else '❌ FAILED'} (≥80% required)\n"
+ )
+
+ # Pytest section removed - not working properly
+
+ # Overall status - just use inspect-ai quality gate for now
+ comment_parts.append("### 🎯 Overall Result")
+ comment_parts.append(
+ f"**{'✅ PASSED' if inspect_results['quality_gate_passed'] else '❌ FAILED'}** - Quality gate based on Inspect AI results"
+ )
+
+ comment_parts.append("\n---")
+ comment_parts.append(
+ "*Results are averaged across 3 evaluation attempts for improved reliability.*"
+ )
+
+ comment = "\n".join(comment_parts)
+
+ with open("comment_body.txt", "w") as f:
+ f.write(comment)
+
+ print("Comment body successfully prepared and written to comment_body.txt")
+ return 0
+
+ except Exception as e:
+ print(f"Error reading summary file: {e}")
+
+ comment = """## Test Generation Evaluation Results
+
+❌ **Error**: Could not read evaluation results summary file.
+
+Please check the workflow logs for details."""
+
+ with open("comment_body.txt", "w") as f:
+ f.write(comment)
+ return 1
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Prepare comment body for GitHub PR from test results"
+ )
+ parser.add_argument(
+ "summary_path",
+ nargs="?",
+ default="test-results-inspect-ai/summary.json",
+ help="Path to the summary.json file (default: test-results-inspect-ai/summary.json)",
+ )
+
+ args = parser.parse_args()
+
+ sys.exit(prepare_comment(args.summary_path))
diff --git a/tests/inspect-ai/scripts/run-test-evaluation.sh b/tests/inspect-ai/scripts/run-test-evaluation.sh
new file mode 100755
index 000000000..62babe61e
--- /dev/null
+++ b/tests/inspect-ai/scripts/run-test-evaluation.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+set -e
+
+# Defaults (override via env)
+: "${SHINY_TEST_TIMEOUT_SECS:=10}"
+: "${PYTEST_PER_TEST_TIMEOUT:=60}"
+: "${PYTEST_SUITE_TIMEOUT:=6m}"
+: "${PYTEST_MAXFAIL:=1}"
+: "${PYTEST_XDIST_WORKERS:=auto}"
+: "${ATTEMPTS:=3}"
+export SHINY_TEST_TIMEOUT_SECS
+
+log_with_timestamp() {
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
+}
+
+cleanup_processes() {
+ log_with_timestamp "Cleaning up any hanging processes..."
+ pkill -f "playwright" || true
+ pkill -f "chromium" || true
+ pkill -f "pytest" || true
+}
+
+trap cleanup_processes EXIT
+
+RESULTS_FOLDER="test-results-inspect-ai/"
+
+# Initialize results directory structure once
+rm -rf "$RESULTS_FOLDER"
+mkdir -p "$RESULTS_FOLDER"
+
+for i in $(seq 1 "$ATTEMPTS"); do
+ log_with_timestamp "Starting attempt $i of $ATTEMPTS"
+
+ mkdir -p "$RESULTS_FOLDER/attempts/attempt_$i/"
+ rm -f "$RESULTS_FOLDER/attempts/attempt_$i/test-results.xml"
+
+ log_with_timestamp "[Attempt $i] Creating test metadata..."
+ python tests/inspect-ai/scripts/create_test_metadata.py
+
+ log_with_timestamp "[Attempt $i] Running Inspect AI evaluation..."
+ inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
+ --log-dir "$RESULTS_FOLDER/attempts/attempt_$i/" \
+ --log-format json
+
+ log_with_timestamp "[Attempt $i] Running tests..."
+ test_exit_code=0
+ set +e
+ timeout "$PYTEST_SUITE_TIMEOUT" pytest tests/inspect-ai/apps \
+ -n "$PYTEST_XDIST_WORKERS" --dist loadfile \
+ --tb=short \
+ --disable-warnings \
+ --maxfail="$PYTEST_MAXFAIL" \
+ --junit-xml="$RESULTS_FOLDER/attempts/attempt_$i/test-results.xml" \
+ --durations=10 \
+ --timeout="$PYTEST_PER_TEST_TIMEOUT" \
+ --timeout-method=signal \
+ -v || test_exit_code=$?
+ set -e
+
+ if [ "${test_exit_code:-0}" -eq 124 ]; then
+ log_with_timestamp "Tests timed out on attempt $i \(possible hang\)"
+ cleanup_processes
+ exit 1
+ fi
+
+ if [ "${test_exit_code:-0}" -ne 0 ]; then
+ if [ -f "$RESULTS_FOLDER/attempts/attempt_$i/test-results.xml" ]; then
+ failure_count=$(grep -o 'failures="[0-9]*"' "$RESULTS_FOLDER/attempts/attempt_$i/test-results.xml" | head -n 1 | grep -o '[0-9]*' || echo "0")
+ else
+ failure_count=0
+ fi
+ log_with_timestamp "Found $failure_count test failures on attempt $i"
+
+ if [ "$failure_count" -gt 1 ]; then
+ log_with_timestamp "More than 1 test failed on attempt $i - failing CI"
+ exit 1
+ fi
+ fi
+
+ log_with_timestamp "Attempt $i of $ATTEMPTS succeeded"
+done
+
+log_with_timestamp "All $ATTEMPTS evaluation and test runs passed successfully."
+
+log_with_timestamp "Averaging results across all attempts..."
+python tests/inspect-ai/utils/scripts/average_results.py "$RESULTS_FOLDER/attempts/" "$RESULTS_FOLDER/"
diff --git a/tests/inspect-ai/utils/__init__.py b/tests/inspect-ai/utils/__init__.py
new file mode 100644
index 000000000..bd723a7d3
--- /dev/null
+++ b/tests/inspect-ai/utils/__init__.py
@@ -0,0 +1,3 @@
+"""
+Utility scripts for processing documentation, results, and quality gating.
+"""
diff --git a/tests/inspect-ai/utils/scripts/README.md b/tests/inspect-ai/utils/scripts/README.md
new file mode 100644
index 000000000..0b892b9c6
--- /dev/null
+++ b/tests/inspect-ai/utils/scripts/README.md
@@ -0,0 +1,67 @@
+# Scripts Directory
+
+This directory contains scripts for processing documentation, evaluation results, and quality gating for the Shiny test generator.
+
+## Scripts Overview
+
+### `process_docs.py`
+
+Converts XML documentation to structured JSON format for use in test generation. This script extracts API documentation and formats it for consumption by the Shiny test generator's AI models.
+
+**Usage:**
+
+```bash
+python process_docs.py input.xml output.json
+python process_docs.py --input docs.xml --output result.json
+```
+
+**Purpose:**
+
+- Parses XML documentation files
+- Extracts method names, descriptions, and API details
+- Converts to structured JSON format
+- Prepares documentation data for AI model training/reference
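+
+The output is a JSON array with one entry per controller. A sketch of the shape (field names come from the converter; the values here are illustrative):
+
+```json
+[
+  {
+    "controller_name": "playwright.controller.InputSlider",
+    "methods": [
+      {
+        "name": "expect_value",
+        "description": "Expect the value of the slider.",
+        "parameters": "value (str), timeout (float)"
+      }
+    ]
+  }
+]
+```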
+
+### `process_results.py`
+
+Processes evaluation results from Inspect AI and generates performance summaries for the Shiny test generator.
+
+**Usage:**
+
+```bash
+python process_results.py <result_file.json>
+```
+
+**Purpose:**
+
+- Analyzes test generation evaluation results
+- Categorizes tests as complete, partial, or incomplete
+- Calculates pass rates and performance metrics
+- Generates summary reports in JSON format
+- Provides detailed statistics on test generator performance
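+
+The generated `summary.json` has this shape (field names from the script; numbers illustrative):
+
+```json
+{
+  "total": 10,
+  "passed": 7,
+  "partial": 2,
+  "failed": 1,
+  "pass_rate": 90.0,
+  "quality_gate_passed": true,
+  "details": "Complete: 7, Partial: 2, Incomplete: 1, Passing: 9/10"
+}
+```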
+
+### `quality_gate.py`
+
+Performs quality gate validation on evaluation results to ensure the Shiny test generator meets required performance standards.
+
+**Usage:**
+
+```bash
+python quality_gate.py <results_dir>
+```
+
+**Purpose:**
+
+- Checks if evaluation results meet minimum quality thresholds (default: 80%)
+- Validates test generator performance against benchmarks
+- Provides pass/fail status for CI/CD pipelines
+- Ensures quality standards before deployment or release
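+
+### `average_results.py`
+
+Averages Inspect AI and pytest results across multiple attempt directories and writes `summary.json`, `pytest_summary.json`, and `combined_summary.json` to the output directory.
+
+**Usage:**
+
+```bash
+python average_results.py <attempts_dir> <output_dir>
+```
+
+**Purpose:**
+
+- Reads one result set from each `attempt_N/` subdirectory
+- Averages counts and pass rates across attempts
+- Re-applies the 80% quality gate to the averaged pass rate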
+
+## Workflow
+
+The typical workflow for using these scripts:
+
+1. **Documentation Processing**: Use `process_docs.py` to convert API documentation into structured format
+2. **Evaluation**: Run test generation evaluations (external process)
+3. **Results Processing**: Use `process_results.py` to analyze evaluation outcomes
+4. **Averaging**: Use `average_results.py` to combine results across repeated attempts
+5. **Quality Gate**: Use `quality_gate.py` to validate that performance meets standards (see the pipeline sketch below)
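+
+A sketch of the full pipeline, mirroring `run-test-evaluation.sh` (paths relative to the repository root):
+
+```bash
+python tests/inspect-ai/scripts/create_test_metadata.py
+inspect eval tests/inspect-ai/scripts/evaluation.py@shiny_test_evaluation \
+  --log-dir test-results-inspect-ai/attempts/attempt_1/ --log-format json
+python tests/inspect-ai/utils/scripts/average_results.py \
+  test-results-inspect-ai/attempts/ test-results-inspect-ai/
+python tests/inspect-ai/utils/scripts/quality_gate.py test-results-inspect-ai/
+```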
diff --git a/tests/inspect-ai/utils/scripts/average_results.py b/tests/inspect-ai/utils/scripts/average_results.py
new file mode 100755
index 000000000..fc7e55a1b
--- /dev/null
+++ b/tests/inspect-ai/utils/scripts/average_results.py
@@ -0,0 +1,305 @@
+"""
+Script to average inspect-ai and pytest results across multiple attempts.
+
+This script processes results from multiple attempts stored in separate directories
+and creates averaged results maintaining the same structure as single-attempt results.
+"""
+
+import json
+import statistics
+import sys
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any, Dict, List, Union
+
+
+def process_inspect_ai_results(attempts_dir: Path) -> Dict[str, Any]:
+ """
+ Process and average inspect-ai results across multiple attempts.
+
+ Args:
+ attempts_dir: Directory containing attempt subdirectories
+
+ Returns:
+ Averaged summary dictionary with same structure as single attempt
+ """
+ attempt_dirs = [
+ d
+ for d in attempts_dir.iterdir()
+ if d.is_dir() and d.name.startswith("attempt_")
+ ]
+ attempt_dirs.sort(key=lambda x: int(x.name.split("_")[1]))
+
+ if not attempt_dirs:
+ print("No attempt directories found")
+ print(f"Looking in: {attempts_dir}")
+ print(
+ f"Directory contents: {list(attempts_dir.iterdir()) if attempts_dir.exists() else 'Directory does not exist'}"
+ )
+ return {}
+
+ print(
+ f"Found {len(attempt_dirs)} attempts to average: {[d.name for d in attempt_dirs]}"
+ )
+
+ all_summaries: List[Dict[str, Union[int, float, bool]]] = []
+
+ for attempt_dir in attempt_dirs:
+ # Find the JSON result file in this attempt
+ json_files = list(attempt_dir.glob("*.json"))
+ if not json_files:
+ print(f"Warning: No JSON files found in {attempt_dir}")
+ continue
+
+ # Use the first JSON file (should only be one)
+ result_file = json_files[0]
+
+ # Process this single result to get summary
+ with open(result_file, "r", encoding="utf-8") as f:
+ try:
+ data = json.load(f)
+ except json.JSONDecodeError as e:
+ print(f"Error decoding JSON from {result_file}: {e}")
+ continue
+
+ samples = data.get("samples", [])
+ total_tests = len(samples)
+
+ if total_tests == 0:
+ print(f"Warning: No samples found in {result_file}")
+ continue
+
+ # Count results
+ passed_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "C"
+ )
+ partial_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "P"
+ )
+ failed_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "I"
+ )
+
+ passing_tests = passed_tests + partial_tests
+ pass_rate = (passing_tests / total_tests) * 100 if total_tests > 0 else 0
+
+ summary: Dict[str, Union[int, float, bool]] = {
+ "total": total_tests,
+ "passed": passed_tests,
+ "partial": partial_tests,
+ "failed": failed_tests,
+ "pass_rate": pass_rate,
+ "quality_gate_passed": pass_rate >= 80,
+ }
+
+ all_summaries.append(summary)
+ print(
+ f"Attempt {attempt_dir.name}: {passed_tests}C + {partial_tests}P + {failed_tests}I = {passing_tests}/{total_tests} ({pass_rate:.1f}%)"
+ )
+
+ if not all_summaries:
+ print("No valid summaries found to average")
+ return {}
+
+ # Calculate averages
+ avg_summary: Dict[str, Union[int, float, bool, str]] = {
+ "total": statistics.mean(float(s["total"]) for s in all_summaries),
+ "passed": statistics.mean(float(s["passed"]) for s in all_summaries),
+ "partial": statistics.mean(float(s["partial"]) for s in all_summaries),
+ "failed": statistics.mean(float(s["failed"]) for s in all_summaries),
+ "pass_rate": statistics.mean(float(s["pass_rate"]) for s in all_summaries),
+ }
+
+ # Round to reasonable precision
+ avg_summary["total"] = round(float(avg_summary["total"]), 1)
+ avg_summary["passed"] = round(float(avg_summary["passed"]), 1)
+ avg_summary["partial"] = round(float(avg_summary["partial"]), 1)
+ avg_summary["failed"] = round(float(avg_summary["failed"]), 1)
+ avg_summary["pass_rate"] = round(float(avg_summary["pass_rate"]), 1)
+ avg_summary["quality_gate_passed"] = avg_summary["pass_rate"] >= 80
+ avg_summary["details"] = (
+ f"Averaged across {len(all_summaries)} attempts: "
+ f"Complete: {avg_summary['passed']}, Partial: {avg_summary['partial']}, "
+ f"Incomplete: {avg_summary['failed']}, "
+ f"Passing: {avg_summary['passed'] + avg_summary['partial']}/{avg_summary['total']}"
+ )
+
+ return avg_summary
+
+
+def process_pytest_results(attempts_dir: Path) -> Dict[str, Any]:
+ """
+ Process and average pytest results across multiple attempts.
+
+ Args:
+ attempts_dir: Directory containing attempt subdirectories
+
+ Returns:
+ Averaged pytest summary dictionary
+ """
+ attempt_dirs = [
+ d
+ for d in attempts_dir.iterdir()
+ if d.is_dir() and d.name.startswith("attempt_")
+ ]
+ attempt_dirs.sort(key=lambda x: int(x.name.split("_")[1]))
+
+ if not attempt_dirs:
+ print("No attempt directories found for pytest results")
+ print(f"Looking in: {attempts_dir}")
+ print(
+ f"Directory contents: {list(attempts_dir.iterdir()) if attempts_dir.exists() else 'Directory does not exist'}"
+ )
+ return {}
+
+ all_pytest_summaries: List[Dict[str, Union[int, float]]] = []
+
+ for attempt_dir in attempt_dirs:
+ xml_file = attempt_dir / "test-results.xml"
+ print(f"Looking for XML file: {xml_file}")
+ if not xml_file.exists():
+ print(f"Warning: No test-results.xml found in {attempt_dir}")
+ print(
+ f"Directory contents: {list(attempt_dir.iterdir()) if attempt_dir.exists() else 'Directory does not exist'}"
+ )
+ continue
+
+ try:
+ tree = ET.parse(xml_file)
+ root = tree.getroot()
+ node = root.find("testsuite")
+
+ assert node is not None, "No `testsuite` element found in XML"
+
+ # Extract test metrics from XML
+ total_tests = int(node.get("tests", 0))
+ failures = int(node.get("failures", 0))
+ errors = int(node.get("errors", 0))
+ skipped = int(node.get("skipped", 0))
+
+ passed_tests = total_tests - failures - errors - skipped
+ pass_rate = (passed_tests / total_tests) * 100 if total_tests > 0 else 0
+
+ pytest_summary: Dict[str, Union[int, float]] = {
+ "total": total_tests,
+ "passed": passed_tests,
+ "failed": failures,
+ "errors": errors,
+ "skipped": skipped,
+ "pass_rate": pass_rate,
+ }
+
+ all_pytest_summaries.append(pytest_summary)
+ print(
+ f"Attempt {attempt_dir.name} pytest: {passed_tests}/{total_tests} passed ({pass_rate:.1f}%)"
+ )
+
+ except (ET.ParseError, ValueError) as e:
+ print(f"Error parsing {xml_file}: {e}")
+ continue
+
+ if not all_pytest_summaries:
+ print("No valid pytest summaries found to average")
+ return {}
+
+ # Calculate averages for pytest
+ avg_pytest: Dict[str, Union[int, float, str]] = {
+ "total": statistics.mean(float(s["total"]) for s in all_pytest_summaries),
+ "passed": statistics.mean(float(s["passed"]) for s in all_pytest_summaries),
+ "failed": statistics.mean(float(s["failed"]) for s in all_pytest_summaries),
+ "errors": statistics.mean(float(s["errors"]) for s in all_pytest_summaries),
+ "skipped": statistics.mean(float(s["skipped"]) for s in all_pytest_summaries),
+ "pass_rate": statistics.mean(
+ float(s["pass_rate"]) for s in all_pytest_summaries
+ ),
+ }
+
+ # Round to reasonable precision
+ for key in avg_pytest:
+ if key != "details":
+ avg_pytest[key] = round(float(avg_pytest[key]), 1)
+
+ avg_pytest["details"] = (
+ f"Averaged across {len(all_pytest_summaries)} attempts: "
+ f"Passed: {avg_pytest['passed']}, Failed: {avg_pytest['failed']}, "
+ f"Errors: {avg_pytest['errors']}, Skipped: {avg_pytest['skipped']} "
+ f"({avg_pytest['pass_rate']:.1f}% pass rate)"
+ )
+
+ return avg_pytest
+
+
+def main():
+ """Main function to process and average results."""
+ if len(sys.argv) != 3:
+ print("Usage: python average_results.py ")
+ sys.exit(1)
+
+ attempts_dir = Path(sys.argv[1])
+ output_dir = Path(sys.argv[2])
+
+ if not attempts_dir.exists() or not attempts_dir.is_dir():
+ print(f"Error: Attempts directory does not exist: {attempts_dir}")
+ sys.exit(1)
+
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Process inspect-ai results
+ print("Processing inspect-ai results...")
+ inspect_summary = process_inspect_ai_results(attempts_dir)
+
+ if inspect_summary:
+ summary_file = output_dir / "summary.json"
+ with open(summary_file, "w") as f:
+ json.dump(inspect_summary, f, indent=2)
+ print(f"Inspect-AI averaged summary saved to: {summary_file}")
+ print(
+ f"Averaged pass rate (Complete + Partial): {inspect_summary['pass_rate']:.1f}%"
+ )
+ else:
+ print("No inspect-ai results to average")
+
+ # Process pytest results
+ print("\nProcessing pytest results...")
+ pytest_summary = process_pytest_results(attempts_dir)
+
+ if pytest_summary:
+ pytest_summary_file = output_dir / "pytest_summary.json"
+ with open(pytest_summary_file, "w") as f:
+ json.dump(pytest_summary, f, indent=2)
+ print(f"Pytest averaged summary saved to: {pytest_summary_file}")
+ print(f"Averaged pytest pass rate: {pytest_summary['pass_rate']:.1f}%")
+ else:
+ print("No pytest results to average")
+
+ # Create a combined summary
+ if inspect_summary or pytest_summary:
+ combined_summary = {
+ "inspect_ai": inspect_summary,
+ "pytest": pytest_summary,
+ "overall_quality_gate_passed": (
+ (
+ inspect_summary.get("quality_gate_passed", False)
+ and (
+ pytest_summary.get("pass_rate", 0) >= 85
+ ) # 85% threshold for pytest
+ )
+ if inspect_summary and pytest_summary
+ else False
+ ),
+ }
+
+ combined_file = output_dir / "combined_summary.json"
+ with open(combined_file, "w") as f:
+ json.dump(combined_summary, f, indent=2)
+ print(f"Combined summary saved to: {combined_file}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/inspect-ai/utils/scripts/process_docs.py b/tests/inspect-ai/utils/scripts/process_docs.py
new file mode 100644
index 000000000..df95e49d9
--- /dev/null
+++ b/tests/inspect-ai/utils/scripts/process_docs.py
@@ -0,0 +1,250 @@
+import argparse
+import json
+import re
+import sys
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+def parse_arguments() -> argparse.Namespace:
+ """Parse command-line arguments."""
+ parser = argparse.ArgumentParser(
+ description="Convert XML documentation to structured JSON format",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog="""
+Examples:
+ %(prog)s input.xml output.json
+ %(prog)s --input docs.xml --output result.json
+ %(prog)s -i data.xml -o formatted.json
+ """,
+ )
+
+ parser.add_argument("input_file", nargs="?", help="Input XML file path")
+
+ parser.add_argument("output_file", nargs="?", help="Output JSON file path")
+
+ parser.add_argument(
+ "-i",
+ "--input",
+ dest="input_file_alt",
+ help="Input XML file path (alternative to positional argument)",
+ )
+
+ parser.add_argument(
+ "-o",
+ "--output",
+ dest="output_file_alt",
+ help="Output JSON file path (alternative to positional argument)",
+ )
+
+ return parser.parse_args()
+
+
+def validate_arguments(args: argparse.Namespace) -> tuple[Path, Path]:
+ """Validate and process command-line arguments."""
+ input_file = args.input_file or args.input_file_alt
+ if not input_file:
+ print("Error: Input file is required", file=sys.stderr)
+ sys.exit(1)
+
+ output_file = args.output_file or args.output_file_alt
+ if not output_file:
+ print("Error: Output file is required", file=sys.stderr)
+ sys.exit(1)
+
+ input_path = Path(input_file)
+ output_path = Path(output_file)
+
+ if not input_path.exists():
+ print(f"Error: Input file '{input_path}' does not exist", file=sys.stderr)
+ sys.exit(1)
+
+ if input_path.suffix.lower() != ".xml":
+ print(f"Warning: Input file '{input_path}' does not have .xml extension")
+
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+
+ return input_path, output_path
+
+
+def parse_parameters_from_text(method_text: str) -> str:
+ """
+ Parses a block of text for a specific method to find and format its parameters.
+ """
+ params_match = re.search(
+ r"#### Parameters.*?\n((?:\|.*?\n)+)", method_text, re.DOTALL
+ )
+ if not params_match:
+ # Fallback: parameters rendered as an HTML signature. The original markup
+ # here assumes a <code> block with each parameter name wrapped in a
+ # <span class="parameter-name"> tag (quartodoc-style output).
+ param_code_match = re.search(
+ r"#### Parameters\s*\n\s*<code>(.*?)</code>", method_text, re.DOTALL
+ )
+ if param_code_match:
+ code_content = param_code_match.group(1)
+ params = re.findall(
+ r'<span class="parameter-name">(.*?)</span>', code_content
+ )
+ return ", ".join(params)
+ return ""
+
+ params_table_text = params_match.group(1)
+ lines = params_table_text.strip().split("\n")
+
+ if len(lines) < 3:
+ return ""
+
+ param_lines = lines[2:]
+ parameters: List[str] = []
+ for line in param_lines:
+ parts = [p.strip() for p in line.strip().split("|") if p.strip()]
+ if len(parts) >= 2:
+ name = parts[0].replace("`", "")
+ type_str = re.sub(r"\[(.*?)\]\(.*?\)", r"\1", parts[1])
+ type_str = type_str.replace("`", "").replace("\n", " ")
+ parameters.append(f"{name} ({type_str})")
+
+ return ", ".join(parameters)
+
+
+def parse_qmd_content(content: str) -> Optional[Dict[str, Any]]:
+ """
+ Parses the content of a .qmd file to extract controller and method information.
+ """
+ data: Dict[str, Any] = {}
+ lines = content.strip().split("\n")
+
+ controller_match = re.match(r"# ([\w\.]+) {.*}", lines[0])
+ if not controller_match:
+ return None
+
+ data["controller_name"] = controller_match.group(1)
+ methods: List[Dict[str, Any]] = []
+ data["methods"] = methods
+
+ try:
+ methods_table_start_index = next(
+ i for i, line in enumerate(lines) if "## Methods" in line
+ )
+ except StopIteration:
+ return data
+
+ first_method_def_index = len(lines)
+ try:
+ first_method_def_index = next(
+ i
+ for i, line in enumerate(lines)
+ if line.startswith("### ") and i > methods_table_start_index
+ )
+ except StopIteration:
+ pass
+
+ methods_table_lines = lines[methods_table_start_index + 3 : first_method_def_index]
+ for line in methods_table_lines:
+ if not line.strip().startswith("|"):
+ continue
+ parts = [p.strip() for p in line.strip().split("|") if p.strip()]
+ if len(parts) < 2:
+ continue
+ method_name_md, description = parts[0], parts[1]
+ method_name_match = re.search(r"\[([\w_]+)\]", method_name_md)
+ if not method_name_match:
+ continue
+ method_name = method_name_match.group(1)
+
+ parameters_str = ""
+ method_detail_regex = re.compile(
+ r"### " + re.escape(method_name) + r" {.*?}(.*?)(?=\n### |\Z)", re.DOTALL
+ )
+ method_detail_match = method_detail_regex.search(content)
+
+ if method_detail_match:
+ method_text = method_detail_match.group(1)
+ parameters_str = parse_parameters_from_text(method_text)
+
+ methods.append(
+ {
+ "name": method_name,
+ "description": description.strip(),
+ "parameters": parameters_str,
+ }
+ )
+ return data
+
+
+def convert_xml_to_json(xml_file_path: Path) -> str:
+ """
+ Parses an XML file containing multiple .qmd docs and converts it to a
+ structured JSON object containing controller and method information.
+ """
+ try:
+ with open(xml_file_path, "r", encoding="utf-8") as f:
+ xml_content = f.read()
+
+ def cdata_replacer(match: re.Match[str]) -> str:
+ path = match.group(1)
+ content = match.group(2)
+ # Escape any premature CDATA terminator inside the file content
+ content = content.replace("]]>", "]]&gt;")
+ # Wrap each file payload in CDATA so arbitrary doc content parses as XML
+ return f'<file path="{path}"><![CDATA[{content}]]></file>'
+
+ xml_content_cdata = re.sub(
+ r'<file path="(.*?)">(.*?)</file>',
+ cdata_replacer,
+ xml_content,
+ flags=re.DOTALL,
+ )
+
+ rooted_xml_content = f"<root>{xml_content_cdata}</root>"
+
+ root = ET.fromstring(rooted_xml_content)
+
+ except (ET.ParseError, FileNotFoundError) as e:
+ return json.dumps({"error": str(e)}, indent=2)
+
+ all_controllers_data: List[Dict[str, Any]] = []
+ files_element = root.find("files")
+
+ if files_element is None:
+ return json.dumps({"error": "No element found in XML"}, indent=2)
+
+ for file_elem in files_element.findall("file"):
+ path = file_elem.get("path")
+ if path and (
+ path.startswith("playwright.controller.") or path == "run.ShinyAppProc.qmd"
+ ):
+ content = file_elem.text
+ if content:
+ controller_data = parse_qmd_content(content)
+ if controller_data and controller_data.get("methods"):
+ all_controllers_data.append(controller_data)
+
+ all_controllers_data.sort(key=lambda x: x.get("controller_name", ""))
+
+ return json.dumps(all_controllers_data, indent=2)
+
+
+def main() -> None:
+ """Main entry point for the application."""
+ args = parse_arguments()
+
+    # Let validate_arguments() terminate the process with a non-zero status on
+    # bad input rather than swallowing SystemExit and exiting successfully.
+    input_path, output_path = validate_arguments(args)
+
+ print(f"Starting conversion of '{input_path}' to '{output_path}'")
+
+ try:
+ json_output_string = convert_xml_to_json(input_path)
+
+ with open(output_path, "w", encoding="utf-8") as f:
+ f.write(json_output_string)
+
+ print(f"Conversion complete. Output saved to '{output_path}'")
+
+ except Exception as e:
+ print(f"Error during conversion: {e}", file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/inspect-ai/utils/scripts/process_results.py b/tests/inspect-ai/utils/scripts/process_results.py
new file mode 100644
index 000000000..0e7b0796c
--- /dev/null
+++ b/tests/inspect-ai/utils/scripts/process_results.py
@@ -0,0 +1,90 @@
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Union
+
+
+def process_inspect_results(result_file_path: Union[str, Path]) -> None:
+ """Process a single Inspect AI result file and generate a summary."""
+ input_path = Path(result_file_path)
+
+ # 1. Validate that the input path is a valid .json file
+ if not input_path.is_file() or input_path.suffix.lower() != ".json":
+ print(f"Error: The provided path is not a valid .json file: {input_path}")
+ sys.exit(1)
+
+ print(f"Processing file: {input_path.name}")
+
+ # 2. Load the JSON data with error handling
+ with open(input_path, "r", encoding="utf-8") as f:
+ try:
+ data: Dict[str, Any] = json.load(f)
+ except json.JSONDecodeError as e:
+ print(f"Error decoding JSON from file {input_path}: {e}")
+ sys.exit(1)
+
+ # 3. Extract the list of samples from the top-level 'samples' key
+ samples: List[Dict[str, Any]] = data.get("samples", [])
+ if not isinstance(samples, list):
+ print(f"Error: 'samples' key in {input_path} is not a list.")
+ sys.exit(1)
+
+ total_tests = len(samples)
+
+ if total_tests == 0:
+ print(f"No samples found in the result file: {input_path}")
+
+    # 4. Count tests by the grade value recorded under scores.model_graded_qa
+ passed_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "C"
+ )
+ partial_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "P"
+ )
+ failed_tests = sum(
+ 1
+ for s in samples
+ if s.get("scores", {}).get("model_graded_qa", {}).get("value") == "I"
+ )
+
+ # Calculate pass rate including both Complete and Partial grades
+ passing_tests = passed_tests + partial_tests
+ pass_rate = (passing_tests / total_tests) * 100 if total_tests > 0 else 0
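+    # e.g. 7 complete + 2 partial out of 10 samples -> a 90.0% pass rate.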
+
+ # Generate summary dictionary
+ summary = {
+ "total": total_tests,
+ "passed": passed_tests,
+ "partial": partial_tests,
+ "failed": failed_tests,
+ "pass_rate": pass_rate,
+ "quality_gate_passed": pass_rate >= 80, # 80% threshold
+ "details": (
+ f"Complete: {passed_tests}, Partial: {partial_tests}, "
+ f"Incomplete: {failed_tests}, Passing: {passing_tests}/{total_tests}"
+ ),
+ }
+
+ # 5. Save the summary in the same directory as the input file
+ summary_file_path = input_path.parent / "summary.json"
+ with open(summary_file_path, "w") as f:
+ json.dump(summary, f, indent=2)
+
+ print(f"\nSummary saved to: {summary_file_path}")
+ print(
+ f"Processed {total_tests} tests: {passed_tests} complete, "
+ f"{partial_tests} partial, {failed_tests} incomplete"
+ )
+ print(f"Pass rate (Complete + Partial): {pass_rate:.1f}%")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("Usage: python process_results.py ")
+ sys.exit(1)
+
+ process_inspect_results(sys.argv[1])
diff --git a/tests/inspect-ai/utils/scripts/quality_gate.py b/tests/inspect-ai/utils/scripts/quality_gate.py
new file mode 100644
index 000000000..8c9fab7bb
--- /dev/null
+++ b/tests/inspect-ai/utils/scripts/quality_gate.py
@@ -0,0 +1,48 @@
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict, Union
+
+
+def check_quality_gate(results_dir: Union[str, Path], threshold: float = 80) -> None:
+ """
+ Check if evaluation results meet quality gate.
+
+ The quality gate is based on the pass_rate from the summary.json file.
+ Pass rate includes both 'Complete' (C) and 'Partial' (P) grades.
+ Tests with 'Incomplete' (I) grade do not count towards the pass rate.
+
+ Args:
+ results_dir: Directory containing the summary.json file
+ threshold: Minimum pass rate percentage required (default: 80%)
+ """
+
+ summary_path = Path(results_dir) / "summary.json"
+
+ if not summary_path.exists():
+ print("Summary file not found")
+ sys.exit(1)
+
+ with open(summary_path, "r") as f:
+ summary: Dict[str, Any] = json.load(f)
+
+ pass_rate = summary.get("pass_rate", 0)
+
+ if pass_rate >= threshold:
+ print(
+ f"✅ Quality gate PASSED: {pass_rate:.1f}% >= {threshold}% (Complete + Partial grades)"
+ )
+ sys.exit(0)
+ else:
+ print(
+ f"❌ Quality gate FAILED: {pass_rate:.1f}% < {threshold}% (Complete + Partial grades)"
+ )
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("Usage: python quality_gate.py ")
+ sys.exit(1)
+
+ check_quality_gate(sys.argv[1])
diff --git a/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_express_bookmarking.py b/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_express_bookmarking.py
index e0df4fee5..09bb7f644 100644
--- a/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_express_bookmarking.py
+++ b/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_express_bookmarking.py
@@ -35,13 +35,18 @@ def test_navsets_bookmarking_demo(
# Non-module navsets
navset_collection = controller.NavsetTab(page, "navsets_collection")
navset_collection.set(navset_name)
- navset_cont = navset_controller(page, f"{navset_name}_{navset_variant}")
+ navset_cont = navset_controller(
+ page, f"{navset_name}_{navset_variant}" # pyright: ignore[reportCallIssue]
+ )
navset_cont.set(f"{navset_name}_c")
# Module navsets
mod_navset_collection = controller.NavsetTab(page, "first-navsets_collection")
mod_navset_collection.set(navset_name)
- mod_navset_cont = navset_controller(page, f"first-{navset_name}_{navset_variant}")
+ mod_navset_cont = navset_controller(
+ page,
+ f"first-{navset_name}_{navset_variant}", # pyright: ignore[reportCallIssue]
+    )
mod_navset_cont.set(f"{navset_name}_b")
existing_url = page.url
diff --git a/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_hidden_bookmarking.py b/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_hidden_bookmarking.py
index 44e95ce24..c7a6c9d93 100644
--- a/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_hidden_bookmarking.py
+++ b/tests/playwright/ai_generated_apps/bookmark/navsets/test_navsets_hidden_bookmarking.py
@@ -29,7 +29,7 @@ def test_navset_hidden_bookmarking(
navset_collection = controller.NavsetTab(page, "navsets_collection")
navset_collection.set(navset_name)
navset_id = f"{navset_name}_{navset_variant}"
- navset_cont = navset_controller(page, navset_id)
+ navset_cont = navset_controller(page, navset_id) # pyright: ignore[reportCallIssue]
navset_btn = controller.InputActionButton(page, f"{navset_id}_button")
navset_btn.click()
navset_btn.click()
@@ -37,7 +37,9 @@ def test_navset_hidden_bookmarking(
# Module navsets
mod_navset_collection = controller.NavsetTab(page, "first-navsets_collection")
mod_navset_collection.set(navset_name)
- mod_navset_cont = navset_controller(page, f"first-{navset_id}")
+ mod_navset_cont = navset_controller(
+ page, f"first-{navset_id}" # pyright: ignore[reportCallIssue]
+ )
mod_navset_btn = controller.InputActionButton(page, f"first-{navset_id}_button")
mod_navset_btn.click()