feat(conformance): add CLI (adk conformance create) for generating conformance tests from spec.yaml file

Jacksunwei · copybara-github · commit bf4ff310095e · 2025-09-18T09:14:05.000-07:00
- Add conformance command group with create subcommand
- Implement the category/name/spec.yaml layout, with generated-*.yaml files stored alongside each spec
- Support executing agents with queries and recording sessions
- Create test cases with recorded llm interactions and tool calls/results

Expected folder structure:

```

conformance_repo/
├── agents/                            # Agent definitions - contains all config-based agents shared by test cases.
│   ├── single_basic/
│   ├── multi_basic/
│   └── single_tool_builtin/
│
└── tests/                             # Test cases
    ├── core/                          # Test category
    │   ├── desc_001/                  # Individual test case
    │   │   ├── spec.yaml             # Human-written specification
    │   │   ├── generated-session.yaml
    │   │   ├── generated-recordings.yaml
    │   │   └── ...                   # Potential future generated files
    │   ├── f_001/
    │   │   ├── spec.yaml
    │   │   ├── generated-session.yaml
    │   │   ├── generated-recordings.yaml
    │   │   └── ...

```

Help text:

```
-> % adk conformance create --help
Usage: adk conformance create [OPTIONS] [PATHS]...

  Generate ADK conformance test YAML files from TestCaseInput specifications.

  NOTE: this is work in progress.

  This command reads TestCaseInput specifications from input.yaml files, executes the specified test cases against agents, and generates conformance test files with recorded agent interactions as
  test.yaml files.

  Expected directory structure: category/name/input.yaml (TestCaseInput) -> category/name/test.yaml (TestCase)

  PATHS: One or more directories containing test case specifications. If no paths are provided, defaults to 'tests/' directory.

  Examples:

  Use default directory: adk conformance create

  Custom directories: adk conformance create tests/core tests/tools

Options:
  --help  Show this message and exit.
```
PiperOrigin-RevId: 808609547
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
@@ -15,12 +15,12 @@
 from __future__ import annotations
 
 import asyncio
-import collections
 from contextlib import asynccontextmanager
 from datetime import datetime
 import functools
 import logging
 import os
+from pathlib import Path
 import tempfile
 from typing import Optional
 
@@ -119,6 +119,66 @@ def deploy():
   pass
 
 
+@main.group()
+def conformance():
+  """Conformance testing tools for ADK."""
+  pass  # Intentionally empty: subcommands attach via @conformance.command.
+
+
+@conformance.command("create", cls=HelpfulCommand)
+@click.argument(
+    "paths",
+    nargs=-1,
+    type=click.Path(
+        exists=True, dir_okay=True, file_okay=False, resolve_path=True
+    ),
+)
+@click.pass_context
+def cli_conformance_create(
+    ctx,
+    paths: tuple[str, ...],
+):
+  """Generate ADK conformance test YAML files from spec.yaml specifications.
+
+  NOTE: this is work in progress.
+
+  This command reads test specifications from spec.yaml files, executes the
+  specified test cases against agents, and records the agent interactions
+  as generated-*.yaml files next to each spec.yaml.
+
+  Expected directory structure:
+  category/name/spec.yaml -> category/name/generated-session.yaml
+
+  PATHS: One or more directories containing test case specifications.
+  If no paths are provided, defaults to 'tests/' directory.
+
+  Examples:
+
+  Use default directory: adk conformance create
+
+  Custom directories: adk conformance create tests/core tests/tools
+  """
+
+  try:
+    from .conformance.cli_create import run_conformance_create
+  except ImportError as e:
+    click.secho(
+        f"Error: Missing conformance testing dependencies: {e}",
+        fg="red",
+        err=True,
+    )
+    click.secho(
+        "Please install the required conformance testing package dependencies.",
+        fg="yellow",
+        err=True,
+    )
+    ctx.exit(1)
+
+  # Given paths were already resolved by click; otherwise default to ./tests.
+  test_paths = [Path(p) for p in paths] if paths else [Path("tests").resolve()]
+  asyncio.run(run_conformance_create(test_paths))
+
+
 @main.command("create", cls=HelpfulCommand)
 @click.option(
     "--model",
@@ -697,6 +757,15 @@ def decorator(func):
         ),
         default=None,
     )
+    @click.option(
+        "--extra_plugins",
+        help=(
+            "Optional. Comma-separated list of extra plugin classes or"
+            " instances to enable (e.g., my.module.MyPluginClass or"
+            " my.module.my_plugin_instance)."
+        ),
+        multiple=True,
+    )
     @functools.wraps(func)
     @click.pass_context
     def wrapper(ctx, *args, **kwargs):
@@ -743,6 +812,7 @@ def cli_web(
     artifact_storage_uri: Optional[str] = None,  # Deprecated
     a2a: bool = False,
     reload_agents: bool = False,
+    extra_plugins: Optional[list[str]] = None,
 ):
   """Starts a FastAPI server with Web UI for agents.
 
@@ -794,6 +864,7 @@ async def _lifespan(app: FastAPI):
       host=host,
       port=port,
       reload_agents=reload_agents,
+      extra_plugins=extra_plugins,
   )
   config = uvicorn.Config(
       app,
@@ -836,6 +907,7 @@ def cli_api_server(
     artifact_storage_uri: Optional[str] = None,  # Deprecated
     a2a: bool = False,
     reload_agents: bool = False,
+    extra_plugins: Optional[list[str]] = None,
 ):
   """Starts a FastAPI server for agents.
 
@@ -865,6 +937,7 @@ def cli_api_server(
           host=host,
           port=port,
           reload_agents=reload_agents,
+          extra_plugins=extra_plugins,
       ),
       host=host,
       port=port,
diff --git a/src/google/adk/cli/conformance/cli_create.py b/src/google/adk/cli/conformance/cli_create.py
@@ -0,0 +1,144 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CLI commands for ADK conformance testing."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import click
+from google.genai import types
+
+from ...utils.yaml_utils import dump_pydantic_to_yaml
+from ..adk_web_server import RunAgentRequest
+from ._generated_file_utils import load_test_case
+from .adk_web_server_client import AdkWebServerClient
+from .test_case import TestCase
+
+
+async def _create_conformance_test_files(
+    test_case: TestCase,
+    user_id: str = "adk_conformance_test_user",
+) -> Path:
+  """Send the spec's user messages in record mode; return the saved session file path."""
+  # Both generated-*.yaml paths are direct children of the test case directory.
+  test_case_dir = test_case.dir
+
+  # Remove stale generated files (absent files are ignored) to ensure clean state
+  generated_session_file = test_case_dir / "generated-session.yaml"
+  generated_recordings_file = test_case_dir / "generated-recordings.yaml"
+
+  generated_session_file.unlink(missing_ok=True)
+  generated_recordings_file.unlink(missing_ok=True)
+
+  async with AdkWebServerClient() as client:
+    # Create a new session with empty initial state for the test
+    session = await client.create_session(
+        app_name=test_case.test_spec.agent, user_id=user_id, state={}
+    )
+
+    # Send each user message in order; the streamed results are drained and discarded
+    for user_message_index, user_message in enumerate(
+        test_case.test_spec.user_messages
+    ):
+      content = types.Content(
+          parts=[types.Part(text=user_message)], role="user"
+      )
+      async for _ in client.run_agent(
+          RunAgentRequest(
+              app_name=test_case.test_spec.agent,
+              user_id=user_id,
+              session_id=session.id,
+              new_message=content,
+          ),
+          mode="record",
+          test_case_dir=str(test_case_dir),
+          user_message_index=user_message_index,
+      ):
+        pass
+
+    # Retrieve the session again now that every message has been sent
+    updated_session = await client.get_session(
+        app_name=test_case.test_spec.agent,
+        user_id=user_id,
+        session_id=session.id,
+    )
+
+    # Save generated-session.yaml (None-valued fields excluded, keys not sorted)
+    dump_pydantic_to_yaml(
+        updated_session,
+        generated_session_file,
+        indent=2,
+        sort_keys=False,
+        exclude_none=True,
+    )
+
+    return generated_session_file
+
+
+async def run_conformance_create(paths: list[Path]) -> None:
+  """Load every spec.yaml found under `paths` and generate its conformance files.
+
+  Args:
+    paths: directories searched recursively for spec.yaml; missing ones are skipped.
+  """
+  click.echo("Generating ADK conformance tests...")
+
+  # Loaded test cases, keyed by the directory that contains their spec.yaml
+  test_cases: dict[Path, TestCase] = {}
+
+  for test_dir in paths:
+    if not test_dir.exists():
+      continue
+
+    for spec_file in test_dir.rglob("spec.yaml"):
+      try:
+        test_case_dir = spec_file.parent
+        category = test_case_dir.parent.name
+        name = test_case_dir.name
+        test_spec = load_test_case(test_case_dir)
+        test_case = TestCase(
+            category=category,
+            name=name,
+            dir=test_case_dir,
+            test_spec=test_spec,
+        )
+        test_cases[test_case_dir] = test_case
+        click.echo(f"Loaded test spec: {category}/{name}")
+      except Exception as e:
+        click.secho(f"Failed to load {spec_file}: {e}", fg="red", err=True)
+
+  # Process test cases one at a time; a failure is reported and the rest still run
+  if test_cases:
+    click.echo(f"\nProcessing {len(test_cases)} test cases...")
+
+    for test_case in test_cases.values():
+      try:
+        await _create_conformance_test_files(test_case)
+        click.secho(
+            "Generated conformance test files for:"
+            f" {test_case.category}/{test_case.name}",
+            fg="green",
+        )
+      except Exception as e:
+        click.secho(
+            f"Failed to generate {test_case.category}/{test_case.name}: {e}",
+            fg="red",
+            err=True,
+        )
+  else:
+    click.secho("No test specs found to process.", fg="yellow")
+
+  click.secho("\nConformance test generation complete!", fg="blue")
diff --git a/src/google/adk/cli/conformance/test_case.py b/src/google/adk/cli/conformance/test_case.py
@@ -14,6 +14,9 @@
 
 from __future__ import annotations
 
+from dataclasses import dataclass
+from pathlib import Path
+
 from pydantic import BaseModel
 from pydantic import ConfigDict
 
@@ -37,3 +40,13 @@ class TestSpec(BaseModel):
 
   user_messages: list[str]
   """Sequence of user messages to send to the agent during test execution."""
+
+
+@dataclass
+class TestCase:
+  """Represents a single conformance test case: where it lives plus its spec."""
+
+  category: str  # cli_create sets this to the name of the parent of `dir`.
+  name: str  # cli_create sets this to the name of `dir` itself.
+  dir: Path  # Test case directory; cli_create uses the folder holding spec.yaml.
+  test_spec: TestSpec  # Specification for this test case (agent, user messages).