Add A2A conformance tests (#6)

staverm · web-flow · commit a57602db8378 · 2025-12-22T18:52:24.000+01:00
* Add a2a tests

* Run tests in CI

* Update readme
diff --git a/.github/workflows/test-and-publish.yml b/.github/workflows/test-and-publish.yml
@@ -1,15 +1,15 @@
-name: Publish Agent
+name: Test and Publish Agent
 
-# Trigger this workflow when pushing main branch and tags
 on:
+  pull_request:
   push:
     branches:
       - main
     tags:
       - 'v*'  # Trigger on version tags like v1.0.0, v1.1.0
 
 jobs:
-  publish:
+  test-and-publish:
     runs-on: ubuntu-latest
 
     # These permissions are required for the workflow to:
@@ -23,52 +23,63 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Log in to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          # GITHUB_TOKEN is automatically provided by GitHub Actions
-          # No manual secret configuration needed!
-          # It has permissions based on the 'permissions' block above
-          password: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Extract metadata for Docker
         id: meta
         uses: docker/metadata-action@v5
         with:
           images: ghcr.io/${{ github.repository }}
           tags: |
-            # For tags like v1.0, create tag '1.0'
+            type=ref,event=pr
             type=semver,pattern={{version}}
-            # For tags like v1.0, create tag '1'
             type=semver,pattern={{major}}
-            # For main branch, create tag 'latest'
             type=raw,value=latest,enable={{is_default_branch}}
-            # For PRs, create tag 'pr-123'
-            type=ref,event=pr
 
-      - name: Build and push Docker image
-        id: build
+      - name: Build Docker image
         uses: docker/build-push-action@v5
         with:
           context: .
-          file: Dockerfile
-          # Only push if this is a push event (not a PR)
-          # PRs will build but not push to avoid polluting the registry
-          push: ${{ github.event_name != 'pull_request' }}
+          push: false
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          # Explicitly build for linux/amd64 (GitHub Actions default)
+          load: true
           platforms: linux/amd64
 
+      - name: Start agent container
+        run: |
+          docker run -d -p 9009:9009 --name agent-container $(echo "${{ steps.meta.outputs.tags }}" | head -n1) --host 0.0.0.0 --port 9009
+          timeout 30 bash -c 'until curl -sf http://localhost:9009/.well-known/agent-card.json > /dev/null; do sleep 1; done'
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install test dependencies
+        run: uv sync --extra test
+
+      - name: Run tests
+        run: uv run pytest -v --agent-url http://localhost:9009
+
+      - name: Stop container and show logs
+        if: always()
+        run: |
+          echo "=== Agent Container Logs ==="
+          docker logs agent-container || true
+          docker stop agent-container || true
+
+      - name: Log in to GitHub Container Registry
+        if: success() && github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Push Docker image
+        if: success() && github.event_name != 'pull_request'
+        env:
+          IMAGE: ghcr.io/${{ github.repository }}
+        # Docker repository names must be lowercase, but github.repository
+        # keeps the owner/repo casing. Lowercase it (bash ${VAR,,}) so the
+        # push works for repositories whose owner or name has capitals.
+        run: |
+          docker push --all-tags "${IMAGE,,}"
+
       - name: Output image digest
-        if: github.event_name != 'pull_request'
+        if: success() && github.event_name != 'pull_request'
         run: |
           echo "## Docker Image Published :rocket:" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "**Tags:** ${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "**Digest:** \`${{ steps.build.outputs.digest }}\`" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "Use this digest in your MANIFEST.json for reproducibility." >> $GITHUB_STEP_SUMMARY
diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ uv.lock           # Locked dependencies
 
 1. **Create your repository** - Click "Use this template" to create your own repository from this template
 
-2. **Implement your agent** - Add your agent logic to the `run` method in [`src/agent.py`](src/agent.py)
+2. **Implement your agent** - Add your agent logic to [`src/agent.py`](src/agent.py)
 
 3. **Configure your agent card** - Fill in your agent's metadata (name, skills, description) in [`src/server.py`](src/server.py)
 
@@ -43,9 +43,23 @@ docker build -t my-agent .
 docker run -p 9009:9009 my-agent
 ```
 
+## Testing
+
+Run A2A conformance tests against your agent.
+
+```bash
+# Install test dependencies
+uv sync --extra test
+
+# Start your agent (uv or docker; see above)
+
+# Run tests against your running agent URL
+uv run pytest --agent-url http://localhost:9009
+```
+
 ## Publishing
 
-The repository includes a GitHub Actions workflow that automatically builds and publishes a Docker image of your agent to GitHub Container Registry:
+The repository includes a GitHub Actions workflow that automatically builds, tests, and publishes a Docker image of your agent to GitHub Container Registry:
 
 - **Push to `main`** → publishes `latest` tag:
 ```
@@ -60,4 +74,4 @@ ghcr.io/<your-username>/<your-repo-name>:1
 
 Once the workflow completes, find your Docker image in the Packages section (right sidebar of your repository). Configure the package visibility in package settings.
 
-> **Note:** Organization repositories may need package write permissions enabled manually (Settings → Actions → General). Version tags must follow [semantic versioning](https://semver.org/) (e.g., `v1.0.0`).
+> **Note:** Organization repositories may need package write permissions enabled manually (Settings → Actions → General). Version tags must follow [semantic versioning](https://semver.org/) (e.g., `v1.0.0`).
diff --git a/pyproject.toml b/pyproject.toml
@@ -8,3 +8,10 @@ dependencies = [
     "a2a-sdk[http-server]>=0.3.20",
     "uvicorn>=0.38.0",
 ]
+
+[project.optional-dependencies]
+test = [
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.24.0",
+    "httpx>=0.28.1",
+]
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,25 @@
+import httpx
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--agent-url",
+        default="http://localhost:9009",
+        help="Agent URL (default: http://localhost:9009)",
+    )
+
+
+@pytest.fixture(scope="session")
+def agent(request):
+    """Agent URL fixture. Agent must be running before tests start."""
+    url = request.config.getoption("--agent-url")
+
+    try:
+        response = httpx.get(f"{url}/.well-known/agent-card.json", timeout=2)
+        if response.status_code != 200:
+            pytest.exit(f"Agent at {url} returned status {response.status_code}", returncode=1)
+    except Exception as e:
+        pytest.exit(f"Could not connect to agent at {url}: {e}", returncode=1)
+
+    return url
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -0,0 +1,199 @@
+from typing import Any
+import pytest
+import httpx
+from uuid import uuid4
+
+from a2a.client import A2ACardResolver, ClientConfig, ClientFactory
+from a2a.types import Message, Part, Role, TextPart
+
+
+# A2A validation helpers - adapted from https://github.com/a2aproject/a2a-inspector/blob/main/backend/validators.py
+
+def validate_agent_card(card_data: dict[str, Any]) -> list[str]:
+    """Validate the structure and fields of an agent card."""
+    errors: list[str] = []
+
+    # Use a frozenset for efficient checking and to indicate immutability.
+    required_fields = frozenset(
+        [
+            'name',
+            'description',
+            'url',
+            'version',
+            'capabilities',
+            'defaultInputModes',
+            'defaultOutputModes',
+            'skills',
+        ]
+    )
+
+    # Check for the presence of all required fields
+    for field in required_fields:
+        if field not in card_data:
+            errors.append(f"Required field is missing: '{field}'.")
+
+    # Check if 'url' is an absolute URL (basic check)
+    if 'url' in card_data and not (
+        card_data['url'].startswith('http://')
+        or card_data['url'].startswith('https://')
+    ):
+        errors.append(
+            "Field 'url' must be an absolute URL starting with http:// or https://."
+        )
+
+    # Check if capabilities is a dictionary
+    if 'capabilities' in card_data and not isinstance(
+        card_data['capabilities'], dict
+    ):
+        errors.append("Field 'capabilities' must be an object.")
+
+    # Check if defaultInputModes and defaultOutputModes are arrays of strings
+    for field in ['defaultInputModes', 'defaultOutputModes']:
+        if field in card_data:
+            if not isinstance(card_data[field], list):
+                errors.append(f"Field '{field}' must be an array of strings.")
+            elif not all(isinstance(item, str) for item in card_data[field]):
+                errors.append(f"All items in '{field}' must be strings.")
+
+    # Check skills array
+    if 'skills' in card_data:
+        if not isinstance(card_data['skills'], list):
+            errors.append(
+                "Field 'skills' must be an array of AgentSkill objects."
+            )
+        elif not card_data['skills']:
+            errors.append(
+                "Field 'skills' array is empty. Agent must have at least one skill if it performs actions."
+            )
+
+    return errors
+
+
+def _validate_task(data: dict[str, Any]) -> list[str]:
+    errors = []
+    if 'id' not in data:
+        errors.append("Task object missing required field: 'id'.")
+    if 'status' not in data or 'state' not in data.get('status', {}):
+        errors.append("Task object missing required field: 'status.state'.")
+    return errors
+
+
+def _validate_status_update(data: dict[str, Any]) -> list[str]:
+    errors = []
+    if 'status' not in data or 'state' not in data.get('status', {}):
+        errors.append(
+            "StatusUpdate object missing required field: 'status.state'."
+        )
+    return errors
+
+
+def _validate_artifact_update(data: dict[str, Any]) -> list[str]:
+    errors = []
+    if 'artifact' not in data:
+        errors.append(
+            "ArtifactUpdate object missing required field: 'artifact'."
+        )
+    elif (
+        'parts' not in data.get('artifact', {})
+        or not isinstance(data.get('artifact', {}).get('parts'), list)
+        or not data.get('artifact', {}).get('parts')
+    ):
+        errors.append("Artifact object must have a non-empty 'parts' array.")
+    return errors
+
+
+def _validate_message(data: dict[str, Any]) -> list[str]:
+    errors = []
+    if (
+        'parts' not in data
+        or not isinstance(data.get('parts'), list)
+        or not data.get('parts')
+    ):
+        errors.append("Message object must have a non-empty 'parts' array.")
+    if 'role' not in data or data.get('role') != 'agent':
+        errors.append("Message from agent must have 'role' set to 'agent'.")
+    return errors
+
+
+def validate_event(data: dict[str, Any]) -> list[str]:
+    """Validate an incoming event from the agent based on its kind."""
+    if 'kind' not in data:
+        return ["Response from agent is missing required 'kind' field."]
+
+    kind = data.get('kind')
+    validators = {
+        'task': _validate_task,
+        'status-update': _validate_status_update,
+        'artifact-update': _validate_artifact_update,
+        'message': _validate_message,
+    }
+
+    validator = validators.get(str(kind))
+    if validator:
+        return validator(data)
+
+    return [f"Unknown message kind received: '{kind}'."]
+
+
+# A2A messaging helpers
+
+async def send_text_message(text: str, url: str, context_id: str | None = None, streaming: bool = False):
+    async with httpx.AsyncClient(timeout=10) as httpx_client:
+        resolver = A2ACardResolver(httpx_client=httpx_client, base_url=url)
+        agent_card = await resolver.get_agent_card()
+        config = ClientConfig(httpx_client=httpx_client, streaming=streaming)
+        factory = ClientFactory(config)
+        client = factory.create(agent_card)
+
+        msg = Message(
+            kind="message",
+            role=Role.user,
+            parts=[Part(TextPart(text=text))],
+            message_id=uuid4().hex,
+            context_id=context_id,
+        )
+
+        events = [event async for event in client.send_message(msg)]
+
+    return events
+
+
+# A2A conformance tests
+
+def test_agent_card(agent):
+    """Validate agent card structure and required fields."""
+    response = httpx.get(f"{agent}/.well-known/agent-card.json")
+    assert response.status_code == 200, "Agent card endpoint must return 200"
+
+    card_data = response.json()
+    errors = validate_agent_card(card_data)
+
+    assert not errors, f"Agent card validation failed:\n" + "\n".join(errors)
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("streaming", [True, False])
+async def test_message(agent, streaming):
+    """Test that agent returns valid A2A message format."""
+    events = await send_text_message("Hello", agent, streaming=streaming)
+
+    all_errors = []
+    for event in events:
+        match event:
+            case Message() as msg:
+                errors = validate_event(msg.model_dump())
+                all_errors.extend(errors)
+
+            case (task, update):
+                errors = validate_event(task.model_dump())
+                all_errors.extend(errors)
+                if update:
+                    errors = validate_event(update.model_dump())
+                    all_errors.extend(errors)
+
+            case _:
+                pytest.fail(f"Unexpected event type: {type(event)}")
+
+    assert events, "Agent should respond with at least one event"
+    assert not all_errors, f"Message validation failed:\n" + "\n".join(all_errors)
+
+# Add your custom tests here
diff --git a/uv.lock b/uv.lock