Add Docker-based test infrastructure for e2e tests (#424)

ashwin-ant · claude · web-flow · commit a0ce44a3fabb · 2025-12-16T10:53:13.000-08:00
## Summary - Add `Dockerfile.test`: Python 3.12 image with Claude Code CLI installed - Add `scripts/test-docker.sh`: Local script to run tests in Docker - Add `test-e2e-docker` job to CI workflow that runs the full e2e suite in a container - Add `.dockerignore` to speed up Docker builds ## Context This helps catch Docker-specific issues like #406 where filesystem-based agents loaded via `setting_sources=["project"]` may silently fail in Docker environments. ## Local Usage ```bash # Run unit tests in Docker (no API key needed) ./scripts/test-docker.sh unit # Run e2e tests in Docker ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh e2e # Run all tests ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh all ``` ## Test plan - [x] Unit tests pass in Docker locally (129 passed) - [ ] CI job runs successfully 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/.claude/agents/test-agent.md b/.claude/agents/test-agent.md
@@ -0,0 +1,9 @@
+---
+name: test-agent
+description: A simple test agent for SDK testing
+tools: Read
+---
+
+# Test Agent
+
+You are a simple test agent. When asked a question, provide a brief, helpful answer.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,49 @@
+# Git
+.git
+.gitignore
+
+# Python
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Testing/Coverage
+.coverage
+.pytest_cache/
+htmlcov/
+.tox/
+.nox/
+
+# Misc
+*.log
+.DS_Store
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -81,6 +81,24 @@ jobs:
         run: |
           python -m pytest e2e-tests/ -v -m e2e
 
+  test-e2e-docker:
+    runs-on: ubuntu-latest
+    needs: test # Run after unit tests pass
+    # Run e2e tests in Docker to catch container-specific issues like #406
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # The image tag set here must match the image name used by the
+      # `docker run` step below.
+      - name: Build Docker test image
+        run: docker build -f Dockerfile.test -t claude-sdk-test .
+
+      # `-e ANTHROPIC_API_KEY` is given without `=value`, so Docker forwards
+      # the value from this step's environment into the container.
+      # The explicit pytest command replaces the image's default CMD
+      # (unit tests) with the e2e suite.
+      - name: Run e2e tests in Docker
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: |
+          docker run --rm -e ANTHROPIC_API_KEY \
+            claude-sdk-test python -m pytest e2e-tests/ -v -m e2e
+
   test-examples:
     runs-on: ubuntu-latest
     needs: test-e2e # Run after e2e tests
diff --git a/Dockerfile.test b/Dockerfile.test
@@ -0,0 +1,29 @@
+# Dockerfile for running SDK tests in a containerized environment
+# This helps catch Docker-specific issues like #406
+
+FROM python:3.12-slim
+
+# Run RUN steps under bash with pipefail so that a failed download in
+# `curl ... | bash` fails the build at that step instead of being masked
+# by the exit status of the last command in the pipe.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install dependencies for Claude CLI and git (needed for some tests).
+# --no-install-recommends keeps the image small; ca-certificates is listed
+# explicitly because curl and git need it for HTTPS.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Claude Code CLI
+RUN curl -fsSL https://claude.ai/install.sh | bash
+ENV PATH="/root/.local/bin:$PATH"
+
+# Verify CLI installation right after installing it, so a broken install
+# fails fast and the check is cached together with the install layer
+RUN claude -v
+
+# Set up working directory
+WORKDIR /app
+
+# Copy the SDK source
+COPY . .
+
+# Install SDK with dev dependencies (no pip cache: it only bloats the image)
+RUN pip install --no-cache-dir -e ".[dev]"
+
+# Default: run unit tests
+CMD ["python", "-m", "pytest", "tests/", "-v"]
diff --git a/e2e-tests/test_agents_and_settings.py b/e2e-tests/test_agents_and_settings.py
@@ -38,15 +38,88 @@ async def test_agent_definition():
         async for message in client.receive_response():
             if isinstance(message, SystemMessage) and message.subtype == "init":
                 agents = message.data.get("agents", [])
-                assert isinstance(
-                    agents, list
-                ), f"agents should be a list of strings, got: {type(agents)}"
-                assert (
-                    "test-agent" in agents
-                ), f"test-agent should be available, got: {agents}"
+                assert isinstance(agents, list), (
+                    f"agents should be a list of strings, got: {type(agents)}"
+                )
+                assert "test-agent" in agents, (
+                    f"test-agent should be available, got: {agents}"
+                )
                 break
 
 
+@pytest.mark.e2e
+@pytest.mark.asyncio
+async def test_filesystem_agent_loading():
+    """Test that filesystem-based agents load via setting_sources and produce full response.
+
+    This is the core test for issue #406. It verifies that when using
+    setting_sources=["project"] with a .claude/agents/ directory containing
+    agent definitions, the SDK:
+    1. Loads the agents (they appear in init message)
+    2. Produces a full response with AssistantMessage
+    3. Completes with a ResultMessage
+
+    The bug in #406 causes the iterator to complete after only the
+    init SystemMessage, never yielding AssistantMessage or ResultMessage.
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Create a temporary project with a filesystem agent
+        project_dir = Path(tmpdir)
+        agents_dir = project_dir / ".claude" / "agents"
+        agents_dir.mkdir(parents=True)
+
+        # Create a test agent file
+        agent_file = agents_dir / "fs-test-agent.md"
+        agent_file.write_text(
+            """---
+name: fs-test-agent
+description: A filesystem test agent for SDK testing
+tools: Read
+---
+
+# Filesystem Test Agent
+
+You are a simple test agent. When asked a question, provide a brief, helpful answer.
+"""
+        )
+
+        options = ClaudeAgentOptions(
+            setting_sources=["project"],
+            cwd=project_dir,
+            max_turns=1,
+        )
+
+        messages = []
+        async with ClaudeSDKClient(options=options) as client:
+            await client.query("Say hello in exactly 3 words")
+            async for msg in client.receive_response():
+                messages.append(msg)
+
+        # Must have at least init, assistant, result
+        message_types = [type(m).__name__ for m in messages]
+
+        assert "SystemMessage" in message_types, "Missing SystemMessage (init)"
+        assert "AssistantMessage" in message_types, (
+            f"Missing AssistantMessage - got only: {message_types}. "
+            "This may indicate issue #406 (silent failure with filesystem agents)."
+        )
+        assert "ResultMessage" in message_types, "Missing ResultMessage"
+
+        # Find the init message and check for the filesystem agent
+        for msg in messages:
+            if isinstance(msg, SystemMessage) and msg.subtype == "init":
+                agents = msg.data.get("agents", [])
+                # Agents are returned as strings (just names)
+                assert "fs-test-agent" in agents, (
+                    f"fs-test-agent not loaded from filesystem. Found: {agents}"
+                )
+                break
+
+        # On Windows, wait for file handles to be released before cleanup
+        if sys.platform == "win32":
+            await asyncio.sleep(0.5)
+
+
 @pytest.mark.e2e
 @pytest.mark.asyncio
 async def test_setting_sources_default():
@@ -74,12 +147,12 @@ async def test_setting_sources_default():
             async for message in client.receive_response():
                 if isinstance(message, SystemMessage) and message.subtype == "init":
                     output_style = message.data.get("output_style")
-                    assert (
-                        output_style != "local-test-style"
-                    ), f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
-                    assert (
-                        output_style == "default"
-                    ), f"outputStyle should be 'default', got: {output_style}"
+                    assert output_style != "local-test-style", (
+                        f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
+                    )
+                    assert output_style == "default", (
+                        f"outputStyle should be 'default', got: {output_style}"
+                    )
                     break
 
         # On Windows, wait for file handles to be released before cleanup
@@ -121,9 +194,9 @@ async def test_setting_sources_user_only():
             async for message in client.receive_response():
                 if isinstance(message, SystemMessage) and message.subtype == "init":
                     commands = message.data.get("slash_commands", [])
-                    assert (
-                        "testcmd" not in commands
-                    ), f"testcmd should NOT be available with user-only sources, got: {commands}"
+                    assert "testcmd" not in commands, (
+                        f"testcmd should NOT be available with user-only sources, got: {commands}"
+                    )
                     break
 
         # On Windows, wait for file handles to be released before cleanup
@@ -159,11 +232,11 @@ async def test_setting_sources_project_included():
             async for message in client.receive_response():
                 if isinstance(message, SystemMessage) and message.subtype == "init":
                     output_style = message.data.get("output_style")
-                    assert (
-                        output_style == "local-test-style"
-                    ), f"outputStyle should be from local settings, got: {output_style}"
+                    assert output_style == "local-test-style", (
+                        f"outputStyle should be from local settings, got: {output_style}"
+                    )
                     break
 
         # On Windows, wait for file handles to be released before cleanup
         if sys.platform == "win32":
-            await asyncio.sleep(0.5)
+            await asyncio.sleep(0.5)
diff --git a/examples/filesystem_agents.py b/examples/filesystem_agents.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Example of loading filesystem-based agents via setting_sources.
+
+This example demonstrates how to load agents defined in .claude/agents/ files
+using the setting_sources option. This is different from inline AgentDefinition
+objects - these agents are loaded from markdown files on disk.
+
+This example tests the scenario from issue #406 where filesystem-based agents
+loaded via setting_sources=["project"] may silently fail in certain environments.
+
+Usage:
+./examples/filesystem_agents.py
+"""
+
+import asyncio
+from pathlib import Path
+
+from claude_agent_sdk import (
+    AssistantMessage,
+    ClaudeAgentOptions,
+    ClaudeSDKClient,
+    ResultMessage,
+    SystemMessage,
+    TextBlock,
+)
+
+
+def extract_agents(msg: SystemMessage) -> list[str]:
+    """Extract agent names from system message init data."""
+    if msg.subtype == "init":
+        agents = msg.data.get("agents", [])
+        # Agents can be either strings or dicts with a 'name' field
+        result = []
+        for a in agents:
+            if isinstance(a, str):
+                result.append(a)
+            elif isinstance(a, dict):
+                result.append(a.get("name", ""))
+        return result
+    return []
+
+
+async def main():
+    """Test loading filesystem-based agents."""
+    print("=== Filesystem Agents Example ===")
+    print("Testing: setting_sources=['project'] with .claude/agents/test-agent.md")
+    print()
+
+    # Use the SDK repo directory which has .claude/agents/test-agent.md
+    sdk_dir = Path(__file__).parent.parent
+
+    options = ClaudeAgentOptions(
+        setting_sources=["project"],
+        cwd=sdk_dir,
+    )
+
+    message_types: list[str] = []
+    agents_found: list[str] = []
+
+    async with ClaudeSDKClient(options=options) as client:
+        await client.query("Say hello in exactly 3 words")
+
+        async for msg in client.receive_response():
+            message_types.append(type(msg).__name__)
+
+            if isinstance(msg, SystemMessage) and msg.subtype == "init":
+                agents_found = extract_agents(msg)
+                print(f"Init message received. Agents loaded: {agents_found}")
+
+            elif isinstance(msg, AssistantMessage):
+                for block in msg.content:
+                    if isinstance(block, TextBlock):
+                        print(f"Assistant: {block.text}")
+
+            elif isinstance(msg, ResultMessage):
+                print(
+                    f"Result: subtype={msg.subtype}, cost=${msg.total_cost_usd or 0:.4f}"
+                )
+
+    print()
+    print("=== Summary ===")
+    print(f"Message types received: {message_types}")
+    print(f"Total messages: {len(message_types)}")
+
+    # Validate the results
+    has_init = "SystemMessage" in message_types
+    has_assistant = "AssistantMessage" in message_types
+    has_result = "ResultMessage" in message_types
+    has_test_agent = "test-agent" in agents_found
+
+    print()
+    if has_init and has_assistant and has_result:
+        print("SUCCESS: Received full response (init, assistant, result)")
+    else:
+        print("FAILURE: Did not receive full response")
+        print(f"  - Init: {has_init}")
+        print(f"  - Assistant: {has_assistant}")
+        print(f"  - Result: {has_result}")
+
+    if has_test_agent:
+        print("SUCCESS: test-agent was loaded from filesystem")
+    else:
+        print("WARNING: test-agent was NOT loaded (may not exist in .claude/agents/)")
+
+
+# Run the example only when executed as a script, not when imported.
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scripts/test-docker.sh b/scripts/test-docker.sh