Skip to content

Commit a0ce44a

Browse files
ashwin-ant and claude authored
Add Docker-based test infrastructure for e2e tests (#424)
## Summary

- Add `Dockerfile.test`: Python 3.12 image with Claude Code CLI installed
- Add `scripts/test-docker.sh`: Local script to run tests in Docker
- Add `test-e2e-docker` job to CI workflow that runs the full e2e suite in a container
- Add `.dockerignore` to speed up Docker builds

## Context

This helps catch Docker-specific issues like #406 where filesystem-based agents loaded via `setting_sources=["project"]` may silently fail in Docker environments.

## Local Usage

```bash
# Run unit tests in Docker (no API key needed)
./scripts/test-docker.sh unit

# Run e2e tests in Docker
ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh e2e

# Run all tests
ANTHROPIC_API_KEY=sk-... ./scripts/test-docker.sh all
```

## Test plan

- [x] Unit tests pass in Docker locally (129 passed)
- [ ] CI job runs successfully

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <[email protected]>
1 parent 904c2ec commit a0ce44a

File tree

7 files changed

+381
-19
lines changed

7 files changed

+381
-19
lines changed

.claude/agents/test-agent.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
name: test-agent
3+
description: A simple test agent for SDK testing
4+
tools: Read
5+
---
6+
7+
# Test Agent
8+
9+
You are a simple test agent. When asked a question, provide a brief, helpful answer.

.dockerignore

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Git
2+
.git
3+
.gitignore
4+
5+
# Python
6+
__pycache__
7+
*.py[cod]
8+
*$py.class
9+
*.so
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
27+
# Virtual environments
28+
.env
29+
.venv
30+
env/
31+
venv/
32+
ENV/
33+
34+
# IDE
35+
.idea/
36+
.vscode/
37+
*.swp
38+
*.swo
39+
40+
# Testing/Coverage
41+
.coverage
42+
.pytest_cache/
43+
htmlcov/
44+
.tox/
45+
.nox/
46+
47+
# Misc
48+
*.log
49+
.DS_Store

.github/workflows/test.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,24 @@ jobs:
8181
run: |
8282
python -m pytest e2e-tests/ -v -m e2e
8383
84+
test-e2e-docker:
85+
runs-on: ubuntu-latest
86+
needs: test # Run after unit tests pass
87+
# Run e2e tests in Docker to catch container-specific issues like #406
88+
89+
steps:
90+
- uses: actions/checkout@v4
91+
92+
- name: Build Docker test image
93+
run: docker build -f Dockerfile.test -t claude-sdk-test .
94+
95+
- name: Run e2e tests in Docker
96+
env:
97+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
98+
run: |
99+
docker run --rm -e ANTHROPIC_API_KEY \
100+
claude-sdk-test python -m pytest e2e-tests/ -v -m e2e
101+
84102
test-examples:
85103
runs-on: ubuntu-latest
86104
needs: test-e2e # Run after e2e tests

Dockerfile.test

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Dockerfile for running SDK tests in a containerized environment
# This helps catch Docker-specific issues like #406

FROM python:3.12-slim

# Make every RUN step fail when any command in a pipeline fails. Without
# pipefail, `curl ... | bash` reports success even if the download errors
# out, because only the exit status of the last command (bash) is checked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install dependencies for Claude CLI and git (needed for some tests)
RUN apt-get update && apt-get install -y \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Claude Code CLI
RUN curl -fsSL https://claude.ai/install.sh | bash
ENV PATH="/root/.local/bin:$PATH"

# Verify CLI installation right away so a broken install fails the build
# before the (more frequently invalidated) source copy and pip layers run
RUN claude -v

# Set up working directory
WORKDIR /app

# Copy the SDK source
COPY . .

# Install SDK with dev dependencies
RUN pip install -e ".[dev]"

# Default: run unit tests
CMD ["python", "-m", "pytest", "tests/", "-v"]

e2e-tests/test_agents_and_settings.py

Lines changed: 92 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,88 @@ async def test_agent_definition():
3838
async for message in client.receive_response():
3939
if isinstance(message, SystemMessage) and message.subtype == "init":
4040
agents = message.data.get("agents", [])
41-
assert isinstance(
42-
agents, list
43-
), f"agents should be a list of strings, got: {type(agents)}"
44-
assert (
45-
"test-agent" in agents
46-
), f"test-agent should be available, got: {agents}"
41+
assert isinstance(agents, list), (
42+
f"agents should be a list of strings, got: {type(agents)}"
43+
)
44+
assert "test-agent" in agents, (
45+
f"test-agent should be available, got: {agents}"
46+
)
4747
break
4848

4949

50+
@pytest.mark.e2e
@pytest.mark.asyncio
async def test_filesystem_agent_loading():
    """Test that filesystem-based agents load via setting_sources and produce full response.

    This is the core test for issue #406. It verifies that when using
    setting_sources=["project"] with a .claude/agents/ directory containing
    agent definitions, the SDK:
    1. Loads the agents (they appear in init message)
    2. Produces a full response with AssistantMessage
    3. Completes with a ResultMessage

    The bug in #406 causes the iterator to complete after only the
    init SystemMessage, never yielding AssistantMessage or ResultMessage.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create a temporary project with a filesystem agent
        project_dir = Path(tmpdir)
        agents_dir = project_dir / ".claude" / "agents"
        agents_dir.mkdir(parents=True)

        # Create a test agent file
        agent_file = agents_dir / "fs-test-agent.md"
        agent_file.write_text(
            """---
name: fs-test-agent
description: A filesystem test agent for SDK testing
tools: Read
---

# Filesystem Test Agent

You are a simple test agent. When asked a question, provide a brief, helpful answer.
""",
            encoding="utf-8",
        )

        options = ClaudeAgentOptions(
            setting_sources=["project"],
            cwd=project_dir,
            max_turns=1,
        )

        # Drain the whole response so a truncated stream (issue #406) shows up
        # as missing message types rather than as a hang or early break.
        messages = []
        async with ClaudeSDKClient(options=options) as client:
            await client.query("Say hello in exactly 3 words")
            async for msg in client.receive_response():
                messages.append(msg)

        # Must have at least init, assistant, result
        message_types = [type(m).__name__ for m in messages]

        assert "SystemMessage" in message_types, "Missing SystemMessage (init)"
        assert "AssistantMessage" in message_types, (
            f"Missing AssistantMessage - got only: {message_types}. "
            "This may indicate issue #406 (silent failure with filesystem agents)."
        )
        assert "ResultMessage" in message_types, "Missing ResultMessage"

        # Require an actual init message: a SystemMessage with another subtype
        # must not let the agent check below be skipped silently.
        init_messages = [
            m
            for m in messages
            if isinstance(m, SystemMessage) and m.subtype == "init"
        ]
        assert init_messages, (
            f"No SystemMessage with subtype 'init' received - got: {message_types}"
        )

        # Agents are returned as strings (just names)
        agents = init_messages[0].data.get("agents", [])
        assert "fs-test-agent" in agents, (
            f"fs-test-agent not loaded from filesystem. Found: {agents}"
        )

        # On Windows, wait for file handles to be released before cleanup
        if sys.platform == "win32":
            await asyncio.sleep(0.5)
121+
122+
50123
@pytest.mark.e2e
51124
@pytest.mark.asyncio
52125
async def test_setting_sources_default():
@@ -74,12 +147,12 @@ async def test_setting_sources_default():
74147
async for message in client.receive_response():
75148
if isinstance(message, SystemMessage) and message.subtype == "init":
76149
output_style = message.data.get("output_style")
77-
assert (
78-
output_style != "local-test-style"
79-
), f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
80-
assert (
81-
output_style == "default"
82-
), f"outputStyle should be 'default', got: {output_style}"
150+
assert output_style != "local-test-style", (
151+
f"outputStyle should NOT be from local settings (default is no settings), got: {output_style}"
152+
)
153+
assert output_style == "default", (
154+
f"outputStyle should be 'default', got: {output_style}"
155+
)
83156
break
84157

85158
# On Windows, wait for file handles to be released before cleanup
@@ -121,9 +194,9 @@ async def test_setting_sources_user_only():
121194
async for message in client.receive_response():
122195
if isinstance(message, SystemMessage) and message.subtype == "init":
123196
commands = message.data.get("slash_commands", [])
124-
assert (
125-
"testcmd" not in commands
126-
), f"testcmd should NOT be available with user-only sources, got: {commands}"
197+
assert "testcmd" not in commands, (
198+
f"testcmd should NOT be available with user-only sources, got: {commands}"
199+
)
127200
break
128201

129202
# On Windows, wait for file handles to be released before cleanup
@@ -159,11 +232,11 @@ async def test_setting_sources_project_included():
159232
async for message in client.receive_response():
160233
if isinstance(message, SystemMessage) and message.subtype == "init":
161234
output_style = message.data.get("output_style")
162-
assert (
163-
output_style == "local-test-style"
164-
), f"outputStyle should be from local settings, got: {output_style}"
235+
assert output_style == "local-test-style", (
236+
f"outputStyle should be from local settings, got: {output_style}"
237+
)
165238
break
166239

167240
# On Windows, wait for file handles to be released before cleanup
168241
if sys.platform == "win32":
169-
await asyncio.sleep(0.5)
242+
await asyncio.sleep(0.5)

examples/filesystem_agents.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/env python3
2+
"""Example of loading filesystem-based agents via setting_sources.
3+
4+
This example demonstrates how to load agents defined in .claude/agents/ files
5+
using the setting_sources option. This is different from inline AgentDefinition
6+
objects - these agents are loaded from markdown files on disk.
7+
8+
This example tests the scenario from issue #406 where filesystem-based agents
9+
loaded via setting_sources=["project"] may silently fail in certain environments.
10+
11+
Usage:
12+
./examples/filesystem_agents.py
13+
"""
14+
15+
import asyncio
16+
from pathlib import Path
17+
18+
from claude_agent_sdk import (
19+
AssistantMessage,
20+
ClaudeAgentOptions,
21+
ClaudeSDKClient,
22+
ResultMessage,
23+
SystemMessage,
24+
TextBlock,
25+
)
26+
27+
28+
def extract_agents(msg: SystemMessage) -> list[str]:
    """Extract agent names from a system ``init`` message.

    Args:
        msg: System message whose ``data`` mapping may carry an ``agents``
            entry. Each agent may be a plain string or a dict with a
            ``name`` field.

    Returns:
        The agent names in the order they are listed. Entries without a
        usable name are skipped. An empty list is returned for non-init
        messages and when no agents are listed.
    """
    if msg.subtype != "init":
        return []

    # ``agents`` may be missing or explicitly null; both mean "no agents".
    entries = msg.data.get("agents") or []

    names: list[str] = []
    for entry in entries:
        name = entry.get("name") if isinstance(entry, dict) else entry
        # Skip nameless dicts and non-string entries so callers only ever
        # see real agent names (never "" or None).
        if isinstance(name, str) and name:
            names.append(name)
    return names
41+
42+
43+
async def main():
    """Send one query with project settings enabled and summarize the reply.

    The client runs with ``setting_sources=["project"]`` and the directory
    two levels above this file as ``cwd``. Every received message's type name
    is recorded, agent names are taken from the init message, and a
    SUCCESS/FAILURE summary is printed at the end.
    """
    print("=== Filesystem Agents Example ===")
    print("Testing: setting_sources=['project'] with .claude/agents/test-agent.md")
    print()

    # The SDK repo directory, which has .claude/agents/test-agent.md
    repo_root = Path(__file__).parent.parent
    client_options = ClaudeAgentOptions(setting_sources=["project"], cwd=repo_root)

    seen_types: list[str] = []
    loaded_agents: list[str] = []

    async with ClaudeSDKClient(options=client_options) as client:
        await client.query("Say hello in exactly 3 words")

        async for message in client.receive_response():
            seen_types.append(type(message).__name__)

            if isinstance(message, SystemMessage) and message.subtype == "init":
                loaded_agents = extract_agents(message)
                print(f"Init message received. Agents loaded: {loaded_agents}")
                continue

            if isinstance(message, AssistantMessage):
                # Only text blocks are echoed; other content blocks are ignored.
                text_blocks = (b for b in message.content if isinstance(b, TextBlock))
                for text_block in text_blocks:
                    print(f"Assistant: {text_block.text}")
                continue

            if isinstance(message, ResultMessage):
                cost = message.total_cost_usd or 0
                print(f"Result: subtype={message.subtype}, cost=${cost:.4f}")

    print()
    print("=== Summary ===")
    print(f"Message types received: {seen_types}")
    print(f"Total messages: {len(seen_types)}")

    # Validate the results
    got_init = "SystemMessage" in seen_types
    got_assistant = "AssistantMessage" in seen_types
    got_result = "ResultMessage" in seen_types

    print()
    if not (got_init and got_assistant and got_result):
        print("FAILURE: Did not receive full response")
        print(f" - Init: {got_init}")
        print(f" - Assistant: {got_assistant}")
        print(f" - Result: {got_result}")
    else:
        print("SUCCESS: Received full response (init, assistant, result)")

    print(
        "SUCCESS: test-agent was loaded from filesystem"
        if "test-agent" in loaded_agents
        else "WARNING: test-agent was NOT loaded (may not exist in .claude/agents/)"
    )
106+
if __name__ == "__main__":
107+
asyncio.run(main())

0 commit comments

Comments
 (0)