Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 204 additions & 0 deletions e2e-tests/test_structured_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""End-to-end tests for structured output with real Claude API calls.

These tests verify that the structured output feature (the ``output_format``
option) works correctly by making actual API calls to Claude with JSON Schema
validation.
"""

import tempfile

import pytest

from claude_agent_sdk import (
ClaudeAgentOptions,
ResultMessage,
query,
)


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_simple_structured_output():
    """Check a flat schema end to end with a file-counting prompt.

    The prompt asks the agent to explore ``src/claude_agent_sdk/`` with tools;
    the final ``ResultMessage`` must succeed and carry a ``structured_output``
    with a numeric ``file_count`` (greater than zero) and a boolean
    ``has_tests``.
    """
    # Flat object schema: two required fields and one optional counter.
    file_analysis_schema = {
        "type": "object",
        "properties": {
            "file_count": {"type": "number"},
            "has_tests": {"type": "boolean"},
            "test_file_count": {"type": "number"},
        },
        "required": ["file_count", "has_tests"],
    }

    agent_options = ClaudeAgentOptions(
        output_format={"type": "json_schema", "schema": file_analysis_schema},
        permission_mode="acceptEdits",
        cwd=".",  # Run relative to the directory the tests were started from
    )

    # Drain the whole stream, keeping only result messages; the last one wins.
    collected_results = [
        msg
        async for msg in query(
            prompt=(
                "Count how many Python files are in src/claude_agent_sdk/ and check "
                "if there are any test files. Use tools to explore the filesystem."
            ),
            options=agent_options,
        )
        if isinstance(msg, ResultMessage)
    ]
    final_result = collected_results[-1] if collected_results else None

    # The query must have finished, and finished successfully.
    assert final_result is not None, "No result message received"
    assert not final_result.is_error, f"Query failed: {final_result.result}"
    assert final_result.subtype == "success"

    # The structured payload must exist and have the required, typed fields.
    payload = final_result.structured_output
    assert payload is not None, "No structured output in result"
    assert "file_count" in payload
    assert "has_tests" in payload
    assert isinstance(payload["file_count"], (int, float))
    assert isinstance(payload["has_tests"], bool)

    # There are Python files under src/, so the count must be positive.
    assert payload["file_count"] > 0


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_nested_structured_output():
    """Test structured output with nested objects and arrays.

    Sends the fixed text ``'Hello world'`` and checks that the final
    ``ResultMessage`` carries a ``structured_output`` with a nested
    ``analysis`` object (word and character counts) and a ``words`` array.
    """

    # Define a schema with nested structure
    schema = {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "word_count": {"type": "number"},
                    "character_count": {"type": "number"},
                },
                "required": ["word_count", "character_count"],
            },
            "words": {
                "type": "array",
                "items": {"type": "string"},
            },
        },
        "required": ["analysis", "words"],
    }

    options = ClaudeAgentOptions(
        output_format={"type": "json_schema", "schema": schema},
        permission_mode="acceptEdits",
    )

    # Keep the last ResultMessage the stream yields.
    result_message = None
    async for message in query(
        prompt="Analyze this text: 'Hello world'. Provide word count, character count, and list of words.",
        options=options,
    ):
        if isinstance(message, ResultMessage):
            result_message = message

    # Verify result. Failure messages surface the reason, since this test
    # depends on a live API call and a bare assert would hide the cause.
    assert result_message is not None, "No result message received"
    assert not result_message.is_error, f"Query failed: {result_message.result}"
    assert result_message.structured_output is not None, (
        "No structured output in result"
    )

    # Check nested structure
    output = result_message.structured_output
    assert "analysis" in output
    assert "words" in output
    assert output["analysis"]["word_count"] == 2
    assert output["analysis"]["character_count"] == 11  # "Hello world"
    assert len(output["words"]) == 2


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_structured_output_with_enum():
    """Test structured output with enum constraints requiring code analysis.

    The prompt asks the agent to inspect the ``tests/`` directory; the final
    ``ResultMessage`` must carry a ``structured_output`` whose
    ``test_framework`` is one of the schema's enum values and, for this
    repository, equals ``"pytest"``.
    """

    # Single source of truth for the enum, used by both the schema and the
    # assertion below so the two cannot drift apart.
    allowed_frameworks = ["pytest", "unittest", "nose", "unknown"]

    schema = {
        "type": "object",
        "properties": {
            "has_tests": {"type": "boolean"},
            "test_framework": {
                "type": "string",
                "enum": allowed_frameworks,
            },
            "test_count": {"type": "number"},
        },
        "required": ["has_tests", "test_framework"],
    }

    options = ClaudeAgentOptions(
        output_format={"type": "json_schema", "schema": schema},
        permission_mode="acceptEdits",
        cwd=".",
    )

    # Keep the last ResultMessage the stream yields.
    result_message = None
    async for message in query(
        prompt="Search for test files in the tests/ directory. Determine which test framework is being used (pytest/unittest/nose) and count how many test files exist. Use Grep to search for framework imports.",
        options=options,
    ):
        if isinstance(message, ResultMessage):
            result_message = message

    # Verify result. Failure messages surface the reason, since this test
    # depends on a live API call and a bare assert would hide the cause.
    assert result_message is not None, "No result message received"
    assert not result_message.is_error, f"Query failed: {result_message.result}"
    assert result_message.structured_output is not None, (
        "No structured output in result"
    )

    # Check enum values are valid
    output = result_message.structured_output
    assert output["test_framework"] in allowed_frameworks
    assert isinstance(output["has_tests"], bool)

    # This repo uses pytest
    assert output["has_tests"] is True
    assert output["test_framework"] == "pytest"


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_structured_output_with_tools():
    """Test structured output when agent uses tools.

    The agent is asked to count the files in its working directory and report
    whether a README exists. The final ``ResultMessage`` must carry a
    ``structured_output`` with a non-negative numeric ``file_count`` and a
    boolean ``has_readme``.
    """

    # Schema for file analysis
    schema = {
        "type": "object",
        "properties": {
            "file_count": {"type": "number"},
            "has_readme": {"type": "boolean"},
        },
        "required": ["file_count", "has_readme"],
    }

    result_message = None
    # Run in a fresh, private directory rather than the shared system temp
    # dir: the agent has "acceptEdits" permission, and the shared directory's
    # contents are arbitrary (possibly large or private) and vary by machine.
    # The directory is removed when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as workdir:
        options = ClaudeAgentOptions(
            output_format={"type": "json_schema", "schema": schema},
            permission_mode="acceptEdits",
            cwd=workdir,  # Cross-platform, isolated temp directory
        )

        # Keep the last ResultMessage the stream yields.
        async for message in query(
            prompt="Count how many files are in the current directory and check if there's a README file. Use tools as needed.",
            options=options,
        ):
            if isinstance(message, ResultMessage):
                result_message = message

    # Verify result. Failure messages surface the reason, since this test
    # depends on a live API call and a bare assert would hide the cause.
    assert result_message is not None, "No result message received"
    assert not result_message.is_error, f"Query failed: {result_message.result}"
    assert result_message.structured_output is not None, (
        "No structured output in result"
    )

    # Check structure
    output = result_message.structured_output
    assert "file_count" in output
    assert "has_readme" in output
    assert isinstance(output["file_count"], (int, float))
    assert isinstance(output["has_readme"], bool)
    assert output["file_count"] >= 0  # Should be non-negative
1 change: 1 addition & 0 deletions src/claude_agent_sdk/_internal/message_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def parse_message(data: dict[str, Any]) -> Message:
total_cost_usd=data.get("total_cost_usd"),
usage=data.get("usage"),
result=data.get("result"),
structured_output=data.get("structured_output"),
)
except KeyError as e:
raise MessageParseError(
Expand Down
22 changes: 17 additions & 5 deletions src/claude_agent_sdk/_internal/transport/subprocess_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,19 +215,31 @@ def _build_command(self) -> list[str]:
# Flag with value
cmd.extend([f"--{flag}", str(value)])

if self._options.max_thinking_tokens is not None:
cmd.extend(
["--max-thinking-tokens", str(self._options.max_thinking_tokens)]
)

# Extract schema from output_format structure if provided
# Expected: {"type": "json_schema", "schema": {...}}
if (
self._options.output_format is not None
and isinstance(self._options.output_format, dict)
and self._options.output_format.get("type") == "json_schema"
):
schema = self._options.output_format.get("schema")
if schema is not None:
cmd.extend(["--json-schema", json.dumps(schema)])

# Add prompt handling based on mode
# IMPORTANT: This must come AFTER all flags because everything after "--" is treated as arguments
if self._is_streaming:
# Streaming mode: use --input-format stream-json
cmd.extend(["--input-format", "stream-json"])
else:
# String mode: use --print with the prompt
cmd.extend(["--print", "--", str(self._prompt)])

if self._options.max_thinking_tokens is not None:
cmd.extend(
["--max-thinking-tokens", str(self._options.max_thinking_tokens)]
)

# Check if command line is too long (Windows limitation)
cmd_str = " ".join(cmd)
if len(cmd_str) > _CMD_LENGTH_LIMIT and self._options.agents:
Expand Down
4 changes: 4 additions & 0 deletions src/claude_agent_sdk/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ class ResultMessage:
total_cost_usd: float | None = None
usage: dict[str, Any] | None = None
result: str | None = None
structured_output: Any = None


@dataclass
Expand Down Expand Up @@ -558,6 +559,9 @@ class ClaudeAgentOptions:
plugins: list[SdkPluginConfig] = field(default_factory=list)
# Max tokens for thinking blocks
max_thinking_tokens: int | None = None
# Output format for structured outputs (matches Messages API structure)
# Example: {"type": "json_schema", "schema": {"type": "object", "properties": {...}}}
output_format: dict[str, Any] | None = None


# SDK Control Protocol
Expand Down