diff --git a/e2e-tests/test_structured_output.py b/e2e-tests/test_structured_output.py new file mode 100644 index 00000000..32e7ba21 --- /dev/null +++ b/e2e-tests/test_structured_output.py @@ -0,0 +1,204 @@ +"""End-to-end tests for structured output with real Claude API calls. + +These tests verify that the output_schema feature works correctly by making +actual API calls to Claude with JSON Schema validation. +""" + +import tempfile + +import pytest + +from claude_agent_sdk import ( + ClaudeAgentOptions, + ResultMessage, + query, +) + + +@pytest.mark.e2e +@pytest.mark.asyncio +async def test_simple_structured_output(): + """Test structured output with file counting requiring tool use.""" + + # Define schema for file analysis + schema = { + "type": "object", + "properties": { + "file_count": {"type": "number"}, + "has_tests": {"type": "boolean"}, + "test_file_count": {"type": "number"}, + }, + "required": ["file_count", "has_tests"], + } + + options = ClaudeAgentOptions( + output_format={"type": "json_schema", "schema": schema}, + permission_mode="acceptEdits", + cwd=".", # Use current directory + ) + + # Agent must use Glob/Bash to count files + result_message = None + async for message in query( + prompt="Count how many Python files are in src/claude_agent_sdk/ and check if there are any test files. Use tools to explore the filesystem.", + options=options, + ): + if isinstance(message, ResultMessage): + result_message = message + + # Verify result + assert result_message is not None, "No result message received" + assert not result_message.is_error, f"Query failed: {result_message.result}" + assert result_message.subtype == "success" + + # Verify structured output is present and valid + assert result_message.structured_output is not None, "No structured output in result" + assert "file_count" in result_message.structured_output + assert "has_tests" in result_message.structured_output + assert isinstance(result_message.structured_output["file_count"], (int, float)) + assert isinstance(result_message.structured_output["has_tests"], bool) + + # Should find Python files in src/ + assert result_message.structured_output["file_count"] > 0 + + +@pytest.mark.e2e +@pytest.mark.asyncio +async def test_nested_structured_output(): + """Test structured output with nested objects and arrays.""" + + # Define a schema with nested structure + schema = { + "type": "object", + "properties": { + "analysis": { + "type": "object", + "properties": { + "word_count": {"type": "number"}, + "character_count": {"type": "number"}, + }, + "required": ["word_count", "character_count"], + }, + "words": { + "type": "array", + "items": {"type": "string"}, + }, + }, + "required": ["analysis", "words"], + } + + options = ClaudeAgentOptions( + output_format={"type": "json_schema", "schema": schema}, + permission_mode="acceptEdits", + ) + + result_message = None + async for message in query( + prompt="Analyze this text: 'Hello world'. Provide word count, character count, and list of words.", + options=options, + ): + if isinstance(message, ResultMessage): + result_message = message + + # Verify result + assert result_message is not None + assert not result_message.is_error + assert result_message.structured_output is not None + + # Check nested structure + output = result_message.structured_output + assert "analysis" in output + assert "words" in output + assert output["analysis"]["word_count"] == 2 + assert output["analysis"]["character_count"] == 11 # "Hello world" + assert len(output["words"]) == 2 + + +@pytest.mark.e2e +@pytest.mark.asyncio +async def test_structured_output_with_enum(): + """Test structured output with enum constraints requiring code analysis.""" + + schema = { + "type": "object", + "properties": { + "has_tests": {"type": "boolean"}, + "test_framework": { + "type": "string", + "enum": ["pytest", "unittest", "nose", "unknown"], + }, + "test_count": {"type": "number"}, + }, + "required": ["has_tests", "test_framework"], + } + + options = ClaudeAgentOptions( + output_format={"type": "json_schema", "schema": schema}, + permission_mode="acceptEdits", + cwd=".", + ) + + result_message = None + async for message in query( + prompt="Search for test files in the tests/ directory. Determine which test framework is being used (pytest/unittest/nose) and count how many test files exist. Use Grep to search for framework imports.", + options=options, + ): + if isinstance(message, ResultMessage): + result_message = message + + # Verify result + assert result_message is not None + assert not result_message.is_error + assert result_message.structured_output is not None + + # Check enum values are valid + output = result_message.structured_output + assert output["test_framework"] in ["pytest", "unittest", "nose", "unknown"] + assert isinstance(output["has_tests"], bool) + + # This repo uses pytest + assert output["has_tests"] is True + assert output["test_framework"] == "pytest" + + +@pytest.mark.e2e +@pytest.mark.asyncio +async def test_structured_output_with_tools(): + """Test structured output when agent uses tools.""" + + # Schema for file analysis + schema = { + "type": "object", + "properties": { + "file_count": {"type": "number"}, + "has_readme": {"type": "boolean"}, + }, + "required": ["file_count", "has_readme"], + } + + options = ClaudeAgentOptions( + output_format={"type": "json_schema", "schema": schema}, + permission_mode="acceptEdits", + cwd=tempfile.gettempdir(), # Cross-platform temp directory + ) + + result_message = None + async for message in query( + prompt="Count how many files are in the current directory and check if there's a README file. Use tools as needed.", + options=options, + ): + if isinstance(message, ResultMessage): + result_message = message + + # Verify result + assert result_message is not None + assert not result_message.is_error + assert result_message.structured_output is not None + + # Check structure + output = result_message.structured_output + assert "file_count" in output + assert "has_readme" in output + assert isinstance(output["file_count"], (int, float)) + assert isinstance(output["has_readme"], bool) + assert output["file_count"] >= 0 # Should be non-negative diff --git a/src/claude_agent_sdk/_internal/message_parser.py b/src/claude_agent_sdk/_internal/message_parser.py index 6532a204..694c52c3 100644 --- a/src/claude_agent_sdk/_internal/message_parser.py +++ b/src/claude_agent_sdk/_internal/message_parser.py @@ -149,6 +149,7 @@ def parse_message(data: dict[str, Any]) -> Message: total_cost_usd=data.get("total_cost_usd"), usage=data.get("usage"), result=data.get("result"), + structured_output=data.get("structured_output"), ) except KeyError as e: raise MessageParseError( diff --git a/src/claude_agent_sdk/_internal/transport/subprocess_cli.py b/src/claude_agent_sdk/_internal/transport/subprocess_cli.py index d669a412..433c47e6 100644 --- a/src/claude_agent_sdk/_internal/transport/subprocess_cli.py +++ b/src/claude_agent_sdk/_internal/transport/subprocess_cli.py @@ -215,7 +215,24 @@ def _build_command(self) -> list[str]: # Flag with value cmd.extend([f"--{flag}", str(value)]) + if self._options.max_thinking_tokens is not None: + cmd.extend( + ["--max-thinking-tokens", str(self._options.max_thinking_tokens)] + ) + + # Extract schema from output_format structure if provided + # Expected: {"type": "json_schema", "schema": {...}} + if ( + self._options.output_format is not None + and isinstance(self._options.output_format, dict) + and self._options.output_format.get("type") == "json_schema" + ): + schema = self._options.output_format.get("schema") + if schema is not None: + cmd.extend(["--json-schema", json.dumps(schema)]) + # Add prompt handling based on mode + # IMPORTANT: This must come AFTER all flags because everything after "--" is treated as arguments if self._is_streaming: # Streaming mode: use --input-format stream-json cmd.extend(["--input-format", "stream-json"]) @@ -223,11 +240,6 @@ def _build_command(self) -> list[str]: # String mode: use --print with the prompt cmd.extend(["--print", "--", str(self._prompt)]) - if self._options.max_thinking_tokens is not None: - cmd.extend( - ["--max-thinking-tokens", str(self._options.max_thinking_tokens)] - ) - # Check if command line is too long (Windows limitation) cmd_str = " ".join(cmd) if len(cmd_str) > _CMD_LENGTH_LIMIT and self._options.agents: diff --git a/src/claude_agent_sdk/types.py b/src/claude_agent_sdk/types.py index e375bee2..406c204c 100644 --- a/src/claude_agent_sdk/types.py +++ b/src/claude_agent_sdk/types.py @@ -492,6 +492,7 @@ class ResultMessage: total_cost_usd: float | None = None usage: dict[str, Any] | None = None result: str | None = None + structured_output: Any = None @dataclass @@ -558,6 +559,9 @@ class ClaudeAgentOptions: plugins: list[SdkPluginConfig] = field(default_factory=list) # Max tokens for thinking blocks max_thinking_tokens: int | None = None + # Output format for structured outputs (matches Messages API structure) + # Example: {"type": "json_schema", "schema": {"type": "object", "properties": {...}}} + output_format: dict[str, Any] | None = None # SDK Control Protocol