Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### New features

* `.stream()` and `.stream_async()` now support a `data_model` parameter for structured data extraction while streaming. (#262)
* `ChatAnthropic()` now uses native structured outputs API for supported models (claude-sonnet-4-5, claude-opus-4-1, claude-opus-4-5, claude-haiku-4-5), enabling streaming with `data_model`. Older models fall back to the tool-based approach. (#263)

## [0.15.0] - 2026-01-06

Expand Down
159 changes: 106 additions & 53 deletions chatlas/_provider_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,27 @@
RawMessageStreamEvent = object


STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"


def supports_structured_outputs(model: str) -> bool:
    """
    Check if the model supports the beta structured outputs API.

    Matching is by model-name prefix, so dated snapshot identifiers
    (e.g. ``claude-haiku-4-5-20251001``) are recognized as well.

    https://platform.claude.com/docs/en/build-with-claude/structured-outputs
    """
    # str.startswith accepts a tuple of prefixes, so a single call covers
    # every supported model family (also handles the empty string safely).
    return model.startswith(
        (
            "claude-sonnet-4-5",
            "claude-opus-4-1",
            "claude-opus-4-5",
            "claude-haiku-4-5",
        )
    )


def ChatAnthropic(
*,
system_prompt: Optional[str] = None,
Expand Down Expand Up @@ -353,8 +374,13 @@ def chat_perform(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional["SubmitInputArgs"] = None,
):
kwargs = self._chat_perform_args(stream, turns, tools, data_model, kwargs)
return self._client.messages.create(**kwargs) # type: ignore
api_kwargs = self._chat_perform_args(stream, turns, tools, data_model, kwargs)
if data_model is not None and supports_structured_outputs(self.model):
return self._client.beta.messages.create(
betas=[STRUCTURED_OUTPUTS_BETA],
**api_kwargs, # type: ignore[arg-type]
)
return self._client.messages.create(**api_kwargs) # type: ignore

@overload
async def chat_perform_async(
Expand Down Expand Up @@ -387,8 +413,13 @@ async def chat_perform_async(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional["SubmitInputArgs"] = None,
):
kwargs = self._chat_perform_args(stream, turns, tools, data_model, kwargs)
return await self._async_client.messages.create(**kwargs) # type: ignore
api_kwargs = self._chat_perform_args(stream, turns, tools, data_model, kwargs)
if data_model is not None and supports_structured_outputs(self.model):
return await self._async_client.beta.messages.create(
betas=[STRUCTURED_OUTPUTS_BETA],
**api_kwargs, # type: ignore[arg-type]
)
return await self._async_client.messages.create(**api_kwargs) # type: ignore

def _chat_perform_args(
self,
Expand All @@ -400,42 +431,6 @@ def _chat_perform_args(
) -> "SubmitInputArgs":
tool_schemas = [self._anthropic_tool_schema(tool) for tool in tools.values()]

# If data extraction is requested, add a "mock" tool with parameters inferred from the data model
data_model_tool: Tool | None = None
if data_model is not None:

def _structured_tool_call(**kwargs: Any):
"""Extract structured data"""
pass

data_model_tool = Tool.from_func(_structured_tool_call)

data_model_schema = basemodel_to_param_schema(data_model)

# Extract $defs from the nested schema and place at top level
# JSON Schema $ref pointers like "#/$defs/..." need $defs at the root
defs = data_model_schema.pop("$defs", None)

params: dict[str, Any] = {
"type": "object",
"properties": {
"data": data_model_schema,
},
}
if defs:
params["$defs"] = defs

data_model_tool.schema["function"]["parameters"] = params

tool_schemas.append(self._anthropic_tool_schema(data_model_tool))

if stream:
stream = False
warnings.warn(
"Anthropic does not support structured data extraction in streaming mode.",
stacklevel=2,
)

kwargs_full: "SubmitInputArgs" = {
"stream": stream,
"messages": self._as_message_params(turns),
Expand All @@ -445,11 +440,25 @@ def _structured_tool_call(**kwargs: Any):
**(kwargs or {}),
}

if data_model_tool:
kwargs_full["tool_choice"] = {
"type": "tool",
"name": data_model_tool.name,
}
if data_model is not None:
if supports_structured_outputs(self.model):
from anthropic import transform_schema

kwargs_full["output_format"] = { # type: ignore[typeddict-unknown-key]
"type": "json_schema",
"schema": transform_schema(data_model),
}
else:
# TODO: when structured outputs are generally available,
# we can remove this legacy tool-based approach
data_model_tool = self.create_data_model_tool(data_model)
cast(list, kwargs_full["tools"]).append(
self._anthropic_tool_schema(data_model_tool)
)
kwargs_full["tool_choice"] = {
"type": "tool",
"name": data_model_tool.name,
}

if "system" not in kwargs_full:
if len(turns) > 0 and isinstance(turns[0], SystemTurn):
Expand All @@ -463,6 +472,33 @@ def _structured_tool_call(**kwargs: Any):

return kwargs_full

@staticmethod
def create_data_model_tool(data_model: type[BaseModel]) -> Tool:
    """
    Build the legacy "mock" extraction tool for *data_model*.

    Used for structured data extraction on models without native
    structured-outputs support: the request forces the model to call
    this tool, and the arguments the model supplies become the
    extracted data.
    """

    def _structured_tool_call(**kwargs: Any):
        """Extract structured data"""
        pass

    tool = Tool.from_func(_structured_tool_call)
    model_schema = basemodel_to_param_schema(data_model)

    # JSON Schema "$ref": "#/$defs/..." pointers resolve from the schema
    # root, so any $defs nested inside the model schema must be hoisted
    # to the top level of the parameters object.
    hoisted_defs = model_schema.pop("$defs", None)

    parameters: dict[str, Any] = {
        "type": "object",
        "properties": {"data": model_schema},
    }
    if hoisted_defs:
        parameters["$defs"] = hoisted_defs

    tool.schema["function"]["parameters"] = parameters
    return tool

def stream_text(self, chunk) -> Optional[str]:
if chunk.type == "content_block_delta":
if chunk.delta.type == "text_delta":
Expand Down Expand Up @@ -753,11 +789,24 @@ def _anthropic_tool_schema(tool: "Tool | ToolBuiltIn") -> "ToolUnionParam":

def _as_turn(self, completion: Message, has_data_model=False) -> AssistantTurn:
contents = []

# Detect which structured output approach was used:
# - Old approach: has a _structured_tool_call tool_use block
# - New approach: has_data_model=True but no _structured_tool_call (JSON in text)
uses_old_tool_approach = has_data_model and any(
c.type == "tool_use" and c.name == "_structured_tool_call"
for c in completion.content
)
uses_new_output_format = has_data_model and not uses_old_tool_approach

for content in completion.content:
if content.type == "text":
contents.append(ContentText(text=content.text))
if uses_new_output_format:
contents.append(ContentJson(value=orjson.loads(content.text)))
else:
contents.append(ContentText(text=content.text))
elif content.type == "tool_use":
if has_data_model and content.name == "_structured_tool_call":
if uses_old_tool_approach and content.name == "_structured_tool_call":
if not isinstance(content.input, dict):
raise ValueError(
"Expected data extraction tool to return a dictionary."
Expand Down Expand Up @@ -874,26 +923,30 @@ def batch_submit(
requests: list["BatchRequest"] = []

for i, turns in enumerate(conversations):
kwargs = self._chat_perform_args(
api_kwargs = self._chat_perform_args(
stream=False,
turns=turns,
tools={},
data_model=data_model,
)

params: "MessageCreateParamsNonStreaming" = {
"messages": kwargs.get("messages", {}),
"messages": api_kwargs.get("messages", {}),
"model": self.model,
"max_tokens": kwargs.get("max_tokens", 4096),
"max_tokens": api_kwargs.get("max_tokens", 4096),
}

# If data_model, tools/tool_choice should be present
tools = kwargs.get("tools")
tool_choice = kwargs.get("tool_choice")
# If data_model, tools/tool_choice should be present (old API)
# or output_format (new API)
tools = api_kwargs.get("tools")
tool_choice = api_kwargs.get("tool_choice")
output_format = api_kwargs.get("output_format")
if tools and not isinstance(tools, NotGiven):
params["tools"] = tools
if tool_choice and not isinstance(tool_choice, NotGiven):
params["tool_choice"] = tool_choice
if output_format and not isinstance(output_format, NotGiven):
params["output_format"] = output_format # type: ignore[typeddict-unknown-key]

requests.append({"custom_id": f"request-{i}", "params": params})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@ interactions:
body: '{"max_tokens": 4096, "messages": [{"role": "user", "content": [{"text":
"The new quantum computing breakthrough could revolutionize the tech industry.",
"type": "text", "cache_control": {"type": "ephemeral", "ttl": "5m"}}]}], "model":
"claude-haiku-4-5-20251001", "stream": false, "system": [{"type": "text", "text":
"You are a friendly but terse assistant.", "cache_control": {"type": "ephemeral",
"ttl": "5m"}}], "tool_choice": {"type": "tool", "name": "_structured_tool_call"},
"tools": [{"name": "_structured_tool_call", "input_schema": {"type": "object",
"properties": {"data": {"description": "Array of classification results. The
scores should sum to 1.", "properties": {"classifications": {"items": {"$ref":
"#/$defs/Classification"}, "type": "array"}}, "required": ["classifications"],
"type": "object", "additionalProperties": false}}, "$defs": {"Classification":
{"properties": {"name": {"description": "The category name", "enum": ["Politics",
"Sports", "Technology", "Entertainment", "Business", "Other"], "title": "Name",
"type": "string"}, "score": {"description": "The classification score for the
category, ranging from 0.0 to 1.0.", "title": "Score", "type": "number"}}, "required":
["name", "score"], "title": "Classification", "type": "object", "additionalProperties":
false}}}, "description": "Extract structured data"}]}'
"claude-haiku-4-5-20251001", "output_format": {"type": "json_schema", "schema":
{"$defs": {"Classification": {"type": "object", "title": "Classification", "properties":
{"name": {"type": "string", "description": "The category name\n\n{enum: [''Politics'',
''Sports'', ''Technology'', ''Entertainment'', ''Business'', ''Other'']}", "title":
"Name"}, "score": {"type": "number", "description": "The classification score
for the category, ranging from 0.0 to 1.0.", "title": "Score"}}, "additionalProperties":
false, "required": ["name", "score"]}}, "type": "object", "description": "Array
of classification results. The scores should sum to 1.", "title": "Classifications",
"properties": {"classifications": {"type": "array", "title": "Classifications",
"items": {"$ref": "#/$defs/Classification"}}}, "additionalProperties": false,
"required": ["classifications"]}}, "stream": false, "system": [{"type": "text",
"text": "You are a friendly but terse assistant.", "cache_control": {"type":
"ephemeral", "ttl": "5m"}}], "tools": []}'
headers:
Accept:
- application/json
Expand All @@ -25,33 +24,37 @@ interactions:
Connection:
- keep-alive
Content-Length:
- '1251'
- '1172'
Content-Type:
- application/json
Host:
- api.anthropic.com
X-Stainless-Async:
- 'false'
anthropic-beta:
- structured-outputs-2025-11-13
anthropic-version:
- '2023-06-01'
x-stainless-read-timeout:
- '600'
x-stainless-timeout:
- '600'
method: POST
uri: https://api.anthropic.com/v1/messages
uri: https://api.anthropic.com/v1/messages?beta=true
response:
body:
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_012sEhjbSWV1Bi2HHnMLA2Ec","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01WQtuJ2RRnRgfEyzEnmWSMZ","name":"_structured_tool_call","input":{"data":{"classifications":[{"name":"Technology","score":0.95},{"name":"Business","score":0.05},{"name":"Politics","score":0.0},{"name":"Sports","score":0.0},{"name":"Entertainment","score":0.0},{"name":"Other","score":0.0}]}}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":855,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":181,"service_tier":"standard"}}'
string: '{"model":"claude-haiku-4-5-20251001","id":"msg_01XbxzbDzo96okuCZtSmkJKX","type":"message","role":"assistant","content":[{"type":"text","text":"{\"classifications\":
[{\"name\": \"Technology\", \"score\": 0.95}, {\"name\": \"Business\", \"score\":
0.05}]}"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":416,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":36,"service_tier":"standard"}}'
headers:
CF-RAY:
- 9b653de28d671f38-DEN
- 9b9c58811b851f32-DEN
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 30 Dec 2025 23:15:54 GMT
- Tue, 06 Jan 2026 15:46:03 GMT
Server:
- cloudflare
Transfer-Encoding:
Expand All @@ -63,33 +66,33 @@ interactions:
anthropic-ratelimit-input-tokens-remaining:
- '4000000'
anthropic-ratelimit-input-tokens-reset:
- '2025-12-30T23:15:53Z'
- '2026-01-06T15:46:03Z'
anthropic-ratelimit-output-tokens-limit:
- '800000'
anthropic-ratelimit-output-tokens-remaining:
- '800000'
anthropic-ratelimit-output-tokens-reset:
- '2025-12-30T23:15:54Z'
- '2026-01-06T15:46:03Z'
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2025-12-30T23:15:53Z'
- '2026-01-06T15:46:00Z'
anthropic-ratelimit-tokens-limit:
- '4800000'
anthropic-ratelimit-tokens-remaining:
- '4800000'
anthropic-ratelimit-tokens-reset:
- '2025-12-30T23:15:53Z'
- '2026-01-06T15:46:03Z'
cf-cache-status:
- DYNAMIC
content-length:
- '705'
- '517'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- '1028'
- '3106'
status:
code: 200
message: OK
Expand Down
Loading