Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
sent_first_chunk: bool = False
sent_content_block_start: bool = False
sent_content_block_finish: bool = False
current_content_block_type: Literal["text", "tool_use"] = "text"
current_content_block_type: Literal["text", "tool_use", "thinking"] = "text"
sent_last_message: bool = False
holding_chunk: Optional[Any] = None
holding_stop_reason_chunk: Optional[Any] = None
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

your code modifies the anthropic v1 messages api.

The error - is on the bedrock/converse_transformation for /v1/chat/completions AFAIK

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was the API I was looking to fix initially (such that litellm works for Claude Code), so it's not fully related to the linked issue.

This change should fix models that are explicitly routing to converse though.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice

from litellm.types.utils import StreamingChoices

from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthopicMessagesAssistantMessageParam,
Expand All @@ -22,9 +24,13 @@
AnthropicMessagesUserMessageParam,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseContentBlockThinking,
AnthropicResponseContentBlockRedactedThinking,
ContentBlockDelta,
ContentJsonBlockDelta,
ContentTextBlockDelta,
ContentThinkingBlockDelta,
ContentThinkingSignatureBlockDelta,
MessageBlockDelta,
MessageDelta,
UsageDelta,
Expand All @@ -42,6 +48,8 @@
ChatCompletionRequest,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionThinkingBlock,
ChatCompletionRedactedThinkingBlock,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
Expand Down Expand Up @@ -227,6 +235,7 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
thinking_blocks: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = []
if m["role"] == "assistant":
if isinstance(m.get("content"), str):
assistant_message_str = str(m.get("content", ""))
Expand All @@ -253,11 +262,28 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
function=function_chunk,
)
)
elif content.get("type") == "thinking":
thinking_block = ChatCompletionThinkingBlock(
type="thinking",
thinking=content.get("thinking") or "",
signature=content.get("signature") or "",
cache_control=content.get("cache_control", {})
)
thinking_blocks.append(thinking_block)
elif content.get("type") == "redacted_thinking":
redacted_thinking_block = ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content.get("data") or "",
cache_control=content.get("cache_control", {})
)
thinking_blocks.append(redacted_thinking_block)


if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
thinking_blocks=thinking_blocks if len(thinking_blocks) > 0 else None,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
Expand Down Expand Up @@ -383,11 +409,11 @@ def translate_anthropic_to_openai(
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking
]
] = []
for choice in choices:
Expand All @@ -410,6 +436,34 @@ def _translate_openai_content_to_anthropic(
type="text", text=choice.message.content
)
)
elif choice.message.thinking_blocks is not None:
for thinking_block in choice.message.thinking_blocks:
if "thinking" in thinking_block and "signature" in thinking_block:
thinking = thinking_block.get("thinking")
signature = thinking_block.get("signature")

assert isinstance(thinking, str)
assert isinstance(signature, str) or signature is None

new_content.append(
AnthropicResponseContentBlockThinking(
type="thinking",
thinking=thinking,
signature=signature,
)
)
elif "data" in thinking_block:
data = thinking_block.get("data")

assert isinstance(data, str)

new_content.append(
AnthropicResponseContentBlockRedactedThinking(
type="redacted_thinking",
data=data,
)
)


return new_content

Expand Down Expand Up @@ -453,9 +507,9 @@ def translate_openai_response_to_anthropic(
return translated_obj

def _translate_streaming_openai_chunk_to_anthropic_content_block(
self, choices: List[OpenAIStreamingChoice]
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
) -> Tuple[
Literal["text", "tool_use"],
Literal["text", "tool_use", "thinking"],
"ContentBlockContentBlockDict",
]:
from litellm._uuid import uuid
Expand All @@ -476,17 +530,41 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block(
name=choice.delta.tool_calls[0].function.name or "",
input={},
)
elif (
isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks")
):
thinking_blocks = choice.delta.thinking_blocks or []
if len(thinking_blocks) > 0:
thinking_block = thinking_blocks[0]
if thinking_block["type"] == "thinking":
thinking = thinking_block.get("thinking") or ""
signature = thinking_block.get("signature") or ""

assert isinstance(thinking, str)
assert isinstance(signature, str)

if thinking and signature:
raise ValueError("Both `thinking` and `signature` in a single streaming chunk isn't supported.")

return "thinking", ChatCompletionThinkingBlock(
type="thinking",
thinking=thinking,
signature=signature
)


return "text", TextBlock(type="text", text="")

def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
Literal["text_delta", "input_json_delta", "thinking_delta", "signature_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta],
]:

text: str = ""
reasoning_content: str = ""
reasoning_signature: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None and len(choice.delta.content) > 0:
Expand All @@ -499,10 +577,32 @@ def _translate_streaming_openai_chunk_to_anthropic(
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"):
thinking_blocks = choice.delta.thinking_blocks or []
if len(thinking_blocks) > 0:
for thinking_block in thinking_blocks:
if thinking_block["type"] == "thinking":
thinking = thinking_block.get("thinking") or ""
signature = thinking_block.get("signature") or ""

assert isinstance(thinking, str)
assert isinstance(signature, str)

reasoning_content += thinking
reasoning_signature += signature

if reasoning_content and reasoning_signature:
raise ValueError("Both `reasoning` and `signature` in a single streaming chunk isn't supported.")


if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
elif reasoning_content:
return "thinking_delta", ContentThinkingBlockDelta(type="thinking_delta", thinking=reasoning_content)
elif reasoning_signature:
return "signature_delta", ContentThinkingSignatureBlockDelta(type="signature_delta", signature=reasoning_signature)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)

Expand Down
37 changes: 19 additions & 18 deletions litellm/llms/vertex_ai/gemini/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,26 +302,27 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
)
if thinking_blocks is not None:
for block in thinking_blocks:
block_thinking_str = block.get("thinking")
block_signature = block.get("signature")
if (
block_thinking_str is not None
and block_signature is not None
):
try:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
**json.loads(block_thinking_str),
if block["type"] == "thinking":
block_thinking_str = block.get("thinking")
block_signature = block.get("signature")
if (
block_thinking_str is not None
and block_signature is not None
):
try:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
**json.loads(block_thinking_str),
)
)
)
except Exception:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
text=block_thinking_str,
except Exception:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
text=block_thinking_str,
)
)
)
if _message_content is not None and isinstance(_message_content, list):
_parts = []
for element in _message_content:
Expand Down
36 changes: 33 additions & 3 deletions litellm/types/llms/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic import BaseModel, validator
from typing_extensions import Literal, Required, TypedDict

from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock


class AnthropicMessagesToolChoice(TypedDict, total=False):
Expand Down Expand Up @@ -104,6 +104,7 @@ class AnthropicMessagesToolUseParam(TypedDict, total=False):
AnthropicMessagesTextParam,
AnthropicMessagesToolUseParam,
ChatCompletionThinkingBlock,
ChatCompletionRedactedThinkingBlock,
]


Expand Down Expand Up @@ -264,11 +265,29 @@ class ContentJsonBlockDelta(TypedDict):
partial_json: str


class ContentThinkingBlockDelta(TypedDict):
    """
    Streaming delta carrying an incremental chunk of thinking text.

    Example wire format:
        "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:"}
    """

    type: Literal["thinking_delta"]
    thinking: str


class ContentThinkingSignatureBlockDelta(TypedDict):
    """
    Streaming delta carrying the cryptographic signature that closes a
    thinking block.

    Example wire format:
        "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}
    """

    type: Literal["signature_delta"]
    signature: str


class ContentBlockDelta(TypedDict):
    """
    A "content_block_delta" streaming event: one incremental update to the
    content block at position `index`.
    """

    type: Literal["content_block_delta"]
    index: int
    # Thinking and signature deltas were added alongside the pre-existing
    # text / json / citations delta shapes.
    delta: Union[
        ContentTextBlockDelta,
        ContentJsonBlockDelta,
        ContentCitationsBlockDelta,
        ContentThinkingBlockDelta,
        ContentThinkingSignatureBlockDelta,
    ]


Expand Down Expand Up @@ -311,7 +330,7 @@ class ContentBlockStartText(TypedDict):
content_block: TextBlock


# Content-block shapes that can open a streaming "content_block_start" event;
# thinking blocks are supported alongside tool_use and text.
ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock, ChatCompletionThinkingBlock]

ContentBlockStart = Union[ContentBlockStartToolUse, ContentBlockStartText]

Expand Down Expand Up @@ -384,6 +403,17 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
input: dict


class AnthropicResponseContentBlockThinking(BaseModel):
    """A `thinking` content block in a non-streaming Anthropic response."""

    type: Literal["thinking"]
    thinking: str
    # NOTE(review): Optional with no default — under pydantic v2 this field is
    # still required (callers must pass signature, possibly None); confirm that
    # is the intent.
    signature: Optional[str]


class AnthropicResponseContentBlockRedactedThinking(BaseModel):
    """A `redacted_thinking` content block: opaque encrypted thinking payload."""

    type: Literal["redacted_thinking"]
    # Encrypted/opaque blob as returned by the API; never decoded locally.
    data: str


class AnthropicResponseUsageBlock(BaseModel):
input_tokens: int
output_tokens: int
Expand Down
2 changes: 1 addition & 1 deletion litellm/types/llms/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):

class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
    cache_control: ChatCompletionCachedContent
    # Both regular and redacted thinking blocks may be round-tripped on an
    # assistant message.
    thinking_blocks: Optional[List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]]


class ChatCompletionToolMessage(TypedDict):
Expand Down
Loading
Loading