Implement fix for thinking_blocks and Converse API calls

lcfyi · lcfyi · commit dd7d12eabdd2 · 2025-10-05T03:36:07.000-07:00
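This fixes Claude models when they are called through the Converse API,
which should also fix Claude Code. The Anthropic pass-through adapter now
translates `thinking` and `redacted_thinking` content blocks between the
Anthropic and OpenAI formats, for both regular and streaming responses.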
diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
@@ -13,6 +13,8 @@
 
 from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
 
+from litellm.types.utils import StreamingChoices
+
 from litellm.types.llms.anthropic import (
     AllAnthropicToolsValues,
     AnthopicMessagesAssistantMessageParam,
@@ -22,9 +24,13 @@
     AnthropicMessagesUserMessageParam,
     AnthropicResponseContentBlockText,
     AnthropicResponseContentBlockToolUse,
+    AnthropicResponseContentBlockThinking,
+    AnthropicResponseContentBlockRedactedThinking,
     ContentBlockDelta,
     ContentJsonBlockDelta,
     ContentTextBlockDelta,
+    ContentThinkingBlockDelta,
+    ContentThinkingSignatureBlockDelta,
     MessageBlockDelta,
     MessageDelta,
     UsageDelta,
@@ -42,6 +48,8 @@
     ChatCompletionRequest,
     ChatCompletionSystemMessage,
     ChatCompletionTextObject,
+    ChatCompletionThinkingBlock,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolChoiceFunctionParam,
     ChatCompletionToolChoiceObjectParam,
@@ -227,6 +235,7 @@ def translate_anthropic_messages_to_openai(  # noqa: PLR0915
             ## ASSISTANT MESSAGE ##
             assistant_message_str: Optional[str] = None
             tool_calls: List[ChatCompletionAssistantToolCall] = []
+            thinking_blocks: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = []
             if m["role"] == "assistant":
                 if isinstance(m.get("content"), str):
                     assistant_message_str = str(m.get("content", ""))
@@ -253,11 +262,28 @@ def translate_anthropic_messages_to_openai(  # noqa: PLR0915
                                         function=function_chunk,
                                     )
                                 )
+                            elif content.get("type") == "thinking":
+                                thinking_block = ChatCompletionThinkingBlock(
+                                    type="thinking",
+                                    thinking=content.get("thinking") or "",
+                                    signature=content.get("signature") or "",
+                                    cache_control=content.get("cache_control", {})
+                                )
+                                thinking_blocks.append(thinking_block)
+                            elif content.get("type") == "redacted_thinking":
+                                redacted_thinking_block = ChatCompletionRedactedThinkingBlock(
+                                    type="redacted_thinking",
+                                    data=content.get("data") or "",
+                                    cache_control=content.get("cache_control", {})
+                                )
+                                thinking_blocks.append(redacted_thinking_block)
+
 
             if assistant_message_str is not None or len(tool_calls) > 0:
                 assistant_message = ChatCompletionAssistantMessage(
                     role="assistant",
                     content=assistant_message_str,
+                    thinking_blocks=thinking_blocks if len(thinking_blocks) > 0 else None,
                 )
                 if len(tool_calls) > 0:
                     assistant_message["tool_calls"] = tool_calls
@@ -383,11 +409,11 @@ def translate_anthropic_to_openai(
     def _translate_openai_content_to_anthropic(
         self, choices: List[Choices]
     ) -> List[
-        Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
+        Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking]
     ]:
         new_content: List[
             Union[
-                AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
+                AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking
             ]
         ] = []
         for choice in choices:
@@ -410,6 +436,24 @@ def _translate_openai_content_to_anthropic(
                         type="text", text=choice.message.content
                     )
                 )
+            elif choice.message.thinking_blocks is not None:
+                for thinking_block in choice.message.thinking_blocks:
+                    if "thinking" in thinking_block and "signature" in thinking_block:
+                        new_content.append(
+                            AnthropicResponseContentBlockThinking(
+                                type="thinking",
+                                thinking=thinking_block.get("thinking") or "",
+                                signature=thinking_block.get("signature") or "",
+                            )
+                        )
+                    elif "data" in thinking_block:
+                        new_content.append(
+                            AnthropicResponseContentBlockRedactedThinking(
+                                type="redacted_thinking",
+                                data=thinking_block.get("data", ""),
+                            )
+                        )
+                    
 
         return new_content
 
@@ -453,9 +497,9 @@ def translate_openai_response_to_anthropic(
         return translated_obj
 
     def _translate_streaming_openai_chunk_to_anthropic_content_block(
-        self, choices: List[OpenAIStreamingChoice]
+        self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
     ) -> Tuple[
-        Literal["text", "tool_use"],
+        Literal["text", "tool_use", "thinking"],
         "ContentBlockContentBlockDict",
     ]:
         from litellm._uuid import uuid
@@ -476,17 +520,35 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block(
                     name=choice.delta.tool_calls[0].function.name or "",
                     input={},
                 )
+            elif (
+                isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks")
+            ):
+                thinking_blocks = choice.delta.thinking_blocks or []
+                if len(thinking_blocks) > 0:
+                    thinking = thinking_blocks[0].get("thinking") or ""
+                    signature = thinking_blocks[0].get("signature") or ""
+
+                    if thinking and signature:
+                        raise ValueError("Both `thinking` and `signature` in a single streaming chunk isn't supported.")
+                    return "thinking", ChatCompletionThinkingBlock(
+                        type="thinking",
+                        thinking=thinking,
+                        signature=signature
+                    )
+
 
         return "text", TextBlock(type="text", text="")
 
     def _translate_streaming_openai_chunk_to_anthropic(
-        self, choices: List[OpenAIStreamingChoice]
+        self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
     ) -> Tuple[
-        Literal["text_delta", "input_json_delta"],
-        Union[ContentTextBlockDelta, ContentJsonBlockDelta],
+        Literal["text_delta", "input_json_delta", "thinking_delta", "signature_delta"],
+        Union[ContentTextBlockDelta, ContentJsonBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta],
     ]:
 
         text: str = ""
+        reasoning_content: str = ""
+        reasoning_signature: str = ""
         partial_json: Optional[str] = None
         for choice in choices:
             if choice.delta.content is not None and len(choice.delta.content) > 0:
@@ -499,10 +561,24 @@ def _translate_streaming_openai_chunk_to_anthropic(
                         and tool.function.arguments is not None
                     ):
                         partial_json += tool.function.arguments
+            elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"):
+                thinking_blocks = choice.delta.thinking_blocks or []
+                if len(thinking_blocks) > 0:
+                    reasoning_content += thinking_blocks[0].get("thinking") or ""
+                    reasoning_signature += thinking_blocks[0].get("signature") or ""
+        
+        if reasoning_content and reasoning_signature:
+            raise ValueError("Both `reasoning` and `signature` in a single streaming chunk isn't supported.")
+
+
         if partial_json is not None:
             return "input_json_delta", ContentJsonBlockDelta(
                 type="input_json_delta", partial_json=partial_json
             )
+        elif reasoning_content:
+            return "thinking_delta", ContentThinkingBlockDelta(type="thinking_delta", thinking=reasoning_content)
+        elif reasoning_signature:
+            return "signature_delta", ContentThinkingSignatureBlockDelta(type="signature_delta", signature=reasoning_signature)
         else:
             return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
 
diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py
@@ -4,7 +4,7 @@
 from pydantic import BaseModel, validator
 from typing_extensions import Literal, Required, TypedDict
 
-from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
+from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
 
 
 class AnthropicMessagesToolChoice(TypedDict, total=False):
@@ -104,6 +104,7 @@ class AnthropicMessagesToolUseParam(TypedDict, total=False):
     AnthropicMessagesTextParam,
     AnthropicMessagesToolUseParam,
     ChatCompletionThinkingBlock,
+    ChatCompletionRedactedThinkingBlock,
 ]
 
 
@@ -264,11 +265,29 @@ class ContentJsonBlockDelta(TypedDict):
     partial_json: str
 
 
+class ContentThinkingBlockDelta(TypedDict):
+    """
+    "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:"}}
+    """
+
+    type: Literal["thinking_delta"]
+    thinking: str
+
+
+class ContentThinkingSignatureBlockDelta(TypedDict):
+    """
+    "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}}
+    """
+
+    type: Literal["signature_delta"]
+    signature: str
+
+
 class ContentBlockDelta(TypedDict):
     type: Literal["content_block_delta"]
     index: int
     delta: Union[
-        ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
+        ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta
     ]
 
 
@@ -311,7 +330,7 @@ class ContentBlockStartText(TypedDict):
     content_block: TextBlock
 
 
-ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock]
+ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock, ChatCompletionThinkingBlock]
 
 ContentBlockStart = Union[ContentBlockStartToolUse, ContentBlockStartText]
 
@@ -384,6 +403,17 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
     input: dict
 
 
+class AnthropicResponseContentBlockThinking(BaseModel):
+    type: Literal["thinking"]
+    thinking: str
+    signature: Optional[str]
+
+
+class AnthropicResponseContentBlockRedactedThinking(BaseModel):
+    type: Literal["redacted_thinking"]
+    data: str
+
+
 class AnthropicResponseUsageBlock(BaseModel):
     input_tokens: int
     output_tokens: int
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
@@ -679,7 +679,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
 
 class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
     cache_control: ChatCompletionCachedContent
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]]
 
 
 class ChatCompletionToolMessage(TypedDict):
diff --git a/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py