Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
sent_first_chunk: bool = False
sent_content_block_start: bool = False
sent_content_block_finish: bool = False
current_content_block_type: Literal["text", "tool_use"] = "text"
current_content_block_type: Literal["text", "tool_use", "thinking"] = "text"
sent_last_message: bool = False
holding_chunk: Optional[Any] = None
holding_stop_reason_chunk: Optional[Any] = None
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

your code modifies the anthropic v1 messages api.

The error - is on the bedrock/converse_transformation for /v1/chat/completions AFAIK

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was the API I was looking to fix initially (such that litellm works for Claude Code), so it's not fully related to the linked issue.

This change should fix models that are explicitly routing to converse though.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice

from litellm.types.utils import StreamingChoices

from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthopicMessagesAssistantMessageParam,
Expand All @@ -22,9 +24,13 @@
AnthropicMessagesUserMessageParam,
AnthropicResponseContentBlockText,
AnthropicResponseContentBlockToolUse,
AnthropicResponseContentBlockThinking,
AnthropicResponseContentBlockRedactedThinking,
ContentBlockDelta,
ContentJsonBlockDelta,
ContentTextBlockDelta,
ContentThinkingBlockDelta,
ContentThinkingSignatureBlockDelta,
MessageBlockDelta,
MessageDelta,
UsageDelta,
Expand All @@ -42,6 +48,8 @@
ChatCompletionRequest,
ChatCompletionSystemMessage,
ChatCompletionTextObject,
ChatCompletionThinkingBlock,
ChatCompletionRedactedThinkingBlock,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
Expand Down Expand Up @@ -227,6 +235,7 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
thinking_blocks: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = []
if m["role"] == "assistant":
if isinstance(m.get("content"), str):
assistant_message_str = str(m.get("content", ""))
Expand All @@ -253,11 +262,28 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
function=function_chunk,
)
)
elif content.get("type") == "thinking":
thinking_block = ChatCompletionThinkingBlock(
type="thinking",
thinking=content.get("thinking") or "",
signature=content.get("signature") or "",
cache_control=content.get("cache_control", {})
)
thinking_blocks.append(thinking_block)
elif content.get("type") == "redacted_thinking":
redacted_thinking_block = ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content.get("data") or "",
cache_control=content.get("cache_control", {})
)
thinking_blocks.append(redacted_thinking_block)


if assistant_message_str is not None or len(tool_calls) > 0:
assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
thinking_blocks=thinking_blocks if len(thinking_blocks) > 0 else None,
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
Expand Down Expand Up @@ -383,11 +409,11 @@ def translate_anthropic_to_openai(
def _translate_openai_content_to_anthropic(
self, choices: List[Choices]
) -> List[
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking]
]:
new_content: List[
Union[
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking
]
] = []
for choice in choices:
Expand All @@ -410,6 +436,34 @@ def _translate_openai_content_to_anthropic(
type="text", text=choice.message.content
)
)
elif choice.message.thinking_blocks is not None:
for thinking_block in choice.message.thinking_blocks:
if "thinking" in thinking_block and "signature" in thinking_block:
thinking = thinking_block.get("thinking")
signature = thinking_block.get("signature")

assert isinstance(thinking, str)
assert isinstance(signature, str) or signature is None

new_content.append(
AnthropicResponseContentBlockThinking(
type="thinking",
thinking=thinking,
signature=signature,
)
)
elif "data" in thinking_block:
data = thinking_block.get("data")

assert isinstance(data, str)

new_content.append(
AnthropicResponseContentBlockRedactedThinking(
type="redacted_thinking",
data=data,
)
)


return new_content

Expand Down Expand Up @@ -453,9 +507,9 @@ def translate_openai_response_to_anthropic(
return translated_obj

def _translate_streaming_openai_chunk_to_anthropic_content_block(
self, choices: List[OpenAIStreamingChoice]
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
) -> Tuple[
Literal["text", "tool_use"],
Literal["text", "tool_use", "thinking"],
"ContentBlockContentBlockDict",
]:
from litellm._uuid import uuid
Expand All @@ -476,17 +530,41 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block(
name=choice.delta.tool_calls[0].function.name or "",
input={},
)
elif (
isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks")
):
thinking_blocks = choice.delta.thinking_blocks or []
if len(thinking_blocks) > 0:
thinking_block = thinking_blocks[0]
if thinking_block["type"] == "thinking":
thinking = thinking_block.get("thinking") or ""
signature = thinking_block.get("signature") or ""

assert isinstance(thinking, str)
assert isinstance(signature, str)

if thinking and signature:
raise ValueError("Both `thinking` and `signature` in a single streaming chunk isn't supported.")

return "thinking", ChatCompletionThinkingBlock(
type="thinking",
thinking=thinking,
signature=signature
)


return "text", TextBlock(type="text", text="")

def _translate_streaming_openai_chunk_to_anthropic(
self, choices: List[OpenAIStreamingChoice]
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
) -> Tuple[
Literal["text_delta", "input_json_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
Literal["text_delta", "input_json_delta", "thinking_delta", "signature_delta"],
Union[ContentTextBlockDelta, ContentJsonBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta],
]:

text: str = ""
reasoning_content: str = ""
reasoning_signature: str = ""
partial_json: Optional[str] = None
for choice in choices:
if choice.delta.content is not None and len(choice.delta.content) > 0:
Expand All @@ -499,10 +577,32 @@ def _translate_streaming_openai_chunk_to_anthropic(
and tool.function.arguments is not None
):
partial_json += tool.function.arguments
elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"):
thinking_blocks = choice.delta.thinking_blocks or []
if len(thinking_blocks) > 0:
for thinking_block in thinking_blocks:
if thinking_block["type"] == "thinking":
thinking = thinking_block.get("thinking") or ""
signature = thinking_block.get("signature") or ""

assert isinstance(thinking, str)
assert isinstance(signature, str)

reasoning_content += thinking
reasoning_signature += signature

if reasoning_content and reasoning_signature:
raise ValueError("Both `reasoning` and `signature` in a single streaming chunk isn't supported.")


if partial_json is not None:
return "input_json_delta", ContentJsonBlockDelta(
type="input_json_delta", partial_json=partial_json
)
elif reasoning_content:
return "thinking_delta", ContentThinkingBlockDelta(type="thinking_delta", thinking=reasoning_content)
elif reasoning_signature:
return "signature_delta", ContentThinkingSignatureBlockDelta(type="signature_delta", signature=reasoning_signature)
else:
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)

Expand Down
37 changes: 19 additions & 18 deletions litellm/llms/vertex_ai/gemini/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,26 +302,27 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
)
if thinking_blocks is not None:
for block in thinking_blocks:
block_thinking_str = block.get("thinking")
block_signature = block.get("signature")
if (
block_thinking_str is not None
and block_signature is not None
):
try:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
**json.loads(block_thinking_str),
if block["type"] == "thinking":
block_thinking_str = block.get("thinking")
block_signature = block.get("signature")
if (
block_thinking_str is not None
and block_signature is not None
):
try:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
**json.loads(block_thinking_str),
)
)
)
except Exception:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
text=block_thinking_str,
except Exception:
assistant_content.append(
PartType(
thoughtSignature=block_signature,
text=block_thinking_str,
)
)
)
if _message_content is not None and isinstance(_message_content, list):
_parts = []
for element in _message_content:
Expand Down
36 changes: 33 additions & 3 deletions litellm/types/llms/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic import BaseModel, validator
from typing_extensions import Literal, Required, TypedDict

from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock


class AnthropicMessagesToolChoice(TypedDict, total=False):
Expand Down Expand Up @@ -104,6 +104,7 @@ class AnthropicMessagesToolUseParam(TypedDict, total=False):
AnthropicMessagesTextParam,
AnthropicMessagesToolUseParam,
ChatCompletionThinkingBlock,
ChatCompletionRedactedThinkingBlock,
]


Expand Down Expand Up @@ -264,11 +265,29 @@ class ContentJsonBlockDelta(TypedDict):
partial_json: str


class ContentThinkingBlockDelta(TypedDict):
    """
    Streaming delta carrying an incremental chunk of thinking text.

    Example wire format:
        "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:"}
    """

    type: Literal["thinking_delta"]
    thinking: str


class ContentThinkingSignatureBlockDelta(TypedDict):
    """
    Streaming delta carrying the cryptographic signature that closes a
    thinking block.

    Example wire format:
        "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}
    """

    type: Literal["signature_delta"]
    signature: str


class ContentBlockDelta(TypedDict):
    """
    A "content_block_delta" streaming event: one incremental update to the
    content block at position `index`.
    """

    type: Literal["content_block_delta"]
    index: int
    # Thinking and signature deltas were added alongside the pre-existing
    # text / json / citations delta shapes.
    delta: Union[
        ContentTextBlockDelta,
        ContentJsonBlockDelta,
        ContentCitationsBlockDelta,
        ContentThinkingBlockDelta,
        ContentThinkingSignatureBlockDelta,
    ]


Expand Down Expand Up @@ -311,7 +330,7 @@ class ContentBlockStartText(TypedDict):
content_block: TextBlock


# Content-block shapes that can open a streaming "content_block_start" event;
# thinking blocks are supported alongside tool_use and text.
ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock, ChatCompletionThinkingBlock]

ContentBlockStart = Union[ContentBlockStartToolUse, ContentBlockStartText]

Expand Down Expand Up @@ -384,6 +403,17 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
input: dict


class AnthropicResponseContentBlockThinking(BaseModel):
    """A `thinking` content block in a non-streaming Anthropic response."""

    type: Literal["thinking"]
    thinking: str
    # NOTE(review): Optional with no default — under pydantic v2 this field is
    # still required (callers must pass signature, possibly None); confirm that
    # is the intent.
    signature: Optional[str]


class AnthropicResponseContentBlockRedactedThinking(BaseModel):
    """A `redacted_thinking` content block: opaque encrypted thinking payload."""

    type: Literal["redacted_thinking"]
    # Encrypted/opaque blob as returned by the API; never decoded locally.
    data: str


class AnthropicResponseUsageBlock(BaseModel):
input_tokens: int
output_tokens: int
Expand Down
2 changes: 1 addition & 1 deletion litellm/types/llms/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):

class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
    cache_control: ChatCompletionCachedContent
    # Both regular and redacted thinking blocks may be round-tripped on an
    # assistant message.
    thinking_blocks: Optional[List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]]


class ChatCompletionToolMessage(TypedDict):
Expand Down
Loading
Loading