From dd7d12eabdd213a6aa3905f737f5774e07edafb2 Mon Sep 17 00:00:00 2001 From: Leslie Cheng Date: Sun, 5 Oct 2025 00:09:25 -0700 Subject: [PATCH 1/4] Fix thinking_blocks handling in Converse API calls This fixes thinking-block translation for Claude models called via the Converse API, which should also fix Claude Code. --- .../adapters/transformation.py | 90 ++++- litellm/types/llms/anthropic.py | 36 +- litellm/types/llms/openai.py | 2 +- ...al_pass_through_adapters_transformation.py | 344 ++++++++++++++++++ 4 files changed, 461 insertions(+), 11 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py index 7de2a1e1c66f..645d477b05fc 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py @@ -13,6 +13,8 @@ from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice +from litellm.types.utils import StreamingChoices + from litellm.types.llms.anthropic import ( AllAnthropicToolsValues, AnthopicMessagesAssistantMessageParam, @@ -22,9 +24,13 @@ AnthropicMessagesUserMessageParam, AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, + AnthropicResponseContentBlockThinking, + AnthropicResponseContentBlockRedactedThinking, ContentBlockDelta, ContentJsonBlockDelta, ContentTextBlockDelta, + ContentThinkingBlockDelta, + ContentThinkingSignatureBlockDelta, MessageBlockDelta, MessageDelta, UsageDelta, @@ -42,6 +48,8 @@ ChatCompletionRequest, ChatCompletionSystemMessage, ChatCompletionTextObject, + ChatCompletionThinkingBlock, + ChatCompletionRedactedThinkingBlock, ChatCompletionToolCallFunctionChunk, ChatCompletionToolChoiceFunctionParam, ChatCompletionToolChoiceObjectParam, @@ -227,6 +235,7 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915 ## ASSISTANT MESSAGE ## assistant_message_str: Optional[str] = None tool_calls: List[ChatCompletionAssistantToolCall] = [] + thinking_blocks: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = [] if m["role"] == "assistant": if isinstance(m.get("content"), str): assistant_message_str = str(m.get("content", "")) @@ -253,11 +262,28 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915 function=function_chunk, ) ) + elif content.get("type") == "thinking": + thinking_block = ChatCompletionThinkingBlock( + type="thinking", + thinking=content.get("thinking") or "", + signature=content.get("signature") or "", + cache_control=content.get("cache_control", {}) + ) + thinking_blocks.append(thinking_block) + elif content.get("type") == "redacted_thinking": + redacted_thinking_block = ChatCompletionRedactedThinkingBlock( + type="redacted_thinking", + data=content.get("data") or "", + cache_control=content.get("cache_control", {}) + ) + thinking_blocks.append(redacted_thinking_block) + if assistant_message_str is not None or len(tool_calls) > 0: assistant_message = ChatCompletionAssistantMessage( role="assistant", content=assistant_message_str, + thinking_blocks=thinking_blocks if len(thinking_blocks) > 0 else None, ) if len(tool_calls) > 0: assistant_message["tool_calls"] = tool_calls @@ -383,11 +409,11 @@ def translate_anthropic_to_openai( def _translate_openai_content_to_anthropic( self, choices: List[Choices] ) -> List[ - Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] + Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, 
AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking] ]: new_content: List[ Union[ - AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse + AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking ] ] = [] for choice in choices: @@ -410,6 +436,24 @@ def _translate_openai_content_to_anthropic( type="text", text=choice.message.content ) ) + elif choice.message.thinking_blocks is not None: + for thinking_block in choice.message.thinking_blocks: + if "thinking" in thinking_block and "signature" in thinking_block: + new_content.append( + AnthropicResponseContentBlockThinking( + type="thinking", + thinking=thinking_block.get("thinking") or "", + signature=thinking_block.get("signature") or "", + ) + ) + elif "data" in thinking_block: + new_content.append( + AnthropicResponseContentBlockRedactedThinking( + type="redacted_thinking", + data=thinking_block.get("data", ""), + ) + ) + return new_content @@ -453,9 +497,9 @@ def translate_openai_response_to_anthropic( return translated_obj def _translate_streaming_openai_chunk_to_anthropic_content_block( - self, choices: List[OpenAIStreamingChoice] + self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]] ) -> Tuple[ - Literal["text", "tool_use"], + Literal["text", "tool_use", "thinking"], "ContentBlockContentBlockDict", ]: from litellm._uuid import uuid @@ -476,17 +520,35 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block( name=choice.delta.tool_calls[0].function.name or "", input={}, ) + elif ( + isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks") + ): + thinking_blocks = choice.delta.thinking_blocks or [] + if len(thinking_blocks) > 0: + thinking = thinking_blocks[0].get("thinking") or "" + signature = thinking_blocks[0].get("signature") or "" + + if thinking and signature: + raise ValueError("Both `thinking` and `signature` in a single streaming chunk aren't supported.") + return "thinking", ChatCompletionThinkingBlock( + type="thinking", + thinking=thinking, + signature=signature ) + return "text", TextBlock(type="text", text="") def _translate_streaming_openai_chunk_to_anthropic( - self, choices: List[OpenAIStreamingChoice] + self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]] ) -> Tuple[ - Literal["text_delta", "input_json_delta"], - Union[ContentTextBlockDelta, ContentJsonBlockDelta], + Literal["text_delta", "input_json_delta", "thinking_delta", "signature_delta"], + Union[ContentTextBlockDelta, ContentJsonBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta], ]: text: str = "" + reasoning_content: str = "" + reasoning_signature: str = "" partial_json: Optional[str] = None for choice in choices: if choice.delta.content is not None and len(choice.delta.content) > 0: @@ -499,10 +561,24 @@ def _translate_streaming_openai_chunk_to_anthropic( and tool.function.arguments is not None ): partial_json += tool.function.arguments + elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"): + thinking_blocks = choice.delta.thinking_blocks or [] + if len(thinking_blocks) > 0: + reasoning_content += thinking_blocks[0].get("thinking") or "" + reasoning_signature += thinking_blocks[0].get("signature") or "" + + if reasoning_content and reasoning_signature: + raise ValueError("Both `reasoning` and `signature` in a single streaming chunk aren't supported.") + + if partial_json is not None: 
return "input_json_delta", ContentJsonBlockDelta( type="input_json_delta", partial_json=partial_json ) + elif reasoning_content: + return "thinking_delta", ContentThinkingBlockDelta(type="thinking_delta", thinking=reasoning_content) + elif reasoning_signature: + return "signature_delta", ContentThinkingSignatureBlockDelta(type="signature_delta", signature=reasoning_signature) else: return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 02c6f2cf8cf8..24eb8bc86aae 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, validator from typing_extensions import Literal, Required, TypedDict -from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock +from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock class AnthropicMessagesToolChoice(TypedDict, total=False): @@ -104,6 +104,7 @@ class AnthropicMessagesToolUseParam(TypedDict, total=False): AnthropicMessagesTextParam, AnthropicMessagesToolUseParam, ChatCompletionThinkingBlock, + ChatCompletionRedactedThinkingBlock, ] @@ -264,11 +265,29 @@ class ContentJsonBlockDelta(TypedDict): partial_json: str +class ContentThinkingBlockDelta(TypedDict): + """ + "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:"}} + """ + + type: Literal["thinking_delta"] + thinking: str + + +class ContentThinkingSignatureBlockDelta(TypedDict): + """ + "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} + """ + + type: Literal["signature_delta"] + signature: str + + class ContentBlockDelta(TypedDict): type: Literal["content_block_delta"] index: int delta: Union[ - ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta + ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta ] @@ -311,7 +330,7 @@ class ContentBlockStartText(TypedDict): content_block: TextBlock -ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock] +ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock, ChatCompletionThinkingBlock] ContentBlockStart = Union[ContentBlockStartToolUse, ContentBlockStartText] @@ -384,6 +403,17 @@ class AnthropicResponseContentBlockToolUse(BaseModel): input: dict +class AnthropicResponseContentBlockThinking(BaseModel): + type: Literal["thinking"] + thinking: str + signature: Optional[str] + + +class AnthropicResponseContentBlockRedactedThinking(BaseModel): + type: Literal["redacted_thinking"] + data: str + + class AnthropicResponseUsageBlock(BaseModel): input_tokens: int output_tokens: int diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 9f4ae03b39dc..c6eaf8aa9763 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -679,7 +679,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False): class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False): cache_control: ChatCompletionCachedContent - thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] + thinking_blocks: Optional[List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]] class ChatCompletionToolMessage(TypedDict): diff --git 
a/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py b/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py index e5dba275bda9..dab4049b0b74 100644 --- a/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py +++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/adapters/test_anthropic_experimental_pass_through_adapters_transformation.py @@ -70,6 +70,188 @@ def test_translate_streaming_openai_chunk_to_anthropic_content_block(): } +def test_translate_streaming_openai_chunk_to_anthropic_thinking_content_block(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="I need to summar", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": None, + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": None, + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + ( + block_type, + content_block_start, + ) = LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic_content_block( + choices=choices + ) + + assert block_type == "thinking" + assert content_block_start == { + "type": "thinking", + "thinking": "I need to summar", + "signature": "", + } + + +def test_translate_streaming_openai_chunk_to_anthropic_thinking_signature_block(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="", + thinking_blocks=[ + { + "type": "thinking", + "thinking": None, + "signature": "sigsig", + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + "type": "thinking", + "thinking": None, + "signature": "sigsig", + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + ( + block_type, + content_block_start, + ) = LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic_content_block( + choices=choices + ) + + assert block_type == "thinking" + assert content_block_start == { + "type": "thinking", + "thinking": "", + "signature": "sigsig", + } + + +def test_translate_streaming_openai_chunk_to_anthropic_raises_when_thinking_and_signature_content_block(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": "sigsig", + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": "sigsig", + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + with pytest.raises(ValueError): + LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic_content_block( + choices=choices + ) + + +def test_translate_anthropic_messages_to_openai_thinking_blocks(): + """Test that assistant thinking and redacted_thinking blocks are translated into OpenAI thinking_blocks alongside tool calls.""" + + anthropic_messages = [ + AnthropicMessagesUserMessageParam( + role="user", + content=[{"type": "text", "text": "What's the weather in Boston?"}] + ), + 
AnthopicMessagesAssistantMessageParam( + role="assistant", + content=[ + { + "type": "thinking", + "thinking": "I will call the get_weather tool.", + "signature": "sigsig" + }, + { + "type": "redacted_thinking", + "data": "REDACTED", + }, + { + "type": "tool_use", + "id": "toolu_01234", + "name": "get_weather", + "input": {"location": "Boston"} + } + ] + ), + ] + + adapter = LiteLLMAnthropicMessagesAdapter() + result = adapter.translate_anthropic_messages_to_openai(messages=anthropic_messages) + + assert len(result) == 2 + assert result[1]["role"] == "assistant" + assert "thinking_blocks" in result[1] + assert len(result[1]["thinking_blocks"]) == 2 + assert result[1]["thinking_blocks"][0]["type"] == "thinking" + assert result[1]["thinking_blocks"][0]["thinking"] == "I will call the get_weather tool." + assert result[1]["thinking_blocks"][0]["signature"] == "sigsig" + assert result[1]["thinking_blocks"][1]["type"] == "redacted_thinking" + assert result[1]["thinking_blocks"][1]["data"] == "REDACTED" + assert "tool_calls" in result[1] + assert len(result[1]["tool_calls"]) == 1 + assert result[1]["tool_calls"][0]["id"] == "toolu_01234" + + def test_translate_anthropic_messages_to_openai_tool_message_placement(): """Test that tool result messages are placed before user messages in the conversation order.""" @@ -192,3 +374,165 @@ def test_translate_streaming_openai_chunk_to_anthropic_with_partial_json(): assert type_of_content == "input_json_delta" assert content_block_delta["type"] == "input_json_delta" assert content_block_delta["partial_json"] == ': "San ' + + + +def test_translate_openai_content_to_anthropic_thinking_and_redacted_thinking(): + openai_choices = [ + Choices( + message=Message( + role="assistant", + content=None, + thinking_blocks=[ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": "sigsig", + }, + { + "type": "redacted_thinking", + "data": "REDACTED" + } + ] + ) + ) + ] + + adapter = LiteLLMAnthropicMessagesAdapter() + result = adapter._translate_openai_content_to_anthropic(choices=openai_choices) + + assert len(result) == 2 + assert result[0].type == "thinking" + assert result[0].thinking == "I need to summar" + assert result[0].signature == "sigsig" + assert result[1].type == "redacted_thinking" + assert result[1].data == "REDACTED" + + +def test_translate_streaming_openai_chunk_to_anthropic_with_thinking(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="I need to summar", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": None, + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": None, + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + ( + type_of_content, + content_block_delta, + ) = LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic( + choices=choices + ) + + assert type_of_content == "thinking_delta" + assert content_block_delta["type"] == "thinking_delta" + assert content_block_delta["thinking"] == "I need to summar" + + +def test_translate_streaming_openai_chunk_to_anthropic_with_signature(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="", + thinking_blocks=[ + { + "type": "thinking", + "thinking": None, + "signature": "sigsig", + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + 
"type": "thinking", + "thinking": None, + "signature": "sigsig", + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + ( + type_of_content, + content_block_delta, + ) = LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic( + choices=choices + ) + + assert type_of_content == "signature_delta" + assert content_block_delta["type"] == "signature_delta" + assert content_block_delta["signature"] == "sigsig" + + +def test_translate_streaming_openai_chunk_to_anthropic_raises_when_thinking_and_signature(): + choices = [ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta( + reasoning_content="", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": "sigsig", + } + ], + provider_specific_fields={ + "thinking_blocks": [ + { + "type": "thinking", + "thinking": "I need to summar", + "signature": "sigsig", + } + ] + }, + content="", + role="assistant", + function_call=None, + tool_calls=None, + audio=None, + ), + logprobs=None, + ) + ] + + with pytest.raises(ValueError): + LiteLLMAnthropicMessagesAdapter()._translate_streaming_openai_chunk_to_anthropic( + choices=choices + ) \ No newline at end of file From 1c3ec18c826368914775f69c1cdba8d4b2f160e4 Mon Sep 17 00:00:00 2001 From: Leslie Cheng Date: Sun, 5 Oct 2025 03:45:54 -0700 Subject: [PATCH 2/4] Add thinking literal --- .../experimental_pass_through/adapters/streaming_iterator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/streaming_iterator.py b/litellm/llms/anthropic/experimental_pass_through/adapters/streaming_iterator.py index 306bcd9bb2cc..ba8be3148616 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/streaming_iterator.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/streaming_iterator.py @@ -31,7 +31,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper): sent_first_chunk: bool = False sent_content_block_start: bool = False sent_content_block_finish: bool = False - current_content_block_type: Literal["text", "tool_use"] = "text" + current_content_block_type: Literal["text", "tool_use", "thinking"] = "text" sent_last_message: bool = False holding_chunk: Optional[Any] = None holding_stop_reason_chunk: Optional[Any] = None From 310e3b30d5b440879970927cd8df831ae3290737 Mon Sep 17 00:00:00 2001 From: Leslie Cheng Date: Sun, 5 Oct 2025 04:48:50 -0700 Subject: [PATCH 3/4] Fix mypy issues --- .../adapters/transformation.py | 54 +++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py index 645d477b05fc..ef54f92281d4 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py @@ -439,18 +439,28 @@ def _translate_openai_content_to_anthropic( elif choice.message.thinking_blocks is not None: for thinking_block in choice.message.thinking_blocks: if "thinking" in thinking_block and "signature" in thinking_block: + thinking = thinking_block.get("thinking") + signature = thinking_block.get("signature") + + assert isinstance(thinking, str) + assert isinstance(signature, str) or signature is None + new_content.append( AnthropicResponseContentBlockThinking( type="thinking", - 
thinking=thinking_block.get("thinking") or "", - signature=thinking_block.get("signature") or "", + thinking=thinking, + signature=signature, ) ) elif "data" in thinking_block: + data = thinking_block.get("data") + + assert isinstance(data, str) + new_content.append( AnthropicResponseContentBlockRedactedThinking( type="redacted_thinking", - data=thinking_block.get("data", ""), + data=data, ) ) @@ -525,16 +535,22 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block( ): thinking_blocks = choice.delta.thinking_blocks or [] if len(thinking_blocks) > 0: - thinking = thinking_blocks[0].get("thinking") or "" - signature = thinking_blocks[0].get("signature") or "" - - if thinking and signature: - raise ValueError("Both `thinking` and `signature` in a single streaming chunk aren't supported.") - return "thinking", ChatCompletionThinkingBlock( - type="thinking", - thinking=thinking, - signature=signature - ) + thinking_block = thinking_blocks[0] + if thinking_block["type"] == "thinking": + thinking = thinking_block.get("thinking") or "" + signature = thinking_block.get("signature") or "" + + assert isinstance(thinking, str) + assert isinstance(signature, str) + + if thinking and signature: + raise ValueError("Both `thinking` and `signature` in a single streaming chunk aren't supported.") + + return "thinking", ChatCompletionThinkingBlock( + type="thinking", + thinking=thinking, + signature=signature ) return "text", TextBlock(type="text", text="") @@ -564,8 +580,16 @@ def _translate_streaming_openai_chunk_to_anthropic( elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"): thinking_blocks = choice.delta.thinking_blocks or [] if len(thinking_blocks) > 0: - reasoning_content += thinking_blocks[0].get("thinking") or "" - reasoning_signature += thinking_blocks[0].get("signature") or "" + for thinking_block in thinking_blocks: + if thinking_block["type"] == "thinking": + thinking = thinking_block.get("thinking") or "" + signature = thinking_block.get("signature") or "" + + assert isinstance(thinking, str) + assert isinstance(signature, str) + + reasoning_content += thinking + reasoning_signature += signature if reasoning_content and reasoning_signature: raise ValueError("Both `reasoning` and `signature` in a single streaming chunk aren't supported.") From d07455bd8ba9e5faac44280d3b7a642e513e8a53 Mon Sep 17 00:00:00 2001 From: Leslie Cheng Date: Sun, 5 Oct 2025 05:25:18 -0700 Subject: [PATCH 4/4] Type fix for redacted thinking --- .../llms/vertex_ai/gemini/transformation.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index ccaf28e59060..211f9077d77f 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -302,26 +302,27 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 ) if thinking_blocks is not None: for block in thinking_blocks: - block_thinking_str = block.get("thinking") - block_signature = block.get("signature") - if ( - block_thinking_str is not None - and block_signature is not None - ): - try: - assistant_content.append( - PartType( - thoughtSignature=block_signature, - **json.loads(block_thinking_str), + if block["type"] == "thinking": + block_thinking_str = block.get("thinking") + block_signature = block.get("signature") + if ( + block_thinking_str is not None + and block_signature is not None + ): + try: + assistant_content.append( + 
PartType( + thoughtSignature=block_signature, + **json.loads(block_thinking_str), + ) ) - ) - except Exception: - assistant_content.append( - PartType( - thoughtSignature=block_signature, - text=block_thinking_str, + except Exception: + assistant_content.append( + PartType( + thoughtSignature=block_signature, + text=block_thinking_str, + ) ) - ) if _message_content is not None and isinstance(_message_content, list): _parts = [] for element in _message_content: