Skip to content

Commit dd7d12e

Browse files
committed
Implement fix for thinking_blocks and Converse API calls
This fixes Claude models called via the Converse API, which should also fix Claude Code.
1 parent b348a26 commit dd7d12e

File tree

4 files changed

+461
-11
lines changed

4 files changed

+461
-11
lines changed

litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
1515

16+
from litellm.types.utils import StreamingChoices
17+
1618
from litellm.types.llms.anthropic import (
1719
AllAnthropicToolsValues,
1820
AnthopicMessagesAssistantMessageParam,
@@ -22,9 +24,13 @@
2224
AnthropicMessagesUserMessageParam,
2325
AnthropicResponseContentBlockText,
2426
AnthropicResponseContentBlockToolUse,
27+
AnthropicResponseContentBlockThinking,
28+
AnthropicResponseContentBlockRedactedThinking,
2529
ContentBlockDelta,
2630
ContentJsonBlockDelta,
2731
ContentTextBlockDelta,
32+
ContentThinkingBlockDelta,
33+
ContentThinkingSignatureBlockDelta,
2834
MessageBlockDelta,
2935
MessageDelta,
3036
UsageDelta,
@@ -42,6 +48,8 @@
4248
ChatCompletionRequest,
4349
ChatCompletionSystemMessage,
4450
ChatCompletionTextObject,
51+
ChatCompletionThinkingBlock,
52+
ChatCompletionRedactedThinkingBlock,
4553
ChatCompletionToolCallFunctionChunk,
4654
ChatCompletionToolChoiceFunctionParam,
4755
ChatCompletionToolChoiceObjectParam,
@@ -227,6 +235,7 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
227235
## ASSISTANT MESSAGE ##
228236
assistant_message_str: Optional[str] = None
229237
tool_calls: List[ChatCompletionAssistantToolCall] = []
238+
thinking_blocks: List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]] = []
230239
if m["role"] == "assistant":
231240
if isinstance(m.get("content"), str):
232241
assistant_message_str = str(m.get("content", ""))
@@ -253,11 +262,28 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
253262
function=function_chunk,
254263
)
255264
)
265+
elif content.get("type") == "thinking":
266+
thinking_block = ChatCompletionThinkingBlock(
267+
type="thinking",
268+
thinking=content.get("thinking") or "",
269+
signature=content.get("signature") or "",
270+
cache_control=content.get("cache_control", {})
271+
)
272+
thinking_blocks.append(thinking_block)
273+
elif content.get("type") == "redacted_thinking":
274+
redacted_thinking_block = ChatCompletionRedactedThinkingBlock(
275+
type="redacted_thinking",
276+
data=content.get("data") or "",
277+
cache_control=content.get("cache_control", {})
278+
)
279+
thinking_blocks.append(redacted_thinking_block)
280+
256281

257282
if assistant_message_str is not None or len(tool_calls) > 0:
258283
assistant_message = ChatCompletionAssistantMessage(
259284
role="assistant",
260285
content=assistant_message_str,
286+
thinking_blocks=thinking_blocks if len(thinking_blocks) > 0 else None,
261287
)
262288
if len(tool_calls) > 0:
263289
assistant_message["tool_calls"] = tool_calls
@@ -383,11 +409,11 @@ def translate_anthropic_to_openai(
383409
def _translate_openai_content_to_anthropic(
384410
self, choices: List[Choices]
385411
) -> List[
386-
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
412+
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking]
387413
]:
388414
new_content: List[
389415
Union[
390-
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
416+
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse, AnthropicResponseContentBlockThinking, AnthropicResponseContentBlockRedactedThinking
391417
]
392418
] = []
393419
for choice in choices:
@@ -410,6 +436,24 @@ def _translate_openai_content_to_anthropic(
410436
type="text", text=choice.message.content
411437
)
412438
)
439+
elif choice.message.thinking_blocks is not None:
440+
for thinking_block in choice.message.thinking_blocks:
441+
if "thinking" in thinking_block and "signature" in thinking_block:
442+
new_content.append(
443+
AnthropicResponseContentBlockThinking(
444+
type="thinking",
445+
thinking=thinking_block.get("thinking") or "",
446+
signature=thinking_block.get("signature") or "",
447+
)
448+
)
449+
elif "data" in thinking_block:
450+
new_content.append(
451+
AnthropicResponseContentBlockRedactedThinking(
452+
type="redacted_thinking",
453+
data=thinking_block.get("data", ""),
454+
)
455+
)
456+
413457

414458
return new_content
415459

@@ -453,9 +497,9 @@ def translate_openai_response_to_anthropic(
453497
return translated_obj
454498

455499
def _translate_streaming_openai_chunk_to_anthropic_content_block(
456-
self, choices: List[OpenAIStreamingChoice]
500+
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
457501
) -> Tuple[
458-
Literal["text", "tool_use"],
502+
Literal["text", "tool_use", "thinking"],
459503
"ContentBlockContentBlockDict",
460504
]:
461505
from litellm._uuid import uuid
@@ -476,17 +520,35 @@ def _translate_streaming_openai_chunk_to_anthropic_content_block(
476520
name=choice.delta.tool_calls[0].function.name or "",
477521
input={},
478522
)
523+
elif (
524+
isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks")
525+
):
526+
thinking_blocks = choice.delta.thinking_blocks or []
527+
if len(thinking_blocks) > 0:
528+
thinking = thinking_blocks[0].get("thinking") or ""
529+
signature = thinking_blocks[0].get("signature") or ""
530+
531+
if thinking and signature:
532+
raise ValueError("Both `thinking` and `signature` in a single streaming chunk isn't supported.")
533+
return "thinking", ChatCompletionThinkingBlock(
534+
type="thinking",
535+
thinking=thinking,
536+
signature=signature
537+
)
538+
479539

480540
return "text", TextBlock(type="text", text="")
481541

482542
def _translate_streaming_openai_chunk_to_anthropic(
483-
self, choices: List[OpenAIStreamingChoice]
543+
self, choices: List[Union[OpenAIStreamingChoice, StreamingChoices]]
484544
) -> Tuple[
485-
Literal["text_delta", "input_json_delta"],
486-
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
545+
Literal["text_delta", "input_json_delta", "thinking_delta", "signature_delta"],
546+
Union[ContentTextBlockDelta, ContentJsonBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta],
487547
]:
488548

489549
text: str = ""
550+
reasoning_content: str = ""
551+
reasoning_signature: str = ""
490552
partial_json: Optional[str] = None
491553
for choice in choices:
492554
if choice.delta.content is not None and len(choice.delta.content) > 0:
@@ -499,10 +561,24 @@ def _translate_streaming_openai_chunk_to_anthropic(
499561
and tool.function.arguments is not None
500562
):
501563
partial_json += tool.function.arguments
564+
elif isinstance(choice, StreamingChoices) and hasattr(choice.delta, "thinking_blocks"):
565+
thinking_blocks = choice.delta.thinking_blocks or []
566+
if len(thinking_blocks) > 0:
567+
reasoning_content += thinking_blocks[0].get("thinking") or ""
568+
reasoning_signature += thinking_blocks[0].get("signature") or ""
569+
570+
if reasoning_content and reasoning_signature:
571+
raise ValueError("Both `reasoning` and `signature` in a single streaming chunk isn't supported.")
572+
573+
502574
if partial_json is not None:
503575
return "input_json_delta", ContentJsonBlockDelta(
504576
type="input_json_delta", partial_json=partial_json
505577
)
578+
elif reasoning_content:
579+
return "thinking_delta", ContentThinkingBlockDelta(type="thinking_delta", thinking=reasoning_content)
580+
elif reasoning_signature:
581+
return "signature_delta", ContentThinkingSignatureBlockDelta(type="signature_delta", signature=reasoning_signature)
506582
else:
507583
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
508584

litellm/types/llms/anthropic.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pydantic import BaseModel, validator
55
from typing_extensions import Literal, Required, TypedDict
66

7-
from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
7+
from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
88

99

1010
class AnthropicMessagesToolChoice(TypedDict, total=False):
@@ -104,6 +104,7 @@ class AnthropicMessagesToolUseParam(TypedDict, total=False):
104104
AnthropicMessagesTextParam,
105105
AnthropicMessagesToolUseParam,
106106
ChatCompletionThinkingBlock,
107+
ChatCompletionRedactedThinkingBlock,
107108
]
108109

109110

@@ -264,11 +265,29 @@ class ContentJsonBlockDelta(TypedDict):
264265
partial_json: str
265266

266267

268+
class ContentThinkingBlockDelta(TypedDict):
269+
"""
270+
"delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:"}
271+
"""
272+
273+
type: Literal["thinking_delta"]
274+
thinking: str
275+
276+
277+
class ContentThinkingSignatureBlockDelta(TypedDict):
278+
"""
279+
"delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}
280+
"""
281+
282+
type: Literal["signature_delta"]
283+
signature: str
284+
285+
267286
class ContentBlockDelta(TypedDict):
268287
type: Literal["content_block_delta"]
269288
index: int
270289
delta: Union[
271-
ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
290+
ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta, ContentThinkingBlockDelta, ContentThinkingSignatureBlockDelta
272291
]
273292

274293

@@ -311,7 +330,7 @@ class ContentBlockStartText(TypedDict):
311330
content_block: TextBlock
312331

313332

314-
ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock]
333+
ContentBlockContentBlockDict = Union[ToolUseBlock, TextBlock, ChatCompletionThinkingBlock]
315334

316335
ContentBlockStart = Union[ContentBlockStartToolUse, ContentBlockStartText]
317336

@@ -384,6 +403,17 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
384403
input: dict
385404

386405

406+
class AnthropicResponseContentBlockThinking(BaseModel):
407+
type: Literal["thinking"]
408+
thinking: str
409+
signature: Optional[str]
410+
411+
412+
class AnthropicResponseContentBlockRedactedThinking(BaseModel):
413+
type: Literal["redacted_thinking"]
414+
data: str
415+
416+
387417
class AnthropicResponseUsageBlock(BaseModel):
388418
input_tokens: int
389419
output_tokens: int

litellm/types/llms/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
679679

680680
class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
681681
cache_control: ChatCompletionCachedContent
682-
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
682+
thinking_blocks: Optional[List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]]
683683

684684

685685
class ChatCompletionToolMessage(TypedDict):

0 commit comments

Comments
 (0)