Commit 51c73dc

fix(vertex_and_google_ai_studio_gemini.py): bubble up thoughtsignature back to client
1 parent 5990713 · commit 51c73dc

File tree

4 files changed: +137 −48 lines changed
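
From the caller's side, this commit surfaces Gemini response parts that carry a `thoughtSignature` on the OpenAI-style message as `thinking_blocks`, so the signature can be round-tripped back to the model. A minimal sketch of reading the new field, assuming a configured Gemini API key (`getattr` is used because the field is only set when signed thought parts are present):

import litellm

response = litellm.completion(
    model="gemini/gemini-2.5-flash",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
)

# Only present when Gemini returned parts carrying a thoughtSignature.
thinking_blocks = getattr(response.choices[0].message, "thinking_blocks", None)
if thinking_blocks:
    for block in thinking_blocks:
        # Each block carries the JSON-encoded part plus its raw signature,
        # which can be sent back to Gemini on a follow-up turn.
        print(block["type"], block["signature"])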

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py

Lines changed: 55 additions & 22 deletions

@@ -43,6 +43,7 @@
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionResponseMessage,
+    ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
     ChatCompletionToolParamFunctionChunk,
@@ -792,7 +793,25 @@ def get_assistant_content_message(
             content_str += _content_str
 
         return content_str, reasoning_content_str
-
+
+    def _extract_thinking_blocks_from_parts(
+        self, parts: List[HttpxPartType]
+    ) -> List[ChatCompletionThinkingBlock]:
+        """Extract thinking blocks from parts if present"""
+        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        for part in parts:
+            if "thoughtSignature" in part:
+                part_copy = part.copy()
+                part_copy.pop("thoughtSignature")
+                thinking_blocks.append(
+                    ChatCompletionThinkingBlock(
+                        type="thinking",
+                        thinking=json.dumps(part_copy),
+                        signature=part["thoughtSignature"],
+                    )
+                )
+        return thinking_blocks
+
     def _extract_image_response_from_parts(
         self, parts: List[HttpxPartType]
     ) -> Optional[ImageURLObject]:
@@ -804,10 +823,7 @@ def _extract_image_response_from_parts(
             if mime_type.startswith("image/"):
                 # Convert base64 data to data URI format
                 data_uri = f"data:{mime_type};base64,{data}"
-                return ImageURLObject(
-                    url=data_uri,
-                    detail="auto"
-                )
+                return ImageURLObject(url=data_uri, detail="auto")
         return None
 
     def _extract_audio_response_from_parts(
@@ -1127,7 +1143,7 @@ def _calculate_web_search_requests(grounding_metadata: List[dict]) -> Optional[i
         elif web_search_queries:
             web_search_requests = len(grounding_metadata)
         return web_search_requests
-
+
     @staticmethod
     def _create_streaming_choice(
         chat_completion_message: ChatCompletionResponseMessage,
@@ -1151,9 +1167,7 @@ def _create_streaming_choice(
             index=candidate.get("index", idx),
             delta=Delta(
                 content=chat_completion_message.get("content"),
-                reasoning_content=chat_completion_message.get(
-                    "reasoning_content"
-                ),
+                reasoning_content=chat_completion_message.get("reasoning_content"),
                 tool_calls=tools,
                 image=image_response,
                 function_call=functions,
@@ -1164,21 +1178,23 @@ def _create_streaming_choice(
         return choice
 
     @staticmethod
-    def _extract_candidate_metadata(candidate: Candidates) -> Tuple[List[dict], List[dict], List, List]:
+    def _extract_candidate_metadata(
+        candidate: Candidates,
+    ) -> Tuple[List[dict], List[dict], List, List]:
         """
         Extract metadata from a single candidate response.
-
+
         Returns:
             grounding_metadata: List[dict]
-            url_context_metadata: List[dict]
+            url_context_metadata: List[dict]
             safety_ratings: List
             citation_metadata: List
         """
         grounding_metadata: List[dict] = []
         url_context_metadata: List[dict] = []
         safety_ratings: List = []
         citation_metadata: List = []
-
+
         if "groundingMetadata" in candidate:
             if isinstance(candidate["groundingMetadata"], list):
                 grounding_metadata.extend(candidate["groundingMetadata"])  # type: ignore
@@ -1194,8 +1210,13 @@ def _extract_candidate_metadata(candidate: Candidates) -> Tuple[List[dict], List
         if "urlContextMetadata" in candidate:
             # Add URL context metadata to grounding metadata
             url_context_metadata.append(cast(dict, candidate["urlContextMetadata"]))
-
-        return grounding_metadata, url_context_metadata, safety_ratings, citation_metadata
+
+        return (
+            grounding_metadata,
+            url_context_metadata,
+            safety_ratings,
+            citation_metadata,
+        )
 
     @staticmethod
     def _process_candidates(
@@ -1227,6 +1248,7 @@ def _process_candidates(
         tools: Optional[List[ChatCompletionToolCallChunk]] = []
         functions: Optional[ChatCompletionToolCallFunctionChunk] = None
         cumulative_tool_call_index: int = 0
+        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
 
         for idx, candidate in enumerate(_candidates):
            if "content" not in candidate:
@@ -1239,7 +1261,7 @@ def _process_candidates(
                candidate_safety_ratings,
                candidate_citation_metadata,
            ) = VertexGeminiConfig._extract_candidate_metadata(candidate)
-
+
            grounding_metadata.extend(candidate_grounding_metadata)
            url_context_metadata.extend(candidate_url_context_metadata)
            safety_ratings.extend(candidate_safety_ratings)
@@ -1264,14 +1286,22 @@ def _process_candidates(
                 )
             )
 
+            thinking_blocks = (
+                VertexGeminiConfig()._extract_thinking_blocks_from_parts(
+                    parts=candidate["content"]["parts"]
+                )
+            )
+
             if audio_response is not None:
                 cast(Dict[str, Any], chat_completion_message)[
                     "audio"
                 ] = audio_response
                 chat_completion_message["content"] = None  # OpenAI spec
             if image_response is not None:
                 # Handle image response - combine with text content into structured format
-                cast(Dict[str, Any], chat_completion_message)["image"] = image_response
+                cast(Dict[str, Any], chat_completion_message)[
+                    "image"
+                ] = image_response
             if content is not None:
                 chat_completion_message["content"] = content
@@ -1298,15 +1328,18 @@ def _process_candidates(
             if functions is not None:
                 chat_completion_message["function_call"] = functions
 
+            if thinking_blocks is not None:
+                chat_completion_message["thinking_blocks"] = thinking_blocks  # type: ignore
+
             if isinstance(model_response, ModelResponseStream):
                 choice = VertexGeminiConfig._create_streaming_choice(
                     chat_completion_message=chat_completion_message,
-                    candidate=candidate,
-                    idx=idx,
-                    tools=tools,
-                    functions=functions,
+                    candidate=candidate,
+                    idx=idx,
+                    tools=tools,
+                    functions=functions,
                     chat_completion_logprobs=chat_completion_logprobs,
-                    image_response=image_response
+                    image_response=image_response,
                 )
                 model_response.choices.append(choice)
             elif isinstance(model_response, ModelResponse):
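
For reference, a standalone sketch of what the new `_extract_thinking_blocks_from_parts` helper does, rewritten over plain dicts (HttpxPartType is a total=False TypedDict, so parts behave like ordinary dicts at runtime); the part contents and signature value below are made-up examples:

import json
from typing import Any, Dict, List

def extract_thinking_blocks(parts: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Any part carrying a thoughtSignature is re-emitted as an OpenAI-style
    # thinking block: the remaining part is JSON-encoded into `thinking`, and
    # the signature is kept verbatim so it can be matched back to its part.
    thinking_blocks: List[Dict[str, Any]] = []
    for part in parts:
        if "thoughtSignature" in part:
            part_copy = part.copy()
            part_copy.pop("thoughtSignature")
            thinking_blocks.append(
                {
                    "type": "thinking",
                    "thinking": json.dumps(part_copy),
                    "signature": part["thoughtSignature"],
                }
            )
    return thinking_blocks

# Example: a signed function-call part becomes one thinking block.
parts = [
    {"text": "Checking the weather."},
    {
        "functionCall": {"name": "get_current_weather", "args": {"location": "Boston, MA"}},
        "thoughtSignature": "CsUBAdHtim9...",  # placeholder, not a real signature
    },
]
print(extract_thinking_blocks(parts))
# -> [{'type': 'thinking', 'thinking': '{"functionCall": ...}', 'signature': 'CsUBAdHtim9...'}]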

litellm/types/llms/openai.py

Lines changed: 6 additions & 2 deletions

@@ -43,10 +43,14 @@
 
 # Handle OpenAI SDK version compatibility for Text type
 try:
-    from openai.types.responses.response_create_params import Text as ResponseText
+    from openai.types.responses.response_create_params import (
+        Text as ResponseText,  # type: ignore
+    )
 except (ImportError, AttributeError):
     # Fall back to the concrete config type available in all SDK versions
-    from openai.types.responses.response_text_config_param import ResponseTextConfigParam as ResponseText
+    from openai.types.responses.response_text_config_param import (
+        ResponseTextConfigParam as ResponseText,
+    )
 
 from openai.types.responses.response_create_params import (
     Reasoning,

litellm/types/llms/vertex_ai.py

Lines changed: 3 additions & 1 deletion

@@ -72,6 +72,7 @@ class HttpxPartType(TypedDict, total=False):
     executableCode: HttpxExecutableCode
     codeExecutionResult: HttpxCodeExecutionResult
     thought: bool
+    thoughtSignature: str
 
 
 class HttpxContentType(TypedDict, total=False):
@@ -245,10 +246,11 @@ class UsageMetadata(TypedDict, total=False):
 class TokenCountDetailsResponse(TypedDict):
     """
     Response structure for token count details with modality breakdown.
-
+
     Example:
         {'totalTokens': 12, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 12}]}
     """
+
     totalTokens: int
     promptTokensDetails: List[PromptTokensDetails]
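
With the new key in place, a raw Gemini response part that carries a thought signature now type-checks against HttpxPartType. A small sketch (the signature value is a made-up placeholder):

from litellm.types.llms.vertex_ai import HttpxPartType

# total=False makes every key optional, so a part may carry any subset of keys.
part: HttpxPartType = {
    "text": "Weighing which tool to call.",
    "thought": True,
    "thoughtSignature": "CsUBAdHtim9...",  # placeholder, not a real signature
}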

tests/llm_translation/test_gemini.py

Lines changed: 73 additions & 23 deletions

@@ -436,7 +436,10 @@ def test_gemini_with_empty_function_call_arguments():
 async def test_claude_tool_use_with_gemini():
     response = await litellm.anthropic.messages.acreate(
         messages=[
-            {"role": "user", "content": "Hello, can you tell me the weather in Boston. Please respond with a tool call?"}
+            {
+                "role": "user",
+                "content": "Hello, can you tell me the weather in Boston. Please respond with a tool call?",
+            }
         ],
         model="gemini/gemini-2.5-flash",
         stream=True,
@@ -578,11 +581,17 @@ def test_gemini_tool_use():
     assert stop_reason is not None
     assert stop_reason == "tool_calls"
 
+
 @pytest.mark.asyncio
 async def test_gemini_image_generation_async():
     litellm._turn_on_debug()
     response = await litellm.acompletion(
-        messages=[{"role": "user", "content": "Generate an image of a banana wearing a costume that says LiteLLM"}],
+        messages=[
+            {
+                "role": "user",
+                "content": "Generate an image of a banana wearing a costume that says LiteLLM",
+            }
+        ],
         model="gemini/gemini-2.5-flash-image-preview",
     )
@@ -597,12 +606,16 @@ async def test_gemini_image_generation_async():
     assert IMAGE_URL["url"].startswith("data:image/png;base64,")
 
 
-
 @pytest.mark.asyncio
 async def test_gemini_image_generation_async_stream():
-    #litellm._turn_on_debug()
+    # litellm._turn_on_debug()
     response = await litellm.acompletion(
-        messages=[{"role": "user", "content": "Generate an image of a banana wearing a costume that says LiteLLM"}],
+        messages=[
+            {
+                "role": "user",
+                "content": "Generate an image of a banana wearing a costume that says LiteLLM",
+            }
+        ],
         model="gemini/gemini-2.5-flash-image-preview",
         stream=True,
     )
@@ -611,35 +624,72 @@ async def test_gemini_image_generation_async_stream():
     model_response_image = None
     async for chunk in response:
         print("CHUNK: ", chunk)
-        if hasattr(chunk.choices[0].delta, "image") and chunk.choices[0].delta.image is not None:
+        if (
+            hasattr(chunk.choices[0].delta, "image")
+            and chunk.choices[0].delta.image is not None
+        ):
             model_response_image = chunk.choices[0].delta.image
             print("MODEL_RESPONSE_IMAGE: ", model_response_image)
             assert model_response_image is not None
             assert model_response_image["url"].startswith("data:image/png;base64,")
             break
-
+
     #########################################################
     # Important: Validate we did get an image in the response
     #########################################################
     assert model_response_image is not None
     assert model_response_image["url"].startswith("data:image/png;base64,")
-
+
 
 def test_system_message_with_no_user_message():
-    """
-    Test that the system message is translated correctly for non-OpenAI providers.
-    """
-    messages = [
-        {
-            "role": "system",
-            "content": "Be a good bot!",
-        },
-    ]
+    """
+    Test that the system message is translated correctly for non-OpenAI providers.
+    """
+    messages = [
+        {
+            "role": "system",
+            "content": "Be a good bot!",
+        },
+    ]
 
-    response = litellm.completion(
-        model="gemini/gemini-2.5-flash",
-        messages=messages,
-    )
-    assert response is not None
+    response = litellm.completion(
+        model="gemini/gemini-2.5-flash",
+        messages=messages,
+    )
+    assert response is not None
 
-    assert response.choices[0].message.content is not None
+    assert response.choices[0].message.content is not None
+
+
+def test_gemini_with_thinking():
+    from litellm import completion
+
+    litellm._turn_on_debug()
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
+
+    result = completion(
+        model="gemini/gemini-2.5-flash",
+        messages=messages,
+        tools=tools,
+    )
+    print(f"result: {result}")
