43
43
from litellm .types .llms .openai import (
44
44
AllMessageValues ,
45
45
ChatCompletionResponseMessage ,
46
+ ChatCompletionThinkingBlock ,
46
47
ChatCompletionToolCallChunk ,
47
48
ChatCompletionToolCallFunctionChunk ,
48
49
ChatCompletionToolParamFunctionChunk ,
@@ -792,7 +793,25 @@ def get_assistant_content_message(
792
793
content_str += _content_str
793
794
794
795
return content_str , reasoning_content_str
795
-
796
+
797
def _extract_thinking_blocks_from_parts(
    self, parts: List[HttpxPartType]
) -> List[ChatCompletionThinkingBlock]:
    """Build one thinking block for every part that carries a thought signature.

    A part is selected when it contains the ``"thoughtSignature"`` key. For
    each selected part:

    * ``signature`` is the value stored under ``"thoughtSignature"``;
    * ``thinking`` is the JSON serialization of the part's remaining
      entries (everything except the signature), in their original order;
    * ``type`` is always ``"thinking"``.

    Args:
        parts: Response parts to scan; the parts themselves are not modified.

    Returns:
        The thinking blocks in the same order as their source parts. The
        result is always a list: it is empty (never ``None``) when no part
        contains a thought signature.
    """
    signature_key = "thoughtSignature"
    return [
        ChatCompletionThinkingBlock(
            type="thinking",
            # Serialize the part without its signature; the signature is
            # reported separately in the block's own field.
            thinking=json.dumps(
                {
                    field: value
                    for field, value in part.items()
                    if field != signature_key
                }
            ),
            signature=part[signature_key],
        )
        for part in parts
        if signature_key in part
    ]
814
+
796
815
def _extract_image_response_from_parts (
797
816
self , parts : List [HttpxPartType ]
798
817
) -> Optional [ImageURLObject ]:
@@ -804,10 +823,7 @@ def _extract_image_response_from_parts(
804
823
if mime_type .startswith ("image/" ):
805
824
# Convert base64 data to data URI format
806
825
data_uri = f"data:{ mime_type } ;base64,{ data } "
807
- return ImageURLObject (
808
- url = data_uri ,
809
- detail = "auto"
810
- )
826
+ return ImageURLObject (url = data_uri , detail = "auto" )
811
827
return None
812
828
813
829
def _extract_audio_response_from_parts (
@@ -1127,7 +1143,7 @@ def _calculate_web_search_requests(grounding_metadata: List[dict]) -> Optional[i
1127
1143
elif web_search_queries :
1128
1144
web_search_requests = len (grounding_metadata )
1129
1145
return web_search_requests
1130
-
1146
+
1131
1147
@staticmethod
1132
1148
def _create_streaming_choice (
1133
1149
chat_completion_message : ChatCompletionResponseMessage ,
@@ -1151,9 +1167,7 @@ def _create_streaming_choice(
1151
1167
index = candidate .get ("index" , idx ),
1152
1168
delta = Delta (
1153
1169
content = chat_completion_message .get ("content" ),
1154
- reasoning_content = chat_completion_message .get (
1155
- "reasoning_content"
1156
- ),
1170
+ reasoning_content = chat_completion_message .get ("reasoning_content" ),
1157
1171
tool_calls = tools ,
1158
1172
image = image_response ,
1159
1173
function_call = functions ,
@@ -1164,21 +1178,23 @@ def _create_streaming_choice(
1164
1178
return choice
1165
1179
1166
1180
@staticmethod
1167
- def _extract_candidate_metadata (candidate : Candidates ) -> Tuple [List [dict ], List [dict ], List , List ]:
1181
+ def _extract_candidate_metadata (
1182
+ candidate : Candidates ,
1183
+ ) -> Tuple [List [dict ], List [dict ], List , List ]:
1168
1184
"""
1169
1185
Extract metadata from a single candidate response.
1170
-
1186
+
1171
1187
Returns:
1172
1188
grounding_metadata: List[dict]
1173
- url_context_metadata: List[dict]
1189
+ url_context_metadata: List[dict]
1174
1190
safety_ratings: List
1175
1191
citation_metadata: List
1176
1192
"""
1177
1193
grounding_metadata : List [dict ] = []
1178
1194
url_context_metadata : List [dict ] = []
1179
1195
safety_ratings : List = []
1180
1196
citation_metadata : List = []
1181
-
1197
+
1182
1198
if "groundingMetadata" in candidate :
1183
1199
if isinstance (candidate ["groundingMetadata" ], list ):
1184
1200
grounding_metadata .extend (candidate ["groundingMetadata" ]) # type: ignore
@@ -1194,8 +1210,13 @@ def _extract_candidate_metadata(candidate: Candidates) -> Tuple[List[dict], List
1194
1210
if "urlContextMetadata" in candidate :
1195
1211
# Add URL context metadata to grounding metadata
1196
1212
url_context_metadata .append (cast (dict , candidate ["urlContextMetadata" ]))
1197
-
1198
- return grounding_metadata , url_context_metadata , safety_ratings , citation_metadata
1213
+
1214
+ return (
1215
+ grounding_metadata ,
1216
+ url_context_metadata ,
1217
+ safety_ratings ,
1218
+ citation_metadata ,
1219
+ )
1199
1220
1200
1221
@staticmethod
1201
1222
def _process_candidates (
@@ -1227,6 +1248,7 @@ def _process_candidates(
1227
1248
tools : Optional [List [ChatCompletionToolCallChunk ]] = []
1228
1249
functions : Optional [ChatCompletionToolCallFunctionChunk ] = None
1229
1250
cumulative_tool_call_index : int = 0
1251
+ thinking_blocks : Optional [List [ChatCompletionThinkingBlock ]] = None
1230
1252
1231
1253
for idx , candidate in enumerate (_candidates ):
1232
1254
if "content" not in candidate :
@@ -1239,7 +1261,7 @@ def _process_candidates(
1239
1261
candidate_safety_ratings ,
1240
1262
candidate_citation_metadata ,
1241
1263
) = VertexGeminiConfig ._extract_candidate_metadata (candidate )
1242
-
1264
+
1243
1265
grounding_metadata .extend (candidate_grounding_metadata )
1244
1266
url_context_metadata .extend (candidate_url_context_metadata )
1245
1267
safety_ratings .extend (candidate_safety_ratings )
@@ -1264,14 +1286,22 @@ def _process_candidates(
1264
1286
)
1265
1287
)
1266
1288
1289
+ thinking_blocks = (
1290
+ VertexGeminiConfig ()._extract_thinking_blocks_from_parts (
1291
+ parts = candidate ["content" ]["parts" ]
1292
+ )
1293
+ )
1294
+
1267
1295
if audio_response is not None :
1268
1296
cast (Dict [str , Any ], chat_completion_message )[
1269
1297
"audio"
1270
1298
] = audio_response
1271
1299
chat_completion_message ["content" ] = None # OpenAI spec
1272
1300
if image_response is not None :
1273
1301
# Handle image response - combine with text content into structured format
1274
- cast (Dict [str , Any ], chat_completion_message )["image" ] = image_response
1302
+ cast (Dict [str , Any ], chat_completion_message )[
1303
+ "image"
1304
+ ] = image_response
1275
1305
if content is not None :
1276
1306
chat_completion_message ["content" ] = content
1277
1307
@@ -1298,15 +1328,18 @@ def _process_candidates(
1298
1328
if functions is not None :
1299
1329
chat_completion_message ["function_call" ] = functions
1300
1330
1331
+ if thinking_blocks is not None :
1332
+ chat_completion_message ["thinking_blocks" ] = thinking_blocks # type: ignore
1333
+
1301
1334
if isinstance (model_response , ModelResponseStream ):
1302
1335
choice = VertexGeminiConfig ._create_streaming_choice (
1303
1336
chat_completion_message = chat_completion_message ,
1304
- candidate = candidate ,
1305
- idx = idx ,
1306
- tools = tools ,
1307
- functions = functions ,
1337
+ candidate = candidate ,
1338
+ idx = idx ,
1339
+ tools = tools ,
1340
+ functions = functions ,
1308
1341
chat_completion_logprobs = chat_completion_logprobs ,
1309
- image_response = image_response
1342
+ image_response = image_response ,
1310
1343
)
1311
1344
model_response .choices .append (choice )
1312
1345
elif isinstance (model_response , ModelResponse ):
0 commit comments