from pydantic import BaseModel
from typing_extensions import Unpack, override

+from ..tools import convert_pydantic_to_tool_spec
from ..types.content import ContentBlock, Messages
from ..types.exceptions import ContextWindowOverflowException
from ..types.streaming import StreamEvent
@@ -202,6 +203,10 @@ async def structured_output(
    ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
        """Get structured output from the model.

+        Some models do not support native structured output via response_format.
+        In cases of proxies, we may not have a way to determine support, so we
+        fallback to using tool calling to achieve structured output.
+
        Args:
            output_model: The output model to use for the agent.
            prompt: The prompt messages to use for the agent.
@@ -211,42 +216,69 @@ async def structured_output(
        Yields:
            Model events with the last being the structured output.
        """
-        supports_schema = supports_response_schema(self.get_config()["model_id"])
+        if supports_response_schema(self.get_config()["model_id"]):
+            logger.debug("structuring output using response schema")
+            result = await self._structured_output_using_response_schema(output_model, prompt, system_prompt)
+        else:
+            logger.debug("model does not support response schema, structuring output using tool approach")
+            result = await self._structured_output_using_tool(output_model, prompt, system_prompt)
+
+        yield {"output": result}
+
+    async def _structured_output_using_response_schema(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None
+    ) -> T:
+        """Get structured output using native response_format support."""
+        response = await litellm.acompletion(
+            **self.client_args,
+            model=self.get_config()["model_id"],
+            messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
+            response_format=output_model,
+        )

-        # If the provider does not support response schemas, we cannot reliably parse structured output.
-        # In that case we must not call the provider and must raise the documented ValueError.
-        if not supports_schema:
-            raise ValueError("Model does not support response_format")
+        if len(response.choices) > 1:
+            raise ValueError("Multiple choices found in the response.")
+        if not response.choices or response.choices[0].finish_reason != "tool_calls":
+            raise ValueError("No tool_calls found in response")

-        # For providers that DO support response schemas, call litellm and map context-window errors.
+        choice = response.choices[0]
        try:
-            response = await litellm.acompletion(
-                **self.client_args,
-                model=self.get_config()["model_id"],
-                messages=self.format_request(prompt, system_prompt=system_prompt)["messages"],
-                response_format=output_model,
-            )
+            # Parse the message content as JSON
+            tool_call_data = json.loads(choice.message.content)
+            # Instantiate the output model with the parsed data
+            return output_model(**tool_call_data)
        except ContextWindowExceededError as e:
            logger.warning("litellm client raised context window overflow in structured_output")
            raise ContextWindowOverflowException(e) from e
+        except (json.JSONDecodeError, TypeError, ValueError) as e:
+            raise ValueError(f"Failed to parse or load content into model: {e}") from e
+
+    async def _structured_output_using_tool(
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None
+    ) -> T:
+        """Get structured output using tool calling fallback."""
+        tool_spec = convert_pydantic_to_tool_spec(output_model)
+        request = self.format_request(prompt, [tool_spec], system_prompt, cast(ToolChoice, {"any": {}}))
+        args = {**self.client_args, **request, "stream": False}
+        response = await litellm.acompletion(**args)

        if len(response.choices) > 1:
            raise ValueError("Multiple choices found in the response.")
+        if not response.choices or response.choices[0].finish_reason != "tool_calls":
+            raise ValueError("No tool_calls found in response")

-        # Find the first choice with tool_calls
-        for choice in response.choices:
-            if choice.finish_reason == "tool_calls":
-                try:
-                    # Parse the tool call content as JSON
-                    tool_call_data = json.loads(choice.message.content)
-                    # Instantiate the output model with the parsed data
-                    yield {"output": output_model(**tool_call_data)}
-                    return
-                except (json.JSONDecodeError, TypeError, ValueError) as e:
-                    raise ValueError(f"Failed to parse or load content into model: {e}") from e
-
-        # If no tool_calls found, raise an error
-        raise ValueError("No tool_calls found in response")
+        choice = response.choices[0]
+        try:
+            # Parse the tool call content as JSON
+            tool_call = choice.message.tool_calls[0]
+            tool_call_data = json.loads(tool_call.function.arguments)
+            # Instantiate the output model with the parsed data
+            return output_model(**tool_call_data)
+        except ContextWindowExceededError as e:
+            logger.warning("litellm client raised context window overflow in structured_output")
+            raise ContextWindowOverflowException(e) from e
+        except (json.JSONDecodeError, TypeError, ValueError) as e:
+            raise ValueError(f"Failed to parse or load content into model: {e}") from e

    def _apply_proxy_prefix(self) -> None:
        """Apply litellm_proxy/ prefix to model_id when use_litellm_proxy is True.
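A minimal sketch of how the fallback-aware structured_output could be exercised once this lands; the model_id, the Weather schema, and the prompt below are illustrative assumptions rather than part of the diff:

import asyncio

from pydantic import BaseModel

from strands.models.litellm import LiteLLMModel


class Weather(BaseModel):
    # Assumed example schema; any Pydantic model can serve as the output_model.
    time: str
    weather: str


async def main() -> None:
    # model_id is an assumption; when the model does not advertise response_format
    # support, the provider falls back to the tool-calling path added in this diff.
    model = LiteLLMModel(model_id="openai/gpt-4o")

    event = None
    # structured_output is an async generator; the last event carries the parsed output.
    async for event in model.structured_output(
        Weather, [{"role": "user", "content": [{"text": "Sunny at 12:00."}]}]
    ):
        pass
    print(event["output"])


asyncio.run(main())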