@@ -1161,7 +1161,12 @@ async def get_response_async(
         stream: bool = True,
         **kwargs
     ) -> str:
-        """Async version of get_response with identical functionality."""
+        """Async version of get_response with identical functionality.
+
+        NOTE: This async version currently does NOT support sequential tool calling
+        like the sync version does. It will return after the first tool execution.
+        This is a known limitation that needs to be addressed in a future update.
+        """
         try:
             import litellm
             logging.info(f"Getting async response from {self.model}")
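
For reference, "sequential tool calling" in the sync path means looping until the model stops asking for tools, rather than returning after the first round of tool execution. A minimal sketch of that loop written directly against litellm, with execute_tool as a hypothetical stand-in for the project's real tool dispatch:

import json
import litellm

def execute_tool(name: str, args: dict) -> dict:
    # Hypothetical stand-in for the project's real tool dispatch.
    return {"error": f"no tool named {name}"}

async def run_tool_loop(params: dict, messages: list, max_iterations: int = 10) -> str:
    # Keep calling the model until it answers without requesting tools.
    for _ in range(max_iterations):
        resp = await litellm.acompletion(**params, messages=messages, stream=False)
        message = resp["choices"][0]["message"]
        if not message.get("tool_calls"):
            return message["content"]  # final answer: no further tools requested
        messages.append(message)  # keep the assistant turn that requested the tools
        for call in message["tool_calls"]:
            args = json.loads(call["function"]["arguments"])
            result = execute_tool(call["function"]["name"], args)
            messages.append({"role": "tool", "tool_call_id": call["id"], "content": json.dumps(result)})
    return ""  # bail out if the model never stops requesting tools
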
@@ -1233,47 +1238,46 @@ async def get_response_async(
 
             response_text = ""
             if reasoning_steps:
-                # Non-streaming call to capture reasoning
-                resp = await litellm.acompletion(
-                    **self._build_completion_params(
-                        messages=messages,
-                        temperature=temperature,
-                        stream=False,  # force non-streaming
-                        **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                    )
-                )
-                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                response_text = resp["choices"][0]["message"]["content"]
-
-                if verbose and reasoning_content:
-                    display_interaction(
-                        "Initial reasoning:",
-                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
-                    )
-                elif verbose:
-                    display_interaction(
-                        "Initial response:",
-                        response_text,
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
+                    **self._build_completion_params(
+                        messages=messages,
+                        temperature=temperature,
+                        stream=False,  # force non-streaming
+                        **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                    )
                 )
-            else:
-                # Determine if we should use streaming based on tool support
-                use_streaming = stream
-                if formatted_tools and not self._supports_streaming_tools():
-                    # Provider doesn't support streaming with tools, use non-streaming
-                    use_streaming = False
-
-                if use_streaming:
-                    # Streaming approach (with or without tools)
-                    tool_calls = []
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
 
-                    if verbose:
-                        async for chunk in await litellm.acompletion(
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Initial reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Initial response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+            else:
+                # Determine if we should use streaming based on tool support
+                use_streaming = stream
+                if formatted_tools and not self._supports_streaming_tools():
+                    # Provider doesn't support streaming with tools, use non-streaming
+                    use_streaming = False
+
+                if use_streaming:
+                    # Streaming approach (with or without tools)
+
+                    if verbose:
+                        async for chunk in await litellm.acompletion(
                             **self._build_completion_params(
                                 messages=messages,
                                 temperature=temperature,
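
The use_streaming gate above reflects how OpenAI-format providers stream tool calls: the function name and its JSON arguments arrive as fragments that the client must stitch back together by index, which not every provider handles reliably, hence the non-streaming fallback. A rough sketch of that reassembly, assuming OpenAI-style delta fields rather than any helper from this repo:

import litellm

async def stream_text_and_tool_calls(params: dict) -> tuple[str, list]:
    # Collect streamed text plus partially delivered tool-call fragments.
    text = ""
    calls: dict[int, dict] = {}  # fragment index -> assembled tool call
    async for chunk in await litellm.acompletion(**params, stream=True):
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta.content:
            text += delta.content
        for frag in (delta.tool_calls or []):
            call = calls.setdefault(frag.index, {"id": None, "name": "", "arguments": ""})
            if frag.id:
                call["id"] = frag.id
            if frag.function.name:
                call["name"] += frag.function.name
            if frag.function.arguments:
                call["arguments"] += frag.function.arguments  # JSON arrives piecewise
    return text, [calls[i] for i in sorted(calls)]
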
@@ -1429,67 +1433,7 @@ async def get_response_async(
                 else:
                     logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
 
-            # If no special handling was needed or if it's not an Ollama model
-            if reasoning_steps and not ollama_handled:
-                # Non-streaming call to capture reasoning
-                resp = await litellm.acompletion(
-                    **self._build_completion_params(
-                        messages=messages,
-                        temperature=temperature,
-                        stream=False,  # force non-streaming
-                        tools=formatted_tools,  # Include tools
-                        **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                    )
-                )
-                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                response_text = resp["choices"][0]["message"]["content"]
-
-                if verbose and reasoning_content:
-                    display_interaction(
-                        "Tool response reasoning:",
-                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
-                    )
-                elif verbose:
-                    display_interaction(
-                        "Tool response:",
-                        response_text,
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
-                    )
-            elif not ollama_handled:
-                # Get response after tool calls with streaming if not already handled
-                if verbose:
-                    async for chunk in await litellm.acompletion(
-                        **self._build_completion_params(
-                            messages=messages,
-                            temperature=temperature,
-                            stream=stream,
-                            tools=formatted_tools,
-                            **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                        )
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                            content = chunk.choices[0].delta.content
-                            response_text += content
-                            print("\033[K", end="\r")
-                            print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
-                else:
-                    response_text = ""
-                    async for chunk in await litellm.acompletion(
-                        **self._build_completion_params(
-                            messages=messages,
-                            temperature=temperature,
-                            stream=stream,
-                            **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                        )
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                            response_text += chunk.choices[0].delta.content
-
+            # Get response after tool calls
             response_text = response_text.strip()
 
             # Handle output formatting
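
Stripped of the surrounding plumbing, the reasoning-capture pattern this function keeps (and that the deleted branch above duplicated) boils down to one forced non-streaming call and two lookups on the returned message. A self-contained sketch, with the model name as a placeholder for any reasoning-capable provider:

import asyncio
import litellm

async def main() -> None:
    # Force non-streaming so reasoning can be read off the completed message.
    resp = await litellm.acompletion(
        model="deepseek/deepseek-reasoner",  # placeholder model
        messages=[{"role": "user", "content": "Is 9.11 greater than 9.9?"}],
        stream=False,
    )
    message = resp["choices"][0]["message"]
    reasoning = message.get("provider_specific_fields", {}).get("reasoning_content")
    print(f"Reasoning:\n{reasoning}\n\nAnswer:\n{message['content']}")

asyncio.run(main())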