@@ -680,6 +680,7 @@ def get_response(
        max_iterations = 10  # Prevent infinite loops
        iteration_count = 0
        final_response_text = ""
+       stored_reasoning_content = None  # Store reasoning content from tool execution

        while iteration_count < max_iterations:
            try:
@@ -922,6 +923,30 @@ def get_response(
                        else:
                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

+                   # Handle reasoning_steps after tool execution if not already handled by Ollama
+                   if reasoning_steps and not ollama_handled:
+                       # Make a non-streaming call to capture reasoning content
+                       reasoning_resp = litellm.completion(
+                           **self._build_completion_params(
+                               messages=messages,
+                               temperature=temperature,
+                               stream=False,  # force non-streaming
+                               **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                           )
+                       )
+                       reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                       response_text = reasoning_resp["choices"][0]["message"]["content"]
+
+                       # Store reasoning content for later use
+                       if reasoning_content:
+                           stored_reasoning_content = reasoning_content
+
+                       # Update messages with the response
+                       messages.append({
+                           "role": "assistant",
+                           "content": response_text
+                       })
+
                    # After tool execution, continue the loop to check if more tools are needed
                    # instead of immediately trying to get a final response
                    iteration_count += 1
@@ -943,16 +968,30 @@ def get_response(
                # No tool calls were made in this iteration, return the response
                if verbose:
-                   display_interaction(
-                       original_prompt,
-                       response_text,
-                       markdown=markdown,
-                       generation_time=time.time() - start_time,
-                       console=console
-                   )
+                   # If we have stored reasoning content from tool execution, display it
+                   if stored_reasoning_content:
+                       display_interaction(
+                           original_prompt,
+                           f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{response_text}",
+                           markdown=markdown,
+                           generation_time=time.time() - start_time,
+                           console=console
+                       )
+                   else:
+                       display_interaction(
+                           original_prompt,
+                           response_text,
+                           markdown=markdown,
+                           generation_time=time.time() - start_time,
+                           console=console
+                       )

                response_text = response_text.strip()

+               # Return reasoning content if reasoning_steps is True and we have it
+               if reasoning_steps and stored_reasoning_content:
+                   return stored_reasoning_content
+
                # Handle output formatting
                if output_json or output_pydantic:
                    self.chat_history.append({"role": "user", "content": original_prompt})
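
Net effect of this hunk: when reasoning_steps is true and a chain-of-thought was captured during the tool round-trip, get_response now returns that reasoning text instead of the final answer. A hedged usage sketch follows; the LLM class is the one this diff modifies, but the constructor argument, prompt, and toy tool are assumptions for illustration:

def get_weather(city: str) -> str:
    """Toy tool so the call path goes through tool execution."""
    return f"Sunny in {city}"

llm = LLM(model="deepseek/deepseek-reasoner")  # illustrative model
out = llm.get_response(
    "What's the weather in Paris?",
    tools=[get_weather],
    reasoning_steps=True,
)
# out is the stored reasoning text when the provider exposed it during the
# tool round-trip, otherwise the ordinary final answer.
print(out)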
@@ -1161,12 +1200,7 @@ async def get_response_async(
        stream: bool = True,
        **kwargs
    ) -> str:
-       """Async version of get_response with identical functionality.
-
-       NOTE: This async version currently does NOT support sequential tool calling
-       like the sync version does. It will return after the first tool execution.
-       This is a known limitation that needs to be addressed in a future update.
-       """
+       """Async version of get_response with identical functionality."""
        try:
            import litellm
            logging.info(f"Getting async response from {self.model}")
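
Dropping the NOTE implies the async path now mirrors the sync one, including sequential tool calling. A minimal asyncio sketch under the same illustrative assumptions as the sync example above:

import asyncio

async def main() -> None:
    llm = LLM(model="deepseek/deepseek-reasoner")  # illustrative model
    answer = await llm.get_response_async(
        "What's the weather in Paris?",
        tools=[get_weather],
        stream=True,  # default, per the signature shown in this hunk
    )
    print(answer)

asyncio.run(main())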
@@ -1238,46 +1272,47 @@ async def get_response_async(

            response_text = ""
            if reasoning_steps:
-                   # Non-streaming call to capture reasoning
-                   resp = await litellm.acompletion(
-                       **self._build_completion_params(
-                           messages=messages,
-                           temperature=temperature,
-                           stream=False,  # force non-streaming
-                           **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                       )
+               # Non-streaming call to capture reasoning
+               resp = await litellm.acompletion(
+                   **self._build_completion_params(
+                       messages=messages,
+                       temperature=temperature,
+                       stream=False,  # force non-streaming
+                       **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
                    )
-                   reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                   response_text = resp["choices"][0]["message"]["content"]
-
-                   if verbose and reasoning_content:
-                       display_interaction(
-                           "Initial reasoning:",
-                           f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                           markdown=markdown,
-                           generation_time=time.time() - start_time,
-                           console=console
-                       )
-                   elif verbose:
-                       display_interaction(
-                           "Initial response:",
-                           response_text,
-                           markdown=markdown,
-                           generation_time=time.time() - start_time,
-                           console=console
-                       )
-               else:
-                   # Determine if we should use streaming based on tool support
-                   use_streaming = stream
-                   if formatted_tools and not self._supports_streaming_tools():
-                       # Provider doesn't support streaming with tools, use non-streaming
-                       use_streaming = False
+               )
+               reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+               response_text = resp["choices"][0]["message"]["content"]
+
+               if verbose and reasoning_content:
+                   display_interaction(
+                       "Initial reasoning:",
+                       f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                       markdown=markdown,
+                       generation_time=time.time() - start_time,
+                       console=console
+                   )
+               elif verbose:
+                   display_interaction(
+                       "Initial response:",
+                       response_text,
+                       markdown=markdown,
+                       generation_time=time.time() - start_time,
+                       console=console
+                   )
+           else:
+               # Determine if we should use streaming based on tool support
+               use_streaming = stream
+               if formatted_tools and not self._supports_streaming_tools():
+                   # Provider doesn't support streaming with tools, use non-streaming
+                   use_streaming = False
+
+               if use_streaming:
+                   # Streaming approach (with or without tools)
+                   tool_calls = []

-               if use_streaming:
-                   # Streaming approach (with or without tools)
-
-                   if verbose:
-                       async for chunk in await litellm.acompletion(
+                   if verbose:
+                       async for chunk in await litellm.acompletion(
                            **self._build_completion_params(
                                messages=messages,
                                temperature=temperature,
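
The re-indented else branch gates streaming on tool support. The decision itself is a small pure function, sketched here with supports_streaming_tools standing in for the internal self._supports_streaming_tools() helper (its exact behavior is an assumption):

def choose_streaming(stream: bool, formatted_tools, supports_streaming_tools: bool) -> bool:
    # Providers that cannot stream tool calls must fall back to one-shot completion.
    if formatted_tools and not supports_streaming_tools:
        return False
    return stream

assert choose_streaming(True, [{"type": "function"}], False) is False
assert choose_streaming(True, None, False) is True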
@@ -1433,7 +1468,67 @@ async def get_response_async(
                        else:
                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

-                   # Get response after tool calls
+                   # If no special handling was needed or if it's not an Ollama model
+                   if reasoning_steps and not ollama_handled:
+                       # Non-streaming call to capture reasoning
+                       resp = await litellm.acompletion(
+                           **self._build_completion_params(
+                               messages=messages,
+                               temperature=temperature,
+                               stream=False,  # force non-streaming
+                               tools=formatted_tools,  # Include tools
+                               **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                           )
+                       )
+                       reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                       response_text = resp["choices"][0]["message"]["content"]
+
+                       if verbose and reasoning_content:
+                           display_interaction(
+                               "Tool response reasoning:",
+                               f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+                       elif verbose:
+                           display_interaction(
+                               "Tool response:",
+                               response_text,
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+                   elif not ollama_handled:
+                       # Get response after tool calls with streaming if not already handled
+                       if verbose:
+                           async for chunk in await litellm.acompletion(
+                               **self._build_completion_params(
+                                   messages=messages,
+                                   temperature=temperature,
+                                   stream=stream,
+                                   tools=formatted_tools,
+                                   **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                               )
+                           ):
+                               if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                   content = chunk.choices[0].delta.content
+                                   response_text += content
+                                   print("\033[K", end="\r")
+                                   print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+                       else:
+                           response_text = ""
+                           async for chunk in await litellm.acompletion(
+                               **self._build_completion_params(
+                                   messages=messages,
+                                   temperature=temperature,
+                                   stream=stream,
+                                   **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                               )
+                           ):
+                               if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                   response_text += chunk.choices[0].delta.content
+
                    response_text = response_text.strip()

                    # Handle output formatting
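
Both post-tool paths consume the model output with the same async for chunk in await litellm.acompletion(..., stream=True) idiom. A self-contained version of that loop; the model name is an assumption, while the chunk shape (choices[0].delta.content) is exactly what the hunk iterates over:

import asyncio
import litellm

async def stream_answer() -> str:
    text = ""
    async for chunk in await litellm.acompletion(
        model="gpt-4o-mini",  # illustrative model
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    ):
        # Deltas may be empty (e.g. role-only chunks), so guard before appending.
        if chunk and chunk.choices and chunk.choices[0].delta.content:
            text += chunk.choices[0].delta.content
    return text

print(asyncio.run(stream_answer()))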