@@ -1270,48 +1270,58 @@ async def get_response_async(
         # Format tools for LiteLLM using the shared helper
         formatted_tools = self._format_tools_for_litellm(tools)

-        response_text = ""
-        if reasoning_steps:
-            # Non-streaming call to capture reasoning
-            resp = await litellm.acompletion(
-                **self._build_completion_params(
-                    messages=messages,
+        # Initialize variables for iteration loop
+        max_iterations = 10  # Prevent infinite loops
+        iteration_count = 0
+        final_response_text = ""
+        stored_reasoning_content = None  # Store reasoning content from tool execution
+
+        while iteration_count < max_iterations:
+            response_text = ""
+            reasoning_content = None
+            tool_calls = []
+
+            if reasoning_steps and iteration_count == 0:
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
+                    **self._build_completion_params(
+                        messages=messages,
                         temperature=temperature,
                         stream=False,  # force non-streaming
                         **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
                     )
-            )
-            reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-            response_text = resp["choices"][0]["message"]["content"]
-
-            if verbose and reasoning_content:
-                display_interaction(
-                    "Initial reasoning:",
-                    f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                    markdown=markdown,
-                    generation_time=time.time() - start_time,
-                    console=console
                 )
-            elif verbose:
-                display_interaction(
-                    "Initial response:",
-                    response_text,
-                    markdown=markdown,
-                    generation_time=time.time() - start_time,
-                    console=console
-                )
-        else:
-            # Determine if we should use streaming based on tool support
-            use_streaming = stream
-            if formatted_tools and not self._supports_streaming_tools():
-                # Provider doesn't support streaming with tools, use non-streaming
-                use_streaming = False
-
-            if use_streaming:
-                # Streaming approach (with or without tools)
-                tool_calls = []
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]

-                if verbose:
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Initial reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Initial response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+            else:
+                # Determine if we should use streaming based on tool support
+                use_streaming = stream
+                if formatted_tools and not self._supports_streaming_tools():
+                    # Provider doesn't support streaming with tools, use non-streaming
+                    use_streaming = False
+
+                if use_streaming:
+                    # Streaming approach (with or without tools)
+                    tool_calls = []
+
+                    if verbose:
                         async for chunk in await litellm.acompletion(
                             **self._build_completion_params(
                                 messages=messages,
@@ -1378,10 +1388,8 @@ async def get_response_async(
                             console=console
                         )

-        # Now handle tools if we have them (either from streaming or non-streaming)
-        if tools and execute_tool_fn and tool_calls:
-
-            if tool_calls:
+            # Now handle tools if we have them (either from streaming or non-streaming)
+            if tools and execute_tool_fn and tool_calls:
                 # Convert tool_calls to a serializable format for all providers
                 serializable_tool_calls = self._serialize_tool_calls(tool_calls)
                 messages.append({
@@ -1462,9 +1470,16 @@ async def get_response_async(
                                 console=console
                             )

-                    # Return the final response after processing Ollama's follow-up
+                        # Store the response for potential final return
                         if final_response_text:
-                        return final_response_text
+                            # Update messages with the response to maintain conversation context
+                            messages.append({
+                                "role": "assistant",
+                                "content": final_response_text
+                            })
+                            # Continue the loop to check if more tools are needed
+                            iteration_count += 1
+                            continue
                         else:
                             logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

@@ -1530,6 +1545,27 @@ async def get_response_async(
                             response_text += chunk.choices[0].delta.content

                 response_text = response_text.strip()
+
+                # After tool execution, update messages and continue the loop
+                if response_text:
+                    messages.append({
+                        "role": "assistant",
+                        "content": response_text
+                    })
+
+                    # Store reasoning content if captured
+                    if reasoning_steps and reasoning_content:
+                        stored_reasoning_content = reasoning_content
+
+                    # Continue the loop to check if more tools are needed
+                    iteration_count += 1
+                    continue
+            else:
+                # No tool calls, we're done with this iteration
+                # If we've executed tools in previous iterations, this response contains the final answer
+                if iteration_count > 0:
+                    final_response_text = response_text.strip()
+                break

         # Handle output formatting
         if output_json or output_pydantic:
@@ -1541,13 +1577,27 @@ async def get_response_async(
             return response_text

         if not self_reflect:
+            # Use final_response_text if we went through tool iterations
+            display_text = final_response_text if final_response_text else response_text
+
+            # Display with stored reasoning content if available
             if verbose:
-                display_interaction(original_prompt, response_text, markdown=markdown,
-                                    generation_time=time.time() - start_time, console=console)
-            # Return reasoning content if reasoning_steps is True
-            if reasoning_steps and reasoning_content:
-                return reasoning_content
-            return response_text
+                if stored_reasoning_content:
+                    display_interaction(
+                        original_prompt,
+                        f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{display_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                else:
+                    display_interaction(original_prompt, display_text, markdown=markdown,
+                                        generation_time=time.time() - start_time, console=console)
+
+            # Return reasoning content if reasoning_steps is True and we have it
+            if reasoning_steps and stored_reasoning_content:
+                return stored_reasoning_content
+            return display_text

         # Handle self-reflection
         reflection_prompt = f"""
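Taken together, the hunks above replace a single-shot request with a bounded agentic loop: call the model, execute any tool calls it returns, append the assistant turn and the tool results to `messages`, and repeat until the model answers without requesting tools or `max_iterations` is reached. Below is a minimal standalone sketch of that control flow under OpenAI-style message conventions; `call_model` and `run_tool` are hypothetical stand-ins for `litellm.acompletion` and the framework's `execute_tool_fn`, not APIs from this codebase.

import json
from typing import Any, Awaitable, Callable

async def tool_call_loop(
    messages: list[dict],
    call_model: Callable[[list[dict]], Awaitable[dict]],  # hypothetical: one LLM round trip -> assistant message dict
    run_tool: Callable[[str, dict], Awaitable[Any]],      # hypothetical: executes a single tool call
    max_iterations: int = 10,  # mirrors the commit's guard against infinite tool loops
) -> str:
    final_text = ""
    for _ in range(max_iterations):
        reply = await call_model(messages)
        messages.append(reply)  # keep the assistant turn in the conversation context
        tool_calls = reply.get("tool_calls") or []

        if not tool_calls:
            # No tools requested: this reply is the final answer.
            final_text = reply.get("content") or ""
            break

        # Execute each requested tool and feed its result back to the model.
        for call in tool_calls:
            result = await run_tool(
                call["function"]["name"],
                json.loads(call["function"]["arguments"]),
            )
            messages.append({
                "role": "tool",
                "tool_call_id": call["id"],
                "content": json.dumps(result),
            })
        # Loop again so the model can read the tool output (or call more tools).
    return final_text

Bounding the loop (`max_iterations = 10` in the commit) is the usual guard against a model that keeps requesting tools indefinitely; the trade-off is that a run which legitimately needs more rounds exits with whatever text was produced last. Appending the assistant turn before the tool results also matters: OpenAI-style chat APIs expect each `role: "tool"` message to follow an assistant message containing the matching `tool_call_id`.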