@@ -549,6 +549,7 @@ def get_response(
                 })
 
                 should_continue = False
+                tool_results = []  # Store all tool results
                 for tool_call in tool_calls:
                     # Handle both object and dict access patterns
                     if isinstance(tool_call, dict):
@@ -569,6 +570,7 @@ def get_response(
                     logging.debug(f"[TOOL_EXEC_DEBUG] About to execute tool {function_name} with args: {arguments}")
                     tool_result = execute_tool_fn(function_name, arguments)
                     logging.debug(f"[TOOL_EXEC_DEBUG] Tool execution result: {tool_result}")
+                    tool_results.append(tool_result)  # Store the result
 
                     if verbose:
                         display_message = f"Agent {agent_name} called function '{function_name}' with arguments: {arguments}\n"
@@ -601,7 +603,8 @@ def get_response(
                 # If we reach here, no more tool calls needed - get final response
                 # Make one more call to get the final summary response
                 # Special handling for Ollama models that don't automatically process tool results
-                if self.model and self.model.startswith("ollama/") and tool_result:
+                ollama_handled = False
+                if self.model and self.model.startswith("ollama/") and tool_results:
                     # For Ollama models, we need to explicitly ask the model to process the tool results
                     # First, check if the response is just a JSON tool call
                     try:
@@ -614,13 +617,30 @@ def get_response(
                         # Create a prompt that asks the model to process the tool results based on original context
                         # Extract the original user query from messages
                         original_query = ""
-                        for msg in messages:
+                        for msg in reversed(messages):  # Look from the end to find the most recent user message
                             if msg.get("role") == "user":
-                                original_query = msg.get("content", "")
-                                break
+                                content = msg.get("content", "")
+                                # Handle list content (multimodal)
+                                if isinstance(content, list):
+                                    for item in content:
+                                        if isinstance(item, dict) and item.get("type") == "text":
+                                            original_query = item.get("text", "")
+                                            break
+                                else:
+                                    original_query = content
+                                if original_query:
+                                    break
+
+                        # Create a shorter follow-up prompt with all tool results
+                        # If there's only one result, use it directly; otherwise combine them
+                        if len(tool_results) == 1:
+                            results_text = json.dumps(tool_results[0], indent=2)
+                        else:
+                            results_text = json.dumps(tool_results, indent=2)
 
-                        # Create a shorter follow-up prompt
-                        follow_up_prompt = f"Results:\n{json.dumps(tool_result, indent=2)}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                        follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                        logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
+                        logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
 
                         # Make a follow-up call to process the results
                         follow_up_messages = [
@@ -653,12 +673,33 @@ def get_response(
                             ):
                                 if chunk and chunk.choices and chunk.choices[0].delta.content:
                                     response_text += chunk.choices[0].delta.content
+
+                        # Set flag to indicate Ollama was handled
+                        ollama_handled = True
+                        final_response_text = response_text.strip()
+                        logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
+
+                        # Display the response if we got one
+                        if final_response_text and verbose:
+                            display_interaction(
+                                original_prompt,
+                                final_response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - start_time,
+                                console=console
+                            )
+
+                        # Return the final response after processing Ollama's follow-up
+                        if final_response_text:
+                            return final_response_text
+                        else:
+                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
                     except (json.JSONDecodeError, KeyError):
                         # Not a JSON response or not a tool call format, continue normally
                         pass
 
-                # If reasoning_steps is True, do a single non-streaming call
-                elif reasoning_steps:
+                # If reasoning_steps is True and we haven't handled Ollama already, do a single non-streaming call
+                if reasoning_steps and not ollama_handled:
                     resp = litellm.completion(
                         **self._build_completion_params(
                             messages=messages,
@@ -688,8 +729,8 @@ def get_response(
                             console=console
                         )
 
-                # Otherwise do the existing streaming approach
-                else:
+                # Otherwise do the existing streaming approach if not already handled
+                elif not ollama_handled:
                     # Get response after tool calls with streaming
                     if verbose:
                         with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
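For reference, here is a minimal, self-contained sketch of the query-extraction and prompt-building logic that the hunks above add to the synchronous path (the same logic is mirrored in `get_response_async` below). The helper names are hypothetical; the patch inlines this logic rather than defining these functions.

```python
# Illustrative sketch (not part of the patch): standalone versions of the new
# "most recent user message" extraction and follow-up prompt construction.
import json
from typing import Any, Dict, List


def extract_latest_user_query(messages: List[Dict[str, Any]]) -> str:
    """Walk messages newest-first and return the text of the last user message,
    handling multimodal list content of the form [{"type": "text", "text": ...}]."""
    for msg in reversed(messages):
        if msg.get("role") != "user":
            continue
        content = msg.get("content", "")
        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict) and item.get("type") == "text" and item.get("text"):
                    return item["text"]
        elif content:
            return content
    return ""


def build_follow_up_prompt(messages: List[Dict[str, Any]], tool_results: List[Any]) -> str:
    """Serialize one or many tool results and append the original question,
    matching the prompt format used in the diff."""
    results_text = json.dumps(tool_results[0] if len(tool_results) == 1 else tool_results, indent=2)
    original_query = extract_latest_user_query(messages)
    return (f"Results:\n{results_text}\n"
            f"Provide Answer to this Original Question based on the above results: '{original_query}'")


if __name__ == "__main__":
    msgs = [
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": [{"type": "text", "text": "What is the weather in Paris?"}]},
    ]
    print(build_follow_up_prompt(msgs, [{"city": "Paris", "temp_c": 21}]))
```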
@@ -1225,6 +1266,7 @@ async def get_response_async(
                     "tool_calls": serializable_tool_calls
                 })
 
+                tool_results = []  # Store all tool results
                 for tool_call in tool_calls:
                     # Handle both object and dict access patterns
                     if isinstance(tool_call, dict):
@@ -1243,6 +1285,7 @@ async def get_response_async(
                        tool_call_id = f"tool_{id(tool_call)}"
 
                    tool_result = await execute_tool_fn(function_name, arguments)
+                   tool_results.append(tool_result)  # Store the result
 
                    if verbose:
                        display_message = f"Agent {agent_name} called function '{function_name}' with arguments: {arguments}\n"
@@ -1261,7 +1304,8 @@ async def get_response_async(
                response_text = ""
 
                # Special handling for Ollama models that don't automatically process tool results
-               if self._is_ollama_provider() and tool_result:
+               ollama_handled = False
+               if self._is_ollama_provider() and tool_results:
                    # For Ollama models, we need to explicitly ask the model to process the tool results
                    # First, check if the response is just a JSON tool call
                    try:
@@ -1274,13 +1318,30 @@ async def get_response_async(
                        # Create a prompt that asks the model to process the tool results based on original context
                        # Extract the original user query from messages
                        original_query = ""
-                       for msg in messages:
+                       for msg in reversed(messages):  # Look from the end to find the most recent user message
                            if msg.get("role") == "user":
-                               original_query = msg.get("content", "")
-                               break
+                               content = msg.get("content", "")
+                               # Handle list content (multimodal)
+                               if isinstance(content, list):
+                                   for item in content:
+                                       if isinstance(item, dict) and item.get("type") == "text":
+                                           original_query = item.get("text", "")
+                                           break
+                               else:
+                                   original_query = content
+                               if original_query:
+                                   break
+
+                       # Create a shorter follow-up prompt with all tool results
+                       # If there's only one result, use it directly; otherwise combine them
+                       if len(tool_results) == 1:
+                           results_text = json.dumps(tool_results[0], indent=2)
+                       else:
+                           results_text = json.dumps(tool_results, indent=2)
 
-                       # Create a shorter follow-up prompt
-                       follow_up_prompt = f"Results:\n{json.dumps(tool_result, indent=2)}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                       follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                       logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
+                       logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
 
                        # Make a follow-up call to process the results
                        follow_up_messages = [
@@ -1313,12 +1374,33 @@ async def get_response_async(
                            ):
                                if chunk and chunk.choices and chunk.choices[0].delta.content:
                                    response_text += chunk.choices[0].delta.content
+
+                       # Set flag to indicate Ollama was handled
+                       ollama_handled = True
+                       final_response_text = response_text.strip()
+                       logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
+
+                       # Display the response if we got one
+                       if final_response_text and verbose:
+                           display_interaction(
+                               original_prompt,
+                               final_response_text,
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+
+                       # Return the final response after processing Ollama's follow-up
+                       if final_response_text:
+                           return final_response_text
+                       else:
+                           logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
                    except (json.JSONDecodeError, KeyError):
                        # Not a JSON response or not a tool call format, continue normally
                        pass
 
                # If no special handling was needed or if it's not an Ollama model
-               elif reasoning_steps:
+               if reasoning_steps and not ollama_handled:
                    # Non-streaming call to capture reasoning
                    resp = await litellm.acompletion(
                        **self._build_completion_params(
@@ -1348,8 +1430,8 @@ async def get_response_async(
                            generation_time=time.time() - start_time,
                            console=console
                        )
-               else:
-                   # Get response after tool calls with streaming
+               elif not ollama_handled:
+                   # Get response after tool calls with streaming if not already handled
                    if verbose:
                        async for chunk in await litellm.acompletion(
                            **self._build_completion_params(
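Taken together, the async hunks mirror the synchronous ones: aggregate every tool result, let Ollama-backed models process those results in an explicit follow-up call, and use an `ollama_handled` flag so the reasoning-steps and streaming fallbacks are skipped once that follow-up has run. Below is a minimal, self-contained sketch of that control flow; the function and parameter names are illustrative, not part of the patch, and the real code additionally guards the Ollama branch with a try/except that only proceeds when the first reply was a bare JSON tool call.

```python
# Control-flow sketch only (hypothetical names): shows how `ollama_handled`
# prevents the later branches from running once the follow-up call has answered.
from typing import Any, Callable, List


def finalize_after_tools(
    is_ollama: bool,
    tool_results: List[Any],
    reasoning_steps: bool,
    ollama_follow_up: Callable[[List[Any]], str],
    reasoning_call: Callable[[], str],
    streaming_call: Callable[[], str],
) -> str:
    ollama_handled = False

    if is_ollama and tool_results:
        # In the patch this block sits inside a try/except around json.loads(response_text).
        ollama_handled = True
        final_response_text = ollama_follow_up(tool_results).strip()
        if final_response_text:
            return final_response_text  # early return, as in the patch
        # Empty follow-up: the patch logs a warning and falls through,
        # but the branches below are still skipped.

    if reasoning_steps and not ollama_handled:
        return reasoning_call()   # single non-streaming call to capture reasoning
    elif not ollama_handled:
        return streaming_call()   # existing streaming approach
    return ""


# The Ollama follow-up wins; the streaming fallback never runs.
print(finalize_after_tools(
    is_ollama=True,
    tool_results=[{"temp_c": 21}],
    reasoning_steps=False,
    ollama_follow_up=lambda results: "It is 21 °C in Paris.",
    reasoning_call=lambda: "(reasoning)",
    streaming_call=lambda: "(streamed)",
))
```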