@@ -857,8 +857,6 @@ def get_response(
                             iteration_count += 1
                             continue
 
-                        # If we reach here, no more tool calls needed - get final response
-                        # Make one more call to get the final summary response
                         # Special handling for Ollama models that don't automatically process tool results
                         ollama_handled = False
                         ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
@@ -924,97 +922,10 @@ def get_response(
                             else:
                                 logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
 
-                        # If reasoning_steps is True and we haven't handled Ollama already, do a single non-streaming call
-                        if reasoning_steps and not ollama_handled:
-                            resp = litellm.completion(
-                                **self._build_completion_params(
-                                    messages=messages,
-                                    temperature=temperature,
-                                    stream=False,  # force non-streaming
-                                    **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                                )
-                            )
-                            reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                            response_text = resp["choices"][0]["message"]["content"]
-
-                            # Optionally display reasoning if present
-                            if verbose and reasoning_content:
-                                display_interaction(
-                                    original_prompt,
-                                    f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                                    markdown=markdown,
-                                    generation_time=time.time() - start_time,
-                                    console=console
-                                )
-                            else:
-                                display_interaction(
-                                    original_prompt,
-                                    response_text,
-                                    markdown=markdown,
-                                    generation_time=time.time() - start_time,
-                                    console=console
-                                )
-
-                        # Otherwise do the existing streaming approach if not already handled
-                        elif not ollama_handled:
-                            # Get response after tool calls
-                            if stream:
-                                # Streaming approach
-                                if verbose:
-                                    with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
-                                        final_response_text = ""
-                                        for chunk in litellm.completion(
-                                            **self._build_completion_params(
-                                                messages=messages,
-                                                tools=formatted_tools,
-                                                temperature=temperature,
-                                                stream=True,
-                                                **kwargs
-                                            )
-                                        ):
-                                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                                content = chunk.choices[0].delta.content
-                                                final_response_text += content
-                                                live.update(display_generating(final_response_text, current_time))
-                                else:
-                                    final_response_text = ""
-                                    for chunk in litellm.completion(
-                                        **self._build_completion_params(
-                                            messages=messages,
-                                            tools=formatted_tools,
-                                            temperature=temperature,
-                                            stream=True,
-                                            **kwargs
-                                        )
-                                    ):
-                                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                            final_response_text += chunk.choices[0].delta.content
-                            else:
-                                # Non-streaming approach
-                                resp = litellm.completion(
-                                    **self._build_completion_params(
-                                        messages=messages,
-                                        tools=formatted_tools,
-                                        temperature=temperature,
-                                        stream=False,
-                                        **kwargs
-                                    )
-                                )
-                                final_response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
-
-                            final_response_text = final_response_text.strip()
-
-                            # Display final response
-                            if verbose:
-                                display_interaction(
-                                    original_prompt,
-                                    final_response_text,
-                                    markdown=markdown,
-                                    generation_time=time.time() - start_time,
-                                    console=console
-                                )
-
-                            return final_response_text
+                        # After tool execution, continue the loop to check if more tools are needed
+                        # instead of immediately trying to get a final response
+                        iteration_count += 1
+                        continue
                     else:
                         # No tool calls, we're done with this iteration
                         # If we've executed tools in previous iterations, this response contains the final answer
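
For context on the new control flow: after the tools run, the loop now increments `iteration_count` and continues, letting the model request further tool calls; the final answer is simply the first response that arrives with no tool calls, rather than a separate summary completion. The sketch below illustrates that loop shape under stated assumptions; `call_model` and `execute_tool` are hypothetical stand-ins, not functions from this codebase.

# Minimal sketch of the continue-the-loop pattern (assumed helper names, not the library's API).
def run_tool_loop(call_model, execute_tool, messages, max_iterations=10):
    iteration_count = 0
    final_text = ""
    while iteration_count < max_iterations:
        # call_model is assumed to return {"content": str, "tool_calls": list}.
        response = call_model(messages)
        final_text = response.get("content") or ""
        tool_calls = response.get("tool_calls") or []

        if not tool_calls:
            # No tool calls: this response already contains the final answer.
            return final_text

        # Execute each requested tool, append its result to the conversation,
        # then continue the loop so the model can decide whether more tools are needed.
        for call in tool_calls:
            result = execute_tool(call["name"], call.get("arguments", {}))
            messages.append({
                "role": "tool",
                "tool_call_id": call.get("id"),
                "content": str(result),
            })
        iteration_count += 1

    return final_text  # iteration budget exhausted; return the last content seen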