@@ -864,32 +864,44 @@ def get_response(
864864 ollama_params = self ._handle_ollama_model (response_text , tool_results , messages , original_prompt )
865865
866866 if ollama_params :
867- # Get response with streaming
868- if verbose :
869- with Live (display_generating ("" , start_time ), console = console , refresh_per_second = 4 ) as live :
867+ # Get response based on streaming mode
868+ if stream :
869+ # Streaming approach
870+ if verbose :
871+ with Live (display_generating ("" , start_time ), console = console , refresh_per_second = 4 ) as live :
872+ response_text = ""
873+ for chunk in litellm .completion (
874+ ** self ._build_completion_params (
875+ messages = ollama_params ["follow_up_messages" ],
876+ temperature = temperature ,
877+ stream = True
878+ )
879+ ):
880+ if chunk and chunk .choices and chunk .choices [0 ].delta .content :
881+ content = chunk .choices [0 ].delta .content
882+ response_text += content
883+ live .update (display_generating (response_text , start_time ))
884+ else :
870885 response_text = ""
871886 for chunk in litellm .completion (
872887 ** self ._build_completion_params (
873888 messages = ollama_params ["follow_up_messages" ],
874889 temperature = temperature ,
875- stream = stream
890+ stream = True
876891 )
877892 ):
878893 if chunk and chunk .choices and chunk .choices [0 ].delta .content :
879- content = chunk .choices [0 ].delta .content
880- response_text += content
881- live .update (display_generating (response_text , start_time ))
894+ response_text += chunk .choices [0 ].delta .content
882895 else :
883- response_text = ""
884- for chunk in litellm .completion (
896+ # Non-streaming approach
897+ resp = litellm .completion (
885898 ** self ._build_completion_params (
886899 messages = ollama_params ["follow_up_messages" ],
887900 temperature = temperature ,
888- stream = stream
901+ stream = False
889902 )
890- ):
891- if chunk and chunk .choices and chunk .choices [0 ].delta .content :
892- response_text += chunk .choices [0 ].delta .content
903+ )
904+ response_text = resp .get ("choices" , [{}])[0 ].get ("message" , {}).get ("content" , "" ) or ""
893905
894906 # Set flag to indicate Ollama was handled
895907 ollama_handled = True
@@ -945,9 +957,26 @@ def get_response(
945957
946958 # Otherwise do the existing streaming approach if not already handled
947959 elif not ollama_handled :
948- # Get response after tool calls with streaming
949- if verbose :
950- with Live (display_generating ("" , current_time ), console = console , refresh_per_second = 4 ) as live :
960+ # Get response after tool calls
961+ if stream :
962+ # Streaming approach
963+ if verbose :
964+ with Live (display_generating ("" , current_time ), console = console , refresh_per_second = 4 ) as live :
965+ final_response_text = ""
966+ for chunk in litellm .completion (
967+ ** self ._build_completion_params (
968+ messages = messages ,
969+ tools = formatted_tools ,
970+ temperature = temperature ,
971+ stream = True ,
972+ ** kwargs
973+ )
974+ ):
975+ if chunk and chunk .choices and chunk .choices [0 ].delta .content :
976+ content = chunk .choices [0 ].delta .content
977+ final_response_text += content
978+ live .update (display_generating (final_response_text , current_time ))
979+ else :
951980 final_response_text = ""
952981 for chunk in litellm .completion (
953982 ** self ._build_completion_params (
@@ -959,22 +988,19 @@ def get_response(
959988 )
960989 ):
961990 if chunk and chunk .choices and chunk .choices [0 ].delta .content :
962- content = chunk .choices [0 ].delta .content
963- final_response_text += content
964- live .update (display_generating (final_response_text , current_time ))
991+ final_response_text += chunk .choices [0 ].delta .content
965992 else :
966- final_response_text = ""
967- for chunk in litellm .completion (
993+ # Non-streaming approach
994+ resp = litellm .completion (
968995 ** self ._build_completion_params (
969996 messages = messages ,
970997 tools = formatted_tools ,
971998 temperature = temperature ,
972- stream = stream ,
999+ stream = False ,
9731000 ** kwargs
9741001 )
975- ):
976- if chunk and chunk .choices and chunk .choices [0 ].delta .content :
977- final_response_text += chunk .choices [0 ].delta .content
1002+ )
1003+ final_response_text = resp .get ("choices" , [{}])[0 ].get ("message" , {}).get ("content" , "" ) or ""
9781004
9791005 final_response_text = final_response_text .strip ()
9801006
0 commit comments