
Commit 48d0f8d

fix: handle non-streaming responses after tool execution for sequential tool calling

- Fixed issue where tool outputs were returned directly to the user when stream=False
- Added proper handling for non-streaming responses after tool calls
- Maintains backward compatibility with existing streaming behavior

Fixes #824

Co-authored-by: Mervin Praison <[email protected]>
1 parent: 246744c · commit: 48d0f8d

File tree: 2 files changed (+75, −12 lines)

src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 26 additions & 12 deletions
@@ -945,9 +945,26 @@ def get_response(

             # Otherwise do the existing streaming approach if not already handled
             elif not ollama_handled:
-                # Get response after tool calls with streaming
-                if verbose:
-                    with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                # Get response after tool calls
+                if stream:
+                    # Streaming approach
+                    if verbose:
+                        with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                            final_response_text = ""
+                            for chunk in litellm.completion(
+                                **self._build_completion_params(
+                                    messages=messages,
+                                    tools=formatted_tools,
+                                    temperature=temperature,
+                                    stream=True,
+                                    **kwargs
+                                )
+                            ):
+                                if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                    content = chunk.choices[0].delta.content
+                                    final_response_text += content
+                                    live.update(display_generating(final_response_text, current_time))
+                    else:
                         final_response_text = ""
                         for chunk in litellm.completion(
                             **self._build_completion_params(
@@ -959,22 +976,19 @@ def get_response(
                             )
                         ):
                             if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                content = chunk.choices[0].delta.content
-                                final_response_text += content
-                                live.update(display_generating(final_response_text, current_time))
+                                final_response_text += chunk.choices[0].delta.content
                 else:
-                    final_response_text = ""
-                    for chunk in litellm.completion(
+                    # Non-streaming approach
+                    resp = litellm.completion(
                         **self._build_completion_params(
                             messages=messages,
                             tools=formatted_tools,
                             temperature=temperature,
-                            stream=stream,
+                            stream=False,
                             **kwargs
                         )
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                            final_response_text += chunk.choices[0].delta.content
+                    )
+                    final_response_text = resp["choices"][0]["message"]["content"]

             final_response_text = final_response_text.strip()
test_sequential_tool_calling.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@

"""Test sequential tool calling fix"""
from praisonaiagents import Agent

def get_stock_price(company_name: str) -> str:
    """
    Get the stock price of a company

    Args:
        company_name (str): The name of the company

    Returns:
        str: The stock price of the company
    """
    print(f"Tool called: get_stock_price({company_name})")
    return f"The stock price of {company_name} is 100"

def multiply(a: int, b: int) -> int:
    """
    Multiply two numbers
    """
    print(f"Tool called: multiply({a}, {b})")
    return a * b

# Test with streaming disabled to verify the fix
print("Testing sequential tool calling with stream=False...")
agent = Agent(
    instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
    llm="gemini/gemini-2.5-flash-lite-preview-06-17",
    self_reflect=False,
    verbose=True,
    tools=[get_stock_price, multiply],
    llm_config={"stream": False}  # Force non-streaming mode
)

result = agent.chat("Get the stock price of Google and multiply it by 2")
print(f"\nFinal result: {result}")

# Test with default streaming mode
print("\n\nTesting sequential tool calling with default streaming...")
agent2 = Agent(
    instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
    llm="gemini/gemini-2.5-flash-lite-preview-06-17",
    self_reflect=False,
    verbose=True,
    tools=[get_stock_price, multiply]
)

result2 = agent2.chat("Get the stock price of Google and multiply it by 2")
print(f"\nFinal result: {result2}")
