Merge pull request #832 from MervinPraison/claude/issue-824-20250711_155141

MervinPraison · web-flow · commit 98e7903d2aca · 2025-07-12T00:19:42.000+01:00
fix: sequential tool calling for non-streaming responses
diff --git a/src/praisonai-agents/praisonaiagents/llm/llm.py b/src/praisonai-agents/praisonaiagents/llm/llm.py
@@ -864,32 +864,44 @@ def get_response(
                         ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
                         
                         if ollama_params:
-                            # Get response with streaming
-                            if verbose:
-                                with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                            # Get response based on streaming mode
+                            if stream:
+                                # Streaming approach
+                                if verbose:
+                                    with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                                        response_text = ""
+                                        for chunk in litellm.completion(
+                                            **self._build_completion_params(
+                                                messages=ollama_params["follow_up_messages"],
+                                                temperature=temperature,
+                                                stream=True
+                                            )
+                                        ):
+                                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                                content = chunk.choices[0].delta.content
+                                                response_text += content
+                                                live.update(display_generating(response_text, start_time))
+                                else:
                                     response_text = ""
                                     for chunk in litellm.completion(
                                         **self._build_completion_params(
                                             messages=ollama_params["follow_up_messages"],
                                             temperature=temperature,
-                                            stream=stream
+                                            stream=True
                                         )
                                     ):
                                         if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                            content = chunk.choices[0].delta.content
-                                            response_text += content
-                                            live.update(display_generating(response_text, start_time))
+                                            response_text += chunk.choices[0].delta.content
                             else:
-                                response_text = ""
-                                for chunk in litellm.completion(
+                                # Non-streaming approach
+                                resp = litellm.completion(
                                     **self._build_completion_params(
                                         messages=ollama_params["follow_up_messages"],
                                         temperature=temperature,
-                                        stream=stream
+                                        stream=False
                                     )
-                                ):
-                                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                        response_text += chunk.choices[0].delta.content
+                                )
+                                response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
                             
                             # Set flag to indicate Ollama was handled
                             ollama_handled = True
@@ -945,9 +957,26 @@ def get_response(
                         
                         # Otherwise do the existing streaming approach if not already handled
                         elif not ollama_handled:
-                            # Get response after tool calls with streaming
-                            if verbose:
-                                with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                            # Get response after tool calls
+                            if stream:
+                                # Streaming approach
+                                if verbose:
+                                    with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                                        final_response_text = ""
+                                        for chunk in litellm.completion(
+                                            **self._build_completion_params(
+                                                messages=messages,
+                                                tools=formatted_tools,
+                                                temperature=temperature,
+                                                stream=True,
+                                                **kwargs
+                                            )
+                                        ):
+                                            if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                                content = chunk.choices[0].delta.content
+                                                final_response_text += content
+                                                live.update(display_generating(final_response_text, current_time))
+                                else:
                                     final_response_text = ""
                                     for chunk in litellm.completion(
                                         **self._build_completion_params(
@@ -959,22 +988,19 @@ def get_response(
                                         )
                                     ):
                                         if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                            content = chunk.choices[0].delta.content
-                                            final_response_text += content
-                                            live.update(display_generating(final_response_text, current_time))
+                                            final_response_text += chunk.choices[0].delta.content
                             else:
-                                final_response_text = ""
-                                for chunk in litellm.completion(
+                                # Non-streaming approach
+                                resp = litellm.completion(
                                     **self._build_completion_params(
                                         messages=messages,
                                         tools=formatted_tools,
                                         temperature=temperature,
-                                        stream=stream,
+                                        stream=False,
                                         **kwargs
                                     )
-                                ):
-                                    if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                        final_response_text += chunk.choices[0].delta.content
+                                )
+                                final_response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
                             
                             final_response_text = final_response_text.strip()
                         
diff --git a/src/praisonai-agents/test_sequential_tool_calling.py b/src/praisonai-agents/test_sequential_tool_calling.py
@@ -0,0 +1,61 @@
+"""Manual check of sequential tool calling with and without streaming.
+
+Running this file as a script builds two agents and calls ``agent.chat`` on each.
+The runs sit behind a ``__main__`` guard so that merely importing the module
+(for example when pytest collects ``test_*.py`` files) only defines the tools.
+"""
+from praisonaiagents import Agent
+
+# NOTE(review): the tool docstrings below are presumably passed to the model as
+# tool descriptions - confirm before rewording them.
+def get_stock_price(company_name: str) -> str:
+    """
+    Get the stock price of a company
+    
+    Args:
+        company_name (str): The name of the company
+        
+    Returns:
+        str: The stock price of the company
+    """
+    print(f"Tool called: get_stock_price({company_name})")
+    return f"The stock price of {company_name} is 100"
+
+def multiply(a: int, b: int) -> int:
+    """
+    Multiply two numbers
+    """
+    print(f"Tool called: multiply({a}, {b})")
+    return a * b
+
+def main() -> None:
+    """Run the same prompt with stream=False, then with no stream argument."""
+    # Test with streaming disabled to verify the fix
+    print("Testing sequential tool calling with stream=False...")
+    agent = Agent(
+        instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
+        llm="gemini/gemini-2.5-flash-lite-preview-06-17",
+        self_reflect=False,
+        verbose=True,
+        tools=[get_stock_price, multiply],
+        stream=False  # Force non-streaming mode - use stream parameter directly
+    )
+
+    result = agent.chat("Get the stock price of Google and multiply it by 2")
+    print(f"\nFinal result: {result}")
+
+    # Test with default streaming mode
+    print("\n\nTesting sequential tool calling with default streaming...")
+    agent2 = Agent(
+        instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
+        llm="gemini/gemini-2.5-flash-lite-preview-06-17",
+        self_reflect=False,
+        verbose=True,
+        tools=[get_stock_price, multiply]
+    )
+
+    result2 = agent2.chat("Get the stock price of Google and multiply it by 2")
+    print(f"\nFinal result: {result2}")
+
+if __name__ == "__main__":
+    main()