
Commit 98e7903

Merge pull request #832 from MervinPraison/claude/issue-824-20250711_155141
fix: sequential tool calling for non-streaming responses
2 parents a04b205 + 4466e50 commit 98e7903
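
The change itself is small: instead of always iterating streamed chunks, the LLM wrapper now branches on the stream flag and, when streaming is off, reads the full message from a single completion response. Below is a minimal, self-contained sketch of that pattern, not the project's code: it assumes litellm's OpenAI-compatible completion interface, and the model name and helper name are placeholders introduced here for illustration.

import litellm

def get_text(messages, stream: bool, temperature: float = 0.7) -> str:
    # Sketch only: mirrors the shape of the fix, not the repository's internal helpers.
    if stream:
        # Streaming: accumulate incremental delta content chunk by chunk.
        text = ""
        for chunk in litellm.completion(
            model="gpt-4o-mini",  # placeholder model, not from this commit
            messages=messages,
            temperature=temperature,
            stream=True,
        ):
            if chunk and chunk.choices and chunk.choices[0].delta.content:
                text += chunk.choices[0].delta.content
        return text
    # Non-streaming: a single response object carries the whole message.
    resp = litellm.completion(
        model="gpt-4o-mini",  # placeholder model, not from this commit
        messages=messages,
        temperature=temperature,
        stream=False,
    )
    return resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""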

File tree

2 files changed: +100 / -25 lines


src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 51 additions & 25 deletions
@@ -864,32 +864,44 @@ def get_response(
     ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)
 
     if ollama_params:
-        # Get response with streaming
-        if verbose:
-            with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+        # Get response based on streaming mode
+        if stream:
+            # Streaming approach
+            if verbose:
+                with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                    response_text = ""
+                    for chunk in litellm.completion(
+                        **self._build_completion_params(
+                            messages=ollama_params["follow_up_messages"],
+                            temperature=temperature,
+                            stream=True
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            response_text += content
+                            live.update(display_generating(response_text, start_time))
+            else:
                 response_text = ""
                 for chunk in litellm.completion(
                     **self._build_completion_params(
                         messages=ollama_params["follow_up_messages"],
                         temperature=temperature,
-                        stream=stream
+                        stream=True
                     )
                 ):
                     if chunk and chunk.choices and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        response_text += content
-                        live.update(display_generating(response_text, start_time))
+                        response_text += chunk.choices[0].delta.content
         else:
-            response_text = ""
-            for chunk in litellm.completion(
+            # Non-streaming approach
+            resp = litellm.completion(
                 **self._build_completion_params(
                     messages=ollama_params["follow_up_messages"],
                     temperature=temperature,
-                    stream=stream
+                    stream=False
                 )
-            ):
-                if chunk and chunk.choices and chunk.choices[0].delta.content:
-                    response_text += chunk.choices[0].delta.content
+            )
+            response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
 
         # Set flag to indicate Ollama was handled
         ollama_handled = True
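
A note on the non-streaming branch above: with stream=False, litellm returns a single OpenAI-style response whose text lives at choices[0].message.content, whereas streaming chunks expose it at choices[0].delta.content. The chained .get() calls in the diff are a defensive dict-style way to read the former; attribute access is a common alternative, as in this small sketch (assumed response shape, not code from the repository):

def extract_content(resp) -> str:
    # Defensive read of a non-streaming completion; returns "" if anything is missing.
    try:
        return resp.choices[0].message.content or ""
    except (AttributeError, IndexError):
        return ""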
@@ -945,9 +957,26 @@ def get_response(
 
     # Otherwise do the existing streaming approach if not already handled
     elif not ollama_handled:
-        # Get response after tool calls with streaming
-        if verbose:
-            with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+        # Get response after tool calls
+        if stream:
+            # Streaming approach
+            if verbose:
+                with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                    final_response_text = ""
+                    for chunk in litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=True,
+                            **kwargs
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            final_response_text += content
+                            live.update(display_generating(final_response_text, current_time))
+            else:
                 final_response_text = ""
                 for chunk in litellm.completion(
                     **self._build_completion_params(
@@ -959,22 +988,19 @@ def get_response(
                     )
                 ):
                     if chunk and chunk.choices and chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        final_response_text += content
-                        live.update(display_generating(final_response_text, current_time))
+                        final_response_text += chunk.choices[0].delta.content
         else:
-            final_response_text = ""
-            for chunk in litellm.completion(
+            # Non-streaming approach
+            resp = litellm.completion(
                 **self._build_completion_params(
                     messages=messages,
                     tools=formatted_tools,
                     temperature=temperature,
-                    stream=stream,
+                    stream=False,
                     **kwargs
                 )
-            ):
-                if chunk and chunk.choices and chunk.choices[0].delta.content:
-                    final_response_text += chunk.choices[0].delta.content
+            )
+            final_response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
 
         final_response_text = final_response_text.strip()
 
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+"""Test sequential tool calling fix"""
+from praisonaiagents import Agent
+
+def get_stock_price(company_name: str) -> str:
+    """
+    Get the stock price of a company
+
+    Args:
+        company_name (str): The name of the company
+
+    Returns:
+        str: The stock price of the company
+    """
+    print(f"Tool called: get_stock_price({company_name})")
+    return f"The stock price of {company_name} is 100"
+
+def multiply(a: int, b: int) -> int:
+    """
+    Multiply two numbers
+    """
+    print(f"Tool called: multiply({a}, {b})")
+    return a * b
+
+# Test with streaming disabled to verify the fix
+print("Testing sequential tool calling with stream=False...")
+agent = Agent(
+    instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
+    llm="gemini/gemini-2.5-flash-lite-preview-06-17",
+    self_reflect=False,
+    verbose=True,
+    tools=[get_stock_price, multiply],
+    stream=False  # Force non-streaming mode - use stream parameter directly
+)
+
+result = agent.chat("Get the stock price of Google and multiply it by 2")
+print(f"\nFinal result: {result}")
+
+# Test with default streaming mode
+print("\n\nTesting sequential tool calling with default streaming...")
+agent2 = Agent(
+    instructions="You are a helpful assistant. You can use the tools provided to you to help the user.",
+    llm="gemini/gemini-2.5-flash-lite-preview-06-17",
+    self_reflect=False,
+    verbose=True,
+    tools=[get_stock_price, multiply]
+)
+
+result2 = agent2.chat("Get the stock price of Google and multiply it by 2")
+print(f"\nFinal result: {result2}")
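
If you want the script above to fail loudly rather than just print, one option is to assert on the expected product: the stub tool reports a price of 100 and the prompt asks to multiply by 2, so the final answer should mention 200. A sketch, assuming agent.chat returns the final text and the model reports the computed product:

+# Hypothetical follow-up check, not part of the committed test file.
+for label, value in (("stream=False", result), ("streaming", result2)):
+    assert "200" in str(value), f"{label}: expected the answer to mention 200, got: {value!r}"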
