
Commit 9abd2a3

fix: improve sequential tool calling for non-streaming responses
- Fixed Ollama handling to properly separate streaming/non-streaming modes
- Added robust error handling for response extraction
- Fixed test file to use correct stream parameter
- Ensures tool outputs are processed by LLM in non-streaming mode

Addresses review feedback on PR #832

Co-authored-by: Mervin Praison <[email protected]>
1 parent 48d0f8d commit 9abd2a3
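
The heart of the change is the branch on the caller's stream flag: in streaming mode the response text is accumulated from each chunk's delta, while in non-streaming mode a single completion object comes back and its message content is read out directly. A minimal standalone sketch of that pattern, assuming litellm is installed and an Ollama model is reachable (the model name and the fetch_text helper are illustrative placeholders, not code from this commit):

    import litellm

    def fetch_text(messages, model="ollama/llama3", temperature=0.2, stream=True):
        # Illustrative helper: same streaming / non-streaming split as the fix.
        if stream:
            # Streaming: concatenate the delta content of each chunk as it arrives.
            text = ""
            for chunk in litellm.completion(model=model, messages=messages,
                                            temperature=temperature, stream=True):
                if chunk and chunk.choices and chunk.choices[0].delta.content:
                    text += chunk.choices[0].delta.content
            return text
        # Non-streaming: one response object; extract the content defensively,
        # mirroring the pattern added in this commit.
        resp = litellm.completion(model=model, messages=messages,
                                  temperature=temperature, stream=False)
        return resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""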

File tree: 2 files changed (+27, -15 lines)


src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 26 additions & 14 deletions
@@ -864,32 +864,44 @@ def get_response(
             ollama_params = self._handle_ollama_model(response_text, tool_results, messages, original_prompt)

             if ollama_params:
-                # Get response with streaming
-                if verbose:
-                    with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                # Get response based on streaming mode
+                if stream:
+                    # Streaming approach
+                    if verbose:
+                        with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                            response_text = ""
+                            for chunk in litellm.completion(
+                                **self._build_completion_params(
+                                    messages=ollama_params["follow_up_messages"],
+                                    temperature=temperature,
+                                    stream=True
+                                )
+                            ):
+                                if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                    content = chunk.choices[0].delta.content
+                                    response_text += content
+                                    live.update(display_generating(response_text, start_time))
+                    else:
                         response_text = ""
                         for chunk in litellm.completion(
                             **self._build_completion_params(
                                 messages=ollama_params["follow_up_messages"],
                                 temperature=temperature,
-                                stream=stream
+                                stream=True
                             )
                         ):
                             if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                content = chunk.choices[0].delta.content
-                                response_text += content
-                                live.update(display_generating(response_text, start_time))
+                                response_text += chunk.choices[0].delta.content
                 else:
-                    response_text = ""
-                    for chunk in litellm.completion(
+                    # Non-streaming approach
+                    resp = litellm.completion(
                         **self._build_completion_params(
                             messages=ollama_params["follow_up_messages"],
                             temperature=temperature,
-                            stream=stream
+                            stream=False
                         )
                     )
-                    ):
-                        if chunk and chunk.choices and chunk.choices[0].delta.content:
-                            response_text += chunk.choices[0].delta.content
+                    )
+                    response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""

                 # Set flag to indicate Ollama was handled
                 ollama_handled = True
@@ -988,7 +1000,7 @@ def get_response(
                         **kwargs
                     )
                 )
-                final_response_text = resp["choices"][0]["message"]["content"]
+                final_response_text = resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""

                 final_response_text = final_response_text.strip()

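The chained .get() calls with the trailing or "" fallback are what make the non-streaming extraction tolerant of a missing choices/message key or a None content field, where plain indexing would raise. A quick illustration with plain dicts standing in for response objects (the inputs below are made-up examples, not real litellm output):

    def extract_content(resp):
        # Same defensive pattern as the commit: a default at every step, "" on None.
        return resp.get("choices", [{}])[0].get("message", {}).get("content", "") or ""

    print(extract_content({"choices": [{"message": {"content": "The price is 200"}}]}))  # -> "The price is 200"
    print(extract_content({"choices": [{"message": {"content": None}}]}))                # -> ""
    print(extract_content({}))                                                           # -> "" instead of a KeyError
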
test_sequential_tool_calling.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def multiply(a: int, b: int) -> int:
     self_reflect=False,
     verbose=True,
     tools=[get_stock_price, multiply],
-    llm_config={"stream": False} # Force non-streaming mode
+    stream=False # Force non-streaming mode - use stream parameter directly
 )

 result = agent.chat("Get the stock price of Google and multiply it by 2")
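
For context, the test around this hunk builds an Agent with two tools and now forces non-streaming mode through the stream parameter itself rather than llm_config. A condensed sketch of that setup, assuming the praisonaiagents Agent constructor accepts the parameters shown in the diff above; the tool bodies and the instructions text are stand-ins, not copied from the test file:

    from praisonaiagents import Agent

    def get_stock_price(symbol: str) -> int:
        # Stand-in tool body; the real implementation is not part of this diff.
        return 100

    def multiply(a: int, b: int) -> int:
        return a * b

    agent = Agent(
        instructions="Use the available tools to answer the question.",  # stand-in wording
        self_reflect=False,
        verbose=True,
        tools=[get_stock_price, multiply],
        stream=False  # force non-streaming so tool outputs are fed back through the LLM
    )

    result = agent.chat("Get the stock price of Google and multiply it by 2")
    print(result)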
