Commit 12273e4

fix: document async limitation and ensure consistent behavior
- Added clear documentation that the async version doesn't support sequential tool calling
- Reverted the incomplete async implementation to avoid breaking existing functionality
- Sync version retains the fix for sequential tool calling (issue #839)
- Recorded this as a known limitation to be addressed in a future PR

Co-authored-by: Mervin Praison <[email protected]>
1 parent 9013fd1 commit 12273e4

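For context on what "sequential tool calling" means here: when one user request needs several tool calls, the sync get_response keeps looping (call the model, run the requested tool, feed the result back) until the model produces a final answer, while the async path currently stops after the first tool execution. The sketch below is a minimal, self-contained illustration of that difference only; every name in it (call_model, execute_tool, the message shapes) is hypothetical and is not the code in llm.py.

# Illustrative sketch only -- all names are made up, this is not llm.py code.

def call_model(messages):
    # Stand-in for an LLM call: pretend the model requests one tool call at a
    # time until it has seen two tool results, then returns a final answer.
    tool_results = [m for m in messages if m["role"] == "tool"]
    if len(tool_results) < 2:
        return {"tool_call": {"name": "lookup", "args": {"step": len(tool_results) + 1}}}
    return {"content": "final answer built from both tool results"}

def execute_tool(tool_call):
    # Stand-in for running a tool and returning its result as text.
    return f"result of {tool_call['name']}, step {tool_call['args']['step']}"

def get_response_single_pass(messages):
    # Roughly the current async behavior: handle at most one tool execution.
    reply = call_model(messages)
    if "tool_call" in reply:
        messages.append({"role": "tool", "content": execute_tool(reply["tool_call"])})
        reply = call_model(messages)
    return reply.get("content", "<stopped while the model still wanted another tool>")

def get_response_sequential(messages):
    # Roughly the sync behavior after the issue #839 fix: loop while the model
    # keeps requesting tools, only return once it produces a final answer.
    while True:
        reply = call_model(messages)
        if "tool_call" not in reply:
            return reply["content"]
        messages.append({"role": "tool", "content": execute_tool(reply["tool_call"])})

print(get_response_single_pass([{"role": "user", "content": "two-step task"}]))  # incomplete
print(get_response_sequential([{"role": "user", "content": "two-step task"}]))   # final answer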
File tree

1 file changed (+45, -101 lines)

  • src/praisonai-agents/praisonaiagents/llm
src/praisonai-agents/praisonaiagents/llm/llm.py

Lines changed: 45 additions & 101 deletions
@@ -1161,7 +1161,12 @@ async def get_response_async(
         stream: bool = True,
         **kwargs
     ) -> str:
-        """Async version of get_response with identical functionality."""
+        """Async version of get_response with identical functionality.
+
+        NOTE: This async version currently does NOT support sequential tool calling
+        like the sync version does. It will return after the first tool execution.
+        This is a known limitation that needs to be addressed in a future update.
+        """
         try:
             import litellm
             logging.info(f"Getting async response from {self.model}")
@@ -1233,47 +1238,46 @@ async def get_response_async(
 
             response_text = ""
             if reasoning_steps:
-                # Non-streaming call to capture reasoning
-                resp = await litellm.acompletion(
-                    **self._build_completion_params(
-                        messages=messages,
-                        temperature=temperature,
-                        stream=False, # force non-streaming
-                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                    )
-                )
-                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                response_text = resp["choices"][0]["message"]["content"]
-
-                if verbose and reasoning_content:
-                    display_interaction(
-                        "Initial reasoning:",
-                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
-                    )
-                elif verbose:
-                    display_interaction(
-                        "Initial response:",
-                        response_text,
-                        markdown=markdown,
-                        generation_time=time.time() - start_time,
-                        console=console
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
+                    **self._build_completion_params(
+                        messages=messages,
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                    )
                 )
-            else:
-                # Determine if we should use streaming based on tool support
-                use_streaming = stream
-                if formatted_tools and not self._supports_streaming_tools():
-                    # Provider doesn't support streaming with tools, use non-streaming
-                    use_streaming = False
-
-                if use_streaming:
-                    # Streaming approach (with or without tools)
-                    tool_calls = []
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
 
-                    if verbose:
-                        async for chunk in await litellm.acompletion(
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Initial reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Initial response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+            else:
+                # Determine if we should use streaming based on tool support
+                use_streaming = stream
+                if formatted_tools and not self._supports_streaming_tools():
+                    # Provider doesn't support streaming with tools, use non-streaming
+                    use_streaming = False
+
+                if use_streaming:
+                    # Streaming approach (with or without tools)
+
+                    if verbose:
+                        async for chunk in await litellm.acompletion(
                             **self._build_completion_params(
                                 messages=messages,
                                 temperature=temperature,
@@ -1429,67 +1433,7 @@ async def get_response_async(
                         else:
                             logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
 
-                # If no special handling was needed or if it's not an Ollama model
-                if reasoning_steps and not ollama_handled:
-                    # Non-streaming call to capture reasoning
-                    resp = await litellm.acompletion(
-                        **self._build_completion_params(
-                            messages=messages,
-                            temperature=temperature,
-                            stream=False, # force non-streaming
-                            tools=formatted_tools, # Include tools
-                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                        )
-                    )
-                    reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                    response_text = resp["choices"][0]["message"]["content"]
-
-                    if verbose and reasoning_content:
-                        display_interaction(
-                            "Tool response reasoning:",
-                            f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                            markdown=markdown,
-                            generation_time=time.time() - start_time,
-                            console=console
-                        )
-                    elif verbose:
-                        display_interaction(
-                            "Tool response:",
-                            response_text,
-                            markdown=markdown,
-                            generation_time=time.time() - start_time,
-                            console=console
-                        )
-                elif not ollama_handled:
-                    # Get response after tool calls with streaming if not already handled
-                    if verbose:
-                        async for chunk in await litellm.acompletion(
-                            **self._build_completion_params(
-                                messages=messages,
-                                temperature=temperature,
-                                stream=stream,
-                                tools=formatted_tools,
-                                **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                            )
-                        ):
-                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                content = chunk.choices[0].delta.content
-                                response_text += content
-                                print("\033[K", end="\r")
-                                print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
-                    else:
-                        response_text = ""
-                        async for chunk in await litellm.acompletion(
-                            **self._build_completion_params(
-                                messages=messages,
-                                temperature=temperature,
-                                stream=stream,
-                                **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                            )
-                        ):
-                            if chunk and chunk.choices and chunk.choices[0].delta.content:
-                                response_text += chunk.choices[0].delta.content
-
+                # Get response after tool calls
                 response_text = response_text.strip()
 
                 # Handle output formatting

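As a usage note, the documented limitation only surfaces when a prompt needs chained tool calls. Below is a hedged sketch of how a caller might hit it, assuming the LLM constructor and the prompt/tools/verbose parameters of get_response_async mirror the sync get_response (verify against llm.py before relying on the exact signature); the two toy tools are made up for illustration.

# Hedged usage sketch; the import path and parameter names are assumptions,
# not confirmed against llm.py.
import asyncio

from praisonaiagents.llm import LLM  # assumed export

def get_stock_price(symbol: str) -> str:
    """Toy tool: return a canned price for a symbol."""
    return f"{symbol}: 100.0"

def multiply(a: float, b: float) -> float:
    """Toy tool: multiply two numbers."""
    return a * b

async def main():
    llm = LLM(model="gpt-4o-mini")
    # This request needs two tool calls in sequence (price first, then multiply).
    # Per the docstring note above, the async path may return after the first
    # tool runs, while the sync get_response() chains both (issue #839 fix).
    answer = await llm.get_response_async(
        prompt="Get the stock price of AAPL and multiply it by 2",
        tools=[get_stock_price, multiply],
        verbose=True,
    )
    print(answer)

if __name__ == "__main__":
    asyncio.run(main())

With the sync get_response, the same prompt should run get_stock_price and then multiply before answering; with the current async version it may come back after only the first tool has executed.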