Commit bd596cc

fix: preserve reasoning_steps functionality with sequential tool calling
- Add stored_reasoning_content to capture reasoning after tool execution
- Restore reasoning_steps handling after tools while keeping loop continuation
- Update final response display to include stored reasoning when available
- Return reasoning content when reasoning_steps=True and content exists

This ensures backward compatibility while maintaining the sequential tool calling fix.

Co-authored-by: Mervin Praison <[email protected]>
1 parent 12273e4 commit bd596cc

File tree: 1 file changed (+147, -52)

  • src/praisonai-agents/praisonaiagents/llm/llm.py
src/praisonai-agents/praisonaiagents/llm/llm.py

@@ -680,6 +680,7 @@ def get_response(
         max_iterations = 10 # Prevent infinite loops
         iteration_count = 0
         final_response_text = ""
+        stored_reasoning_content = None # Store reasoning content from tool execution

         while iteration_count < max_iterations:
             try:
@@ -922,6 +923,30 @@ def get_response(
                 else:
                     logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

+            # Handle reasoning_steps after tool execution if not already handled by Ollama
+            if reasoning_steps and not ollama_handled:
+                # Make a non-streaming call to capture reasoning content
+                reasoning_resp = litellm.completion(
+                    **self._build_completion_params(
+                        messages=messages,
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                    )
+                )
+                reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = reasoning_resp["choices"][0]["message"]["content"]
+
+                # Store reasoning content for later use
+                if reasoning_content:
+                    stored_reasoning_content = reasoning_content
+
+                # Update messages with the response
+                messages.append({
+                    "role": "assistant",
+                    "content": response_text
+                })
+
             # After tool execution, continue the loop to check if more tools are needed
             # instead of immediately trying to get a final response
             iteration_count += 1
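For context, the added branch pulls the reasoning trace out of the completion result via litellm's `provider_specific_fields`. A minimal sketch of the response shape this code relies on, using a plain dict as a stand-in for the litellm response object (illustrative values only; whether `reasoning_content` is populated depends on the provider and model):

```python
# Stand-in for a litellm completion response; real responses are ModelResponse
# objects that support the same key access. All values below are made up.
reasoning_resp = {
    "choices": [{
        "message": {
            "content": "Paris is the capital of France.",
            "provider_specific_fields": {
                "reasoning_content": "The user asked for the capital of France..."
            },
        }
    }]
}

message = reasoning_resp["choices"][0]["message"]
# .get() with defaults keeps this safe for providers that omit reasoning fields
reasoning_content = message.get("provider_specific_fields", {}).get("reasoning_content")
response_text = message["content"]
```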
@@ -943,16 +968,30 @@ def get_response(

             # No tool calls were made in this iteration, return the response
             if verbose:
-                display_interaction(
-                    original_prompt,
-                    response_text,
-                    markdown=markdown,
-                    generation_time=time.time() - start_time,
-                    console=console
-                )
+                # If we have stored reasoning content from tool execution, display it
+                if stored_reasoning_content:
+                    display_interaction(
+                        original_prompt,
+                        f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                else:
+                    display_interaction(
+                        original_prompt,
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )

             response_text = response_text.strip()

+            # Return reasoning content if reasoning_steps is True and we have it
+            if reasoning_steps and stored_reasoning_content:
+                return stored_reasoning_content
+
             # Handle output formatting
             if output_json or output_pydantic:
                 self.chat_history.append({"role": "user", "content": original_prompt})
@@ -1161,12 +1200,7 @@ async def get_response_async(
         stream: bool = True,
         **kwargs
     ) -> str:
-        """Async version of get_response with identical functionality.
-
-        NOTE: This async version currently does NOT support sequential tool calling
-        like the sync version does. It will return after the first tool execution.
-        This is a known limitation that needs to be addressed in a future update.
-        """
+        """Async version of get_response with identical functionality."""
         try:
             import litellm
             logging.info(f"Getting async response from {self.model}")
@@ -1238,46 +1272,47 @@ async def get_response_async(

         response_text = ""
         if reasoning_steps:
-            # Non-streaming call to capture reasoning
-            resp = await litellm.acompletion(
-                **self._build_completion_params(
-                    messages=messages,
-                    temperature=temperature,
-                    stream=False, # force non-streaming
-                    **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
-                )
+            # Non-streaming call to capture reasoning
+            resp = await litellm.acompletion(
+                **self._build_completion_params(
+                    messages=messages,
+                    temperature=temperature,
+                    stream=False, # force non-streaming
+                    **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
                 )
-            reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-            response_text = resp["choices"][0]["message"]["content"]
-
-            if verbose and reasoning_content:
-                display_interaction(
-                    "Initial reasoning:",
-                    f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                    markdown=markdown,
-                    generation_time=time.time() - start_time,
-                    console=console
-                )
-            elif verbose:
-                display_interaction(
-                    "Initial response:",
-                    response_text,
-                    markdown=markdown,
-                    generation_time=time.time() - start_time,
-                    console=console
-                )
-        else:
-            # Determine if we should use streaming based on tool support
-            use_streaming = stream
-            if formatted_tools and not self._supports_streaming_tools():
-                # Provider doesn't support streaming with tools, use non-streaming
-                use_streaming = False
+            )
+            reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+            response_text = resp["choices"][0]["message"]["content"]
+
+            if verbose and reasoning_content:
+                display_interaction(
+                    "Initial reasoning:",
+                    f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                    markdown=markdown,
+                    generation_time=time.time() - start_time,
+                    console=console
+                )
+            elif verbose:
+                display_interaction(
+                    "Initial response:",
+                    response_text,
+                    markdown=markdown,
+                    generation_time=time.time() - start_time,
+                    console=console
+                )
+        else:
+            # Determine if we should use streaming based on tool support
+            use_streaming = stream
+            if formatted_tools and not self._supports_streaming_tools():
+                # Provider doesn't support streaming with tools, use non-streaming
+                use_streaming = False
+
+        if use_streaming:
+            # Streaming approach (with or without tools)
+            tool_calls = []

-        if use_streaming:
-            # Streaming approach (with or without tools)
-
-            if verbose:
-                async for chunk in await litellm.acompletion(
+            if verbose:
+                async for chunk in await litellm.acompletion(
                     **self._build_completion_params(
                         messages=messages,
                         temperature=temperature,
@@ -1433,7 +1468,67 @@ async def get_response_async(
                 else:
                     logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

-            # Get response after tool calls
+            # If no special handling was needed or if it's not an Ollama model
+            if reasoning_steps and not ollama_handled:
+                # Non-streaming call to capture reasoning
+                resp = await litellm.acompletion(
+                    **self._build_completion_params(
+                        messages=messages,
+                        temperature=temperature,
+                        stream=False, # force non-streaming
+                        tools=formatted_tools, # Include tools
+                        **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                    )
+                )
+                reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                response_text = resp["choices"][0]["message"]["content"]
+
+                if verbose and reasoning_content:
+                    display_interaction(
+                        "Tool response reasoning:",
+                        f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+                elif verbose:
+                    display_interaction(
+                        "Tool response:",
+                        response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - start_time,
+                        console=console
+                    )
+            elif not ollama_handled:
+                # Get response after tool calls with streaming if not already handled
+                if verbose:
+                    async for chunk in await litellm.acompletion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            temperature=temperature,
+                            stream=stream,
+                            tools=formatted_tools,
+                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            response_text += content
+                            print("\033[K", end="\r")
+                            print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+                else:
+                    response_text = ""
+                    async for chunk in await litellm.acompletion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            temperature=temperature,
+                            stream=stream,
+                            **{k:v for k,v in kwargs.items() if k != 'reasoning_steps'}
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta.content:
+                            response_text += chunk.choices[0].delta.content
+

         response_text = response_text.strip()

         # Handle output formatting
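Taken together, the sync path now keeps looping through sequential tool calls and, once the model stops requesting tools, either displays the stored reasoning alongside the answer (verbose mode) or returns the reasoning itself. A rough sketch of the resulting return contract, reusing the same assumed class and parameter names as above (not confirmed by this diff):

```python
from praisonaiagents.llm import LLM  # assumed import path, as above

def get_weather(city: str) -> str:
    """Hypothetical tool used to trigger the sequential tool-calling loop."""
    return f"22C and sunny in {city}"

llm = LLM(model="deepseek/deepseek-reasoner")

# Default: the final answer text is returned after all tool calls complete.
answer = llm.get_response(
    prompt="What's the weather in Paris and should I take a jacket?",
    tools=[get_weather],
)

# With reasoning_steps=True, if reasoning was captured after tool execution,
# the stored reasoning content is returned instead of the answer text.
reasoning = llm.get_response(
    prompt="What's the weather in Paris and should I take a jacket?",
    tools=[get_weather],
    reasoning_steps=True,
)
```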
