Skip to content

Commit dfc503c

Browse files
test: add comprehensive test suite for sequential tool calling
This test file validates the fix for issue #839, ensuring agents can:
- Execute multiple tools in sequence
- Pass results between tool calls
- Return final combined results without empty responses

Tests include:
- Sequential execution of 2 and 3 tools
- Multiple agents with sequential tools
- Mocked LLM responses
- Edge case prevention for empty responses

Co-authored-by: Mervin Praison <[email protected]>
1 parent f7593ef commit dfc503c

File tree

1 file changed

+329
-0
lines changed

1 file changed

+329
-0
lines changed
Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
"""
2+
Test file to verify sequential tool calling functionality.
3+
4+
This tests that agents can:
5+
1. Execute multiple tools in sequence
6+
2. Pass results from one tool to another
7+
3. Return the final combined result
8+
9+
This addresses issue #839, where agents would return empty responses after the first tool call.
10+
"""
11+
12+
import pytest
13+
import logging
14+
from unittest.mock import patch, MagicMock
15+
from praisonaiagents import Agent, Task, PraisonAIAgents
16+
17+
# Enable INFO-level logging so tool-call traces are visible when debugging a test run.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
19+
20+
21+
# Define test tools that need sequential execution
22+
def get_stock_price(company_name: str) -> str:
    """
    Get the (mock) stock price of a company.

    This is a deterministic test tool: prices come from a fixed in-memory
    table, and any company not in the table is reported with a price of 50.

    Args:
        company_name (str): The name of the company

    Returns:
        str: A sentence of the form "The stock price of <name> is <price>"
    """
    # Trace line so the test output shows when the agent invoked this tool.
    print(f"[Tool Called] get_stock_price({company_name})")
    # Mock stock prices
    prices = {
        "Google": 100,
        "Apple": 150,
        "Microsoft": 200,
    }
    # Unknown companies fall back to a fixed price of 50.
    price = prices.get(company_name, 50)
    return f"The stock price of {company_name} is {price}"
41+
42+
43+
def multiply(a: int, b: int) -> int:
    """
    Multiply two numbers.

    Args:
        a (int): First number
        b (int): Second number

    Returns:
        int: Product of a and b
    """
    # Trace line so the test output shows when the agent invoked this tool.
    print(f"[Tool Called] multiply({a}, {b})")
    return a * b
56+
57+
58+
def add(a: int, b: int) -> int:
    """
    Add two numbers.

    Args:
        a (int): First number
        b (int): Second number

    Returns:
        int: Sum of a and b
    """
    # Trace line so the test output shows when the agent invoked this tool.
    print(f"[Tool Called] add({a}, {b})")
    return a + b
71+
72+
73+
class TestSequentialToolCalling:
    """Test class for sequential tool calling functionality.

    Each test builds one or more agents equipped with the module-level mock
    tools (``get_stock_price``, ``multiply``, ``add``) and checks that the
    final answer reflects the output of the *last* tool in the chain rather
    than an empty response.

    NOTE(review): every test except the mocked one is configured with
    ``llm={"model": "gpt-4o"}`` and presumably needs real model access and
    credentials to run -- confirm before enabling in CI.
    """

    def test_sequential_two_tools(self):
        """Test that agent can call two tools sequentially."""
        print("\n" + "=" * 60)
        print("Test: Sequential Two Tools")
        print("=" * 60)

        # Create agent with tools
        agent = Agent(
            name="SequentialAgent",
            role="Math Assistant",
            goal="Help with calculations using available tools",
            backstory="Expert at using multiple tools to solve problems",
            instructions="When asked to multiply a stock price, first get the stock price, then multiply it.",
            tools=[get_stock_price, multiply],
            llm={"model": "gpt-4o"},
            verbose=True
        )

        # Test sequential tool calling
        result = agent.start("What is the stock price of Google? Multiply the Google stock price by 2.")

        print(f"\nFinal Result: {result}")

        # Verify the result contains the expected value (100 * 2 = 200)
        assert result is not None, "Agent returned None instead of a result"
        assert result != "", "Agent returned empty string instead of a result"
        assert "200" in str(result), f"Expected result to contain '200', but got: {result}"

        print("✅ Test passed: Agent successfully called tools sequentially")

    def test_sequential_three_tools(self):
        """Test that agent can call three tools sequentially."""
        print("\n" + "=" * 60)
        print("Test: Sequential Three Tools")
        print("=" * 60)

        # Create agent with multiple tools
        agent = Agent(
            name="ComplexAgent",
            role="Advanced Math Assistant",
            goal="Solve complex calculations using multiple tools",
            backstory="Expert at chaining multiple operations together",
            instructions="Follow the exact steps requested by the user, using tools in sequence.",
            tools=[get_stock_price, multiply, add],
            llm={"model": "gpt-4o"},
            verbose=True
        )

        # Test sequential tool calling with three operations
        result = agent.start(
            "Get Apple's stock price, multiply it by 3, then add 50 to the result. "
            "Show me each step and the final result."
        )

        print(f"\nFinal Result: {result}")

        # Apple stock is 150, 150 * 3 = 450, 450 + 50 = 500
        assert result is not None, "Agent returned None instead of a result"
        assert result != "", "Agent returned empty string instead of a result"
        # Check if the result mentions the expected value (digits or spelled out)
        result_str = str(result).lower()
        assert any(val in result_str for val in ["500", "five hundred"]), \
            f"Expected result to contain '500', but got: {result}"

        print("✅ Test passed: Agent successfully called three tools sequentially")

    def test_multiple_agents_sequential_tools(self):
        """Test multiple agents working together with sequential tool calls."""
        print("\n" + "=" * 60)
        print("Test: Multiple Agents with Sequential Tools")
        print("=" * 60)

        # First agent gets stock price
        price_agent = Agent(
            name="PriceAgent",
            role="Stock Price Analyst",
            goal="Get accurate stock prices",
            backstory="Expert at retrieving stock market data",
            tools=[get_stock_price],
            llm={"model": "gpt-4o"}
        )

        # Second agent does calculations
        calc_agent = Agent(
            name="CalcAgent",
            role="Financial Calculator",
            goal="Perform calculations on financial data",
            backstory="Expert at financial mathematics",
            tools=[multiply, add],
            llm={"model": "gpt-4o"}
        )

        # Create tasks
        price_task = Task(
            name="get_price",
            description="Get the stock price of Microsoft",
            expected_output="The current stock price of Microsoft",
            agent=price_agent
        )

        calc_task = Task(
            name="calculate",
            description="Take the Microsoft stock price (which is 200) and multiply it by 4, then add 100",
            expected_output="The final calculated value",
            agent=calc_agent
        )

        # Create workflow
        workflow = PraisonAIAgents(
            agents=[price_agent, calc_agent],
            tasks=[price_task, calc_task],
            verbose=True
        )

        # Execute workflow
        result = workflow.start()

        print(f"\nWorkflow Result: {result}")

        # Previously the only assertion lived inside the dict-shape check, so
        # any other return shape (including None) passed vacuously. Always
        # derive a final result and assert on it.
        assert result is not None, "Workflow returned None instead of a result"

        # Microsoft stock is 200, 200 * 4 = 800, 800 + 100 = 900
        if isinstance(result, dict) and 'task_results' in result:
            task_results = result['task_results']
            # NOTE(review): the container type of 'task_results' is not visible
            # from this file; accept either a mapping or a sequence and use the
            # last entry -- confirm against PraisonAIAgents.start().
            if isinstance(task_results, dict):
                task_results = list(task_results.values())
            assert task_results, "Workflow returned no task results"
            final_result = str(task_results[-1])
        else:
            final_result = str(result)

        assert "900" in final_result or "nine hundred" in final_result.lower(), \
            f"Expected final result to contain '900', but got: {final_result}"

        print("✅ Test passed: Multiple agents successfully used tools sequentially")

    @patch('litellm.completion')
    def test_tool_calling_with_mocked_llm(self, mock_completion):
        """Test sequential tool calling with mocked LLM responses.

        ``mock_completion`` is injected by the ``patch`` decorator and replaces
        ``litellm.completion`` for the duration of the test. The mock scripts
        three turns: a ``get_stock_price`` tool call, a ``multiply`` tool call,
        and then a plain-text final answer for every later call.
        """
        print("\n" + "=" * 60)
        print("Test: Sequential Tool Calling with Mocked LLM")
        print("=" * 60)

        def make_response(message):
            # Wrap a message dict as ``response.choices[0].message``.
            return MagicMock(choices=[MagicMock(message=message)])

        def tool_call_message(call_id, tool_name, arguments):
            # A turn with empty content that asks for exactly one tool call.
            return {
                "content": "",
                "tool_calls": [{
                    "id": call_id,
                    "function": {
                        "name": tool_name,
                        "arguments": arguments
                    }
                }]
            }

        # Mock LLM to return tool calls
        call_count = 0

        def mock_llm_response(*args, **kwargs):
            nonlocal call_count
            call_count += 1

            if call_count == 1:
                # First call: request to use get_stock_price tool
                return make_response(
                    tool_call_message("call_1", "get_stock_price", '{"company_name": "Google"}')
                )
            if call_count == 2:
                # Second call: request to use multiply tool
                return make_response(
                    tool_call_message("call_2", "multiply", '{"a": 100, "b": 2}')
                )
            # Final call (and any later one): return the result
            return make_response({
                "content": "The stock price of Google is 100. After multiplying by 2, the result is 200."
            })

        mock_completion.side_effect = mock_llm_response

        # Create agent
        agent = Agent(
            name="MockedAgent",
            role="Test Assistant",
            goal="Test sequential tool calling",
            backstory="Test agent for validating functionality",
            tools=[get_stock_price, multiply],
            llm={"model": "gpt-4o"}
        )

        # Execute
        result = agent.start("Get Google stock price and multiply by 2")

        print(f"\nResult: {result}")
        print(f"LLM was called {call_count} times")

        # Verify: two tool-call turns plus one final answer means at least 3 calls
        assert call_count >= 3, f"Expected at least 3 LLM calls, but got {call_count}"
        assert "200" in str(result), f"Expected result to contain '200', but got: {result}"

        print("✅ Test passed: Mocked sequential tool calling works correctly")
282+
283+
284+
def test_edge_case_empty_response():
    """Test that the fix prevents empty responses after tool calls.

    Runs a single-tool query and checks only that the answer is non-empty;
    it makes no assertion about the answer's content.
    """
    print("\n" + "=" * 60)
    print("Test: Edge Case - Preventing Empty Response")
    print("=" * 60)

    # Create agent with a simple tool
    agent = Agent(
        name="EdgeCaseAgent",
        role="Test Assistant",
        goal="Test edge cases",
        backstory="Specialized in finding edge cases",
        tools=[get_stock_price],
        llm={"model": "gpt-4o"},
        verbose=True
    )

    # Execute a query that requires tool use
    result = agent.start("What is the stock price of Apple?")

    print(f"\nResult: {result}")

    # The main issue (#839) was agents returning empty responses
    assert result is not None, "Agent returned None"
    assert result != "", "Agent returned empty string"
    assert len(str(result).strip()) > 0, "Agent returned whitespace only"

    print("✅ Test passed: Agent did not return empty response after tool call")
312+
313+
314+
if __name__ == "__main__":
    # Script entry point: run every test directly, without the pytest runner.
    # The first failing assertion raises and stops the run.
    test_suite = TestSequentialToolCalling()

    # Run individual tests
    test_suite.test_sequential_two_tools()
    test_suite.test_sequential_three_tools()
    test_suite.test_multiple_agents_sequential_tools()
    # No argument needed: the method's patch decorator supplies the mock.
    test_suite.test_tool_calling_with_mocked_llm()

    # Run edge case test
    test_edge_case_empty_response()

    print("\n" + "=" * 60)
    print("All tests completed!")
    print("=" * 60)

0 commit comments

Comments
 (0)