-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
fix: add loop guardrails to prevent infinite tool calling (fixes #1886) #1888
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+264
−2
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| Test to validate that the loop guardrails fix works properly. | ||
| """ | ||
|
|
||
| import os | ||
| import sys | ||
| sys.path.insert(0, 'src/praisonai-agents') | ||
|
|
||
| from praisonaiagents import Agent, tool | ||
| from praisonaiagents.config.feature_configs import ExecutionConfig | ||
|
|
||
| @tool | ||
| def broken_tool(query: str) -> str: | ||
| """A deliberately broken tool that always returns an unhelpful result.""" | ||
| return f"Tool failed to process '{query}'. Please try again with a different approach." | ||
|
|
||
| @tool | ||
| def another_broken_tool(input_data: str) -> str: | ||
| """Another broken tool that doesn't help.""" | ||
| return f"Unable to handle '{input_data}'. Consider using a different tool." | ||
|
|
||
| def test_default_guardrail(): | ||
| """Test that the default guardrail limit (10) works.""" | ||
| print("Testing default guardrail limit...") | ||
|
|
||
| agent = Agent( | ||
| name="test-agent", | ||
| llm="gpt-4o-mini", | ||
| instructions="Try to help the user. Use tools to get information.", | ||
| tools=[broken_tool, another_broken_tool], | ||
| verbose=True | ||
| ) | ||
|
|
||
| try: | ||
| response = agent.chat("I need weather information for New York. Please help me get accurate data!") | ||
| print(f"Response: {response}") | ||
|
|
||
| # If we get here without an infinite loop, the guardrail worked | ||
| if "Tool call limit reached" in response: | ||
| print("✅ DEFAULT GUARDRAIL WORKING: Agent stopped due to tool call limit") | ||
| return True | ||
| else: | ||
| print("⚠️ Agent completed without hitting limit (may be expected if it naturally stopped)") | ||
| return True | ||
|
|
||
| except Exception as e: | ||
| print(f"❌ Error during test: {e}") | ||
| return False | ||
|
|
||
| def test_custom_guardrail(): | ||
| """Test with a custom lower limit.""" | ||
| print("\nTesting custom guardrail limit (3)...") | ||
|
|
||
| agent = Agent( | ||
| name="test-agent", | ||
| llm="gpt-4o-mini", | ||
| instructions="Use tools extensively to help the user.", | ||
| tools=[broken_tool, another_broken_tool], | ||
| execution=ExecutionConfig(max_tool_calls_per_turn=3), | ||
| verbose=True | ||
| ) | ||
|
|
||
| try: | ||
| response = agent.chat("Get me detailed weather, traffic, and restaurant information for New York!") | ||
| print(f"Response: {response}") | ||
|
|
||
| if "Tool call limit reached (3 calls)" in response: | ||
| print("✅ CUSTOM GUARDRAIL WORKING: Agent stopped at limit of 3") | ||
| return True | ||
| else: | ||
| print("⚠️ Agent completed without hitting custom limit") | ||
| return True | ||
|
|
||
| except Exception as e: | ||
| print(f"❌ Error during test: {e}") | ||
| return False | ||
|
|
||
| def test_high_limit(): | ||
| """Test with a higher limit to ensure it doesn't interfere with normal operation.""" | ||
| print("\nTesting high guardrail limit (50)...") | ||
|
|
||
| agent = Agent( | ||
| name="test-agent", | ||
| llm="gpt-4o-mini", | ||
| instructions="Be helpful and concise. Answer questions directly when possible.", | ||
| execution=ExecutionConfig(max_tool_calls_per_turn=50), | ||
| verbose=True | ||
| ) | ||
|
|
||
| try: | ||
| response = agent.chat("What's 2 + 2?") | ||
| print(f"Response: {response}") | ||
|
|
||
| if "Tool call limit reached" not in response: | ||
| print("✅ HIGH LIMIT WORKING: Agent operates normally without hitting limit") | ||
| return True | ||
| else: | ||
| print("❌ Agent hit limit unexpectedly") | ||
| return False | ||
|
|
||
| except Exception as e: | ||
| print(f"❌ Error during test: {e}") | ||
| return False | ||
|
|
||
| if __name__ == "__main__": | ||
| print("Testing Loop Guardrails Implementation") | ||
| print("=" * 50) | ||
|
|
||
| # Set up environment for testing - check if real API key exists | ||
| if not os.environ.get("OPENAI_API_KEY"): | ||
| print("⚠️ No OPENAI_API_KEY found. Tests will skip actual LLM calls.") | ||
| print("To run full integration tests, set OPENAI_API_KEY environment variable.") | ||
| # For CI/CD, we'll mock or skip rather than use a fake key that causes API errors | ||
| print("🎉 TESTS SKIPPED - No API key available (this is expected in CI)") | ||
| sys.exit(0) | ||
|
|
||
| results = [] | ||
|
|
||
| # Run tests | ||
| results.append(test_default_guardrail()) | ||
| results.append(test_custom_guardrail()) | ||
| results.append(test_high_limit()) | ||
|
|
||
| # Summary | ||
| print("\n" + "=" * 50) | ||
| print("TEST SUMMARY:") | ||
| print(f"Tests passed: {sum(results)}/{len(results)}") | ||
|
|
||
| if all(results): | ||
| print("🎉 ALL TESTS PASSED - Loop guardrails are working!") | ||
| sys.exit(0) | ||
| else: | ||
| print("❌ Some tests failed - Check implementation") | ||
| sys.exit(1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| Test to validate the loop guardrails issue in agent.chat(). | ||
| This script tests if agent.chat() can get stuck in an infinite loop with broken tools. | ||
| """ | ||
|
|
||
| from praisonaiagents import Agent, tool | ||
| import sys | ||
|
|
||
| @tool | ||
| def broken_weather_tool(location: str) -> str: | ||
| """Get weather information for a location. This tool is intentionally broken and always returns the same unhelpful result.""" | ||
| return f"Weather data unavailable for {location}. Please try again with a different tool." | ||
|
|
||
| @tool | ||
| def another_broken_tool(query: str) -> str: | ||
| """Search for information. Also broken and unhelpful.""" | ||
| return f"No results found for '{query}'. Please try a different search." | ||
|
|
||
| def test_loop_vulnerability(): | ||
| """Test if agent.chat() can get stuck in a loop with broken tools.""" | ||
|
|
||
| print("Testing loop vulnerability in agent.chat()...") | ||
|
|
||
| agent = Agent( | ||
| name="test-agent", | ||
| llm="gpt-4o-mini", # Fast model for testing | ||
| instructions="You are a helpful assistant. Always try to fulfill user requests using available tools.", | ||
| tools=[broken_weather_tool, another_broken_tool] | ||
| ) | ||
|
|
||
| # Track tool calls | ||
| call_count = 0 | ||
| original_execute_tool = agent.execute_tool | ||
|
|
||
| def counting_execute_tool(*args, **kwargs): | ||
| nonlocal call_count | ||
| call_count += 1 | ||
| print(f"Tool call #{call_count}: {args[0] if args else 'unknown'}") | ||
|
|
||
| # Safety valve - prevent actual infinite loop in test | ||
| if call_count > 10: | ||
| print("🚨 SAFETY VALVE TRIGGERED: Too many tool calls!") | ||
| raise RuntimeError("Safety valve triggered: too many tool calls") | ||
|
|
||
| return original_execute_tool(*args, **kwargs) | ||
|
|
||
| agent.execute_tool = counting_execute_tool | ||
|
|
||
| try: | ||
| response = agent.chat("What's the weather like in New York? I really need this information!") | ||
| print(f"\nFinal response: {response}") | ||
| print(f"Total tool calls: {call_count}") | ||
|
|
||
| if call_count > 5: | ||
| print("❌ ISSUE CONFIRMED: Agent made excessive tool calls without guardrails") | ||
| return True | ||
| else: | ||
| print("✅ Agent stopped within reasonable limits") | ||
| return False | ||
|
|
||
| except Exception as e: | ||
| print(f"Error during test: {e}") | ||
| print(f"Tool calls before error: {call_count}") | ||
| return call_count > 5 | ||
|
|
||
| if __name__ == "__main__": | ||
| issue_exists = test_loop_vulnerability() | ||
| sys.exit(1 if issue_exists else 0) |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Validate
max_tool_calls_per_turnat method entry (fail-fast).If this value is
<= 0, runtime behavior becomes confusing and only fails indirectly inside loops. Validate early in both methods and return a clear remediation hint.As per coding guidelines, “Error handling: Fail fast with clear error messages; include remediation hints in exceptions.”
Also applies to: 3686-3686
🤖 Prompt for AI Agents
Source: Coding guidelines