diff --git a/examples/01_standalone_sdk/25_agent_delegation.py b/examples/01_standalone_sdk/25_agent_delegation.py index 23532c2944..e5d9188ddd 100644 --- a/examples/01_standalone_sdk/25_agent_delegation.py +++ b/examples/01_standalone_sdk/25_agent_delegation.py @@ -9,8 +9,6 @@ import os -from pydantic import SecretStr - from openhands.sdk import ( LLM, Agent, @@ -26,7 +24,7 @@ DelegateTool, DelegationVisualizer, ) -from openhands.tools.preset.default import get_default_tools +from openhands.tools.preset.default import get_default_tools, register_builtins_agents ONLY_RUN_SIMPLE_DELEGATION = False @@ -34,22 +32,18 @@ logger = get_logger(__name__) # Configure LLM and agent -# You can get an API key from https://app.all-hands.dev/settings/api-keys -api_key = os.getenv("LLM_API_KEY") -assert api_key is not None, "LLM_API_KEY environment variable is not set." -model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") llm = LLM( - model=model, - api_key=SecretStr(api_key), + model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=os.getenv("LLM_API_KEY"), base_url=os.environ.get("LLM_BASE_URL", None), usage_id="agent", ) cwd = os.getcwd() -register_tool("DelegateTool", DelegateTool) -tools = get_default_tools(enable_browser=False) -tools.append(Tool(name="DelegateTool")) +tools = get_default_tools(enable_browser=True) +tools.append(Tool(name=DelegateTool.name)) +register_builtins_agents() main_agent = Agent( llm=llm, @@ -61,7 +55,7 @@ visualizer=DelegationVisualizer(name="Delegator"), ) -task_message = ( +conversation.send_message( "Forget about coding. Let's switch to travel planning. " "Let's plan a trip to London. I have two issues I need to solve: " "Lodging: what are the best areas to stay at while keeping budget in mind? " @@ -72,7 +66,6 @@ "They should keep it short. 
After getting the results, merge both analyses " "into a single consolidated report.\n\n" ) -conversation.send_message(task_message) conversation.run() conversation.send_message( @@ -81,18 +74,57 @@ conversation.run() # Report cost for simple delegation example -cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (simple delegation): {cost_1}") +cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (simple delegation): {cost_simple}") print("Simple delegation example done!", "\n" * 20) - -# -------- Agent Delegation Second Part: User-Defined Agent Types -------- - if ONLY_RUN_SIMPLE_DELEGATION: + # For CI: always emit the EXAMPLE_COST marker before exiting. + print(f"EXAMPLE_COST: {cost_simple}") exit(0) +# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) -------- + +main_agent = Agent( + llm=llm, + tools=[Tool(name=DelegateTool.name)], +) +conversation = Conversation( + agent=main_agent, + workspace=cwd, + visualizer=DelegationVisualizer(name="Delegator (builtins)"), +) + +builtin_task_message = ( + "Demonstrate SDK built-in sub-agent types. " + "1) Spawn an 'explore' sub-agent and ask it to list the markdown files in " + "openhands-tools/openhands/tools/preset/subagents/ and summarize what each " + "built-in agent type is for (based on the file contents). " + "2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the " + "terminal and return the exact output. " + "3) Merge both results into a short report. " + "Do not use internet access."
+) + +print("=" * 100) +print("Demonstrating built-in agent delegation (explore + bash)...") +print("=" * 100) + +conversation.send_message(builtin_task_message) +conversation.run() + +# Report cost for builtin agent types example +cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost +print(f"EXAMPLE_COST (builtin agents): {cost_builtin}") + +print("Built-in agent delegation example done!", "\n" * 20) + + +# -------- Agent Delegation Third Part: User-Defined Agent Types -------- + + def create_lodging_planner(llm: LLM) -> Agent: """Create a lodging planner focused on London stays.""" skills = [ @@ -190,10 +222,12 @@ def create_activities_planner(llm: LLM) -> Agent: conversation.run() # Report cost for user-defined agent types example -cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost -print(f"EXAMPLE_COST (user-defined agents): {cost_2}") +cost_user_defined = ( + conversation.conversation_stats.get_combined_metrics().accumulated_cost +) +print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}") print("All done!") # Full example cost report for CI workflow -print(f"EXAMPLE_COST: {cost_1 + cost_2}") +print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}") diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index b83cfabcb5..229c023955 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -51,7 +51,6 @@ register_file_agents, register_plugin_agents, ) -from openhands.sdk.subagent.registry import register_builtins_agents from openhands.sdk.tool.schema import Action, Observation from openhands.sdk.utils.cipher import Cipher from openhands.sdk.workspace import LocalWorkspace @@ -417,12 +416,9 @@ def _register_file_based_agents(self) -> None: then `{project}/.openhands/agents/*.md`) 4. 
User-level file agents (`~/.agents/agents/*.md`, then `~/.openhands/agents/*.md`) - 5. SDK builtin agents (`subagent/builtins/*.md`) """ # register project-level and then user-level file-based agents register_file_agents(self.workspace.working_dir) - # register builtins agents - register_builtins_agents() def _ensure_agent_ready(self) -> None: """Ensure the agent is fully initialized with plugins and agents loaded. diff --git a/openhands-sdk/openhands/sdk/subagent/registry.py b/openhands-sdk/openhands/sdk/subagent/registry.py index ae5ecc13cc..e345d84a56 100644 --- a/openhands-sdk/openhands/sdk/subagent/registry.py +++ b/openhands-sdk/openhands/sdk/subagent/registry.py @@ -30,7 +30,6 @@ def create_security_expert(llm): from openhands.sdk.logger import get_logger from openhands.sdk.subagent.load import ( - load_agents_from_dir, load_project_agents, load_user_agents, ) @@ -43,8 +42,6 @@ def create_security_expert(llm): logger = get_logger(__name__) -BUILTINS_DIR = Path(__file__).parent / "builtins" - class AgentFactory(NamedTuple): """Simple container for an agent factory function and its description.""" @@ -127,11 +124,15 @@ def agent_definition_to_factory( `AgentContext`. - `model: inherit` preserves the parent LLM; an explicit model name creates a copy via `model_copy(update=...)`. + + Raises: + ValueError: If a tool provided to the agent is not registered.
""" def _factory(llm: "LLM") -> "Agent": from openhands.sdk.agent.agent import Agent from openhands.sdk.context.agent_context import AgentContext + from openhands.sdk.tool.registry import list_registered_tools from openhands.sdk.tool.spec import Tool # Handle model override @@ -147,7 +148,15 @@ def _factory(llm: "LLM") -> "Agent": ) # Resolve tools - tools = [Tool(name=tool_name) for tool_name in agent_def.tools] + tools: list[Tool] = [] + registered_tools: set[str] = set(list_registered_tools()) + for tool_name in agent_def.tools: + if tool_name not in registered_tools: + raise ValueError( + f"Tool '{tool_name}' not registered " + f"but was given to agent {agent_def.name}." + ) + tools.append(Tool(name=tool_name)) return Agent( llm=llm, @@ -235,35 +244,6 @@ def register_plugin_agents(agents: list[AgentDefinition]) -> list[str]: return registered -def register_builtins_agents() -> list[str]: - """Load and register SDK builtin agents from ``subagent/builtins/*.md``. - - They are registered via ``register_agent_if_absent`` and will not - overwrite agents already registered by programmatic calls, plugins, - or project/user-level file-based definitions. - - Returns: - List of agent names that were actually registered. - """ - builtins_agents_def = load_agents_from_dir(BUILTINS_DIR) - - registered: list[str] = [] - for agent_def in builtins_agents_def: - factory = agent_definition_to_factory(agent_def) - was_registered = register_agent_if_absent( - name=agent_def.name, - factory_func=factory, - description=agent_def.description or f"Agent: {agent_def.name}", - ) - if was_registered: - registered.append(agent_def.name) - logger.info( - f"Registered file-based agent '{agent_def.name}'" - + (f" from {agent_def.source}" if agent_def.source else "") - ) - return registered - - def get_agent_factory(name: str | None) -> AgentFactory: """ Get a registered agent factory by name.
diff --git a/openhands-tools/openhands/tools/__init__.py b/openhands-tools/openhands/tools/__init__.py index 4e10a017bf..622b2e954c 100644 --- a/openhands-tools/openhands/tools/__init__.py +++ b/openhands-tools/openhands/tools/__init__.py @@ -23,6 +23,7 @@ from openhands.tools.preset.default import ( get_default_agent, get_default_tools, + register_builtins_agents, register_default_tools, ) from openhands.tools.task_tracker import TaskTrackerTool @@ -44,4 +45,5 @@ "get_default_agent", "get_default_tools", "register_default_tools", + "register_builtins_agents", ] diff --git a/openhands-tools/openhands/tools/preset/__init__.py b/openhands-tools/openhands/tools/preset/__init__.py index cc44e66ce4..aa07aa4d2e 100644 --- a/openhands-tools/openhands/tools/preset/__init__.py +++ b/openhands-tools/openhands/tools/preset/__init__.py @@ -18,7 +18,7 @@ setups. """ -from .default import get_default_agent +from .default import get_default_agent, register_builtins_agents from .gemini import get_gemini_agent, get_gemini_tools from .gpt5 import get_gpt5_agent from .planning import get_planning_agent @@ -30,4 +30,5 @@ "get_gemini_tools", "get_gpt5_agent", "get_planning_agent", + "register_builtins_agents", ] diff --git a/openhands-tools/openhands/tools/preset/default.py b/openhands-tools/openhands/tools/preset/default.py index 95ef6535fb..59385215da 100644 --- a/openhands-tools/openhands/tools/preset/default.py +++ b/openhands-tools/openhands/tools/preset/default.py @@ -1,12 +1,15 @@ """Default preset configuration for OpenHands agents.""" -from openhands.sdk import Agent +from pathlib import Path + +from openhands.sdk import Agent, agent_definition_to_factory, load_agents_from_dir from openhands.sdk.context.condenser import ( LLMSummarizingCondenser, ) from openhands.sdk.context.condenser.base import CondenserBase from openhands.sdk.llm.llm import LLM from openhands.sdk.logger import get_logger +from openhands.sdk.subagent import register_agent_if_absent from openhands.sdk.tool 
import Tool @@ -83,3 +86,52 @@ def get_default_agent( ), ) return agent + + +def register_builtins_agents(cli_mode: bool = False) -> list[str]: + """Load and register builtin agents from ``subagents/*.md``. + + They are registered via `register_agent_if_absent` and will not + overwrite agents already registered by programmatic calls, plugins, + or project/user-level file-based definitions. + + Args: + cli_mode: Whether to load the default agent in cli mode or not. + + Returns: + List of agents which were actually registered. + """ + register_default_tools( + # Disable browser tools in CLI mode + enable_browser=not cli_mode, + ) + + subagent_dir = Path(__file__).parent / "subagents" + builtins_agents_def = load_agents_from_dir(subagent_dir) + + # if we are in cli mode, we filter out the default agent (with browser tool) + # otherwise, we filter out the default cli agent + if cli_mode: + builtins_agents_def = [ + agent for agent in builtins_agents_def if agent.name != "default" + ] + else: + builtins_agents_def = [ + agent for agent in builtins_agents_def if agent.name != "default cli mode" + ] + + registered: list[str] = [] + for agent_def in builtins_agents_def: + factory = agent_definition_to_factory(agent_def) + was_registered = register_agent_if_absent( + name=agent_def.name, + factory_func=factory, + description=agent_def.description or f"Agent: {agent_def.name}", + ) + if was_registered: + registered.append(agent_def.name) + logger.info( + f"Registered file-based agent '{agent_def.name}'" + + (f" from {agent_def.source}" if agent_def.source else "") + ) + return registered diff --git a/openhands-tools/openhands/tools/preset/subagents/bash.md b/openhands-tools/openhands/tools/preset/subagents/bash.md new file mode 100644 index 0000000000..2e4c10aa2b --- /dev/null +++ b/openhands-tools/openhands/tools/preset/subagents/bash.md @@ -0,0 +1,36 @@ +--- +name: bash +model: inherit +description: >- + Command execution specialist (terminal only).
+ Run a shell command + Execute a build or test script + Check system information or process status +tools: + - terminal +--- + +You are a command-line execution specialist. Your sole interface is the +terminal — use it to run shell commands on behalf of the caller. + +## Core capabilities + +- Execute arbitrary shell commands (bash/sh). +- Run builds, tests, linters, formatters, and other development tooling. +- Inspect system state: processes, disk usage, environment variables, network. +- Perform git operations (commit, push, rebase, etc.). + +## Guidelines + +1. **Be precise.** Run exactly what was requested. Do not add extra flags or + steps unless they are necessary for correctness. +2. **Check before destroying.** For destructive operations (`rm -rf`, `git + reset --hard`, `DROP TABLE`, etc.), confirm the intent and scope before + executing. +3. **Report results clearly.** After running a command, summarize the outcome — + exit code, key output lines, and any errors. +4. **Chain when appropriate.** Use `&&` to chain dependent commands so later + steps only run if earlier ones succeed. +5. **Avoid interactive commands.** Do not run commands that require interactive + input (e.g., `vim`, `less`, `git rebase -i`). Use non-interactive + alternatives instead. 
diff --git a/openhands-sdk/openhands/sdk/subagent/builtins/default.md b/openhands-tools/openhands/tools/preset/subagents/default.md similarity index 100% rename from openhands-sdk/openhands/sdk/subagent/builtins/default.md rename to openhands-tools/openhands/tools/preset/subagents/default.md diff --git a/openhands-tools/openhands/tools/preset/subagents/default_cli.md b/openhands-tools/openhands/tools/preset/subagents/default_cli.md new file mode 100644 index 0000000000..65eaca7074 --- /dev/null +++ b/openhands-tools/openhands/tools/preset/subagents/default_cli.md @@ -0,0 +1,8 @@ +--- +name: default cli mode +description: Default general-purpose agent +tools: + - terminal + - file_editor + - task_tracker +--- diff --git a/openhands-tools/openhands/tools/preset/subagents/explore.md b/openhands-tools/openhands/tools/preset/subagents/explore.md new file mode 100644 index 0000000000..401c16f56b --- /dev/null +++ b/openhands-tools/openhands/tools/preset/subagents/explore.md @@ -0,0 +1,43 @@ +--- +name: explore +model: inherit +description: >- + Fast codebase exploration agent (read-only). + Find files matching a pattern + Search code for a keyword or symbol + Understand how a module or feature is implemented +tools: + - terminal +--- + +You are a codebase exploration specialist. You excel at rapidly navigating, +searching, and understanding codebases. Your role is strictly **read-only** — +you never create, modify, or delete files. + +## Core capabilities + +- **File discovery** — find files by name, extension, or glob pattern. +- **Content search** — locate code, symbols, and text with regex patterns. +- **Code reading** — read and analyze source files to answer questions. + +## Constraints + +- Do **not** create, modify, move, copy, or delete any file. +- Do **not** run commands that change system state (installs, builds, writes). 
+- When using the terminal, restrict yourself to read-only commands: + `ls`, `find`, `cat`, `head`, `tail`, `wc`, `git status`, `git log`, + `git diff`, `git show`, `git blame`, `tree`, `file`, `stat`, `which`, + `echo`, `pwd`, `env`, `printenv`, `grep`, `glob`. +- Never use redirect operators (`>`, `>>`) or pipe to write commands. + +## Workflow guidelines + +1. Start broad, then narrow down. Use glob patterns to locate candidate files + before reading them. +2. Prefer `grep` for content searches and `glob` for file-name searches. +3. When exploring an unfamiliar area, check directory structure first (`ls`, + `tree`, or glob `**/*`) before diving into individual files. +4. Spawn parallel tool calls whenever possible — e.g., grep for a symbol in + multiple directories at once — to return results quickly. +5. Provide concise, structured answers. Summarize findings with file paths and + line numbers so the caller can act on them immediately. diff --git a/openhands-tools/pyproject.toml b/openhands-tools/pyproject.toml index 244af72200..7fe1eaddbc 100644 --- a/openhands-tools/pyproject.toml +++ b/openhands-tools/pyproject.toml @@ -38,3 +38,4 @@ namespaces = true [tool.setuptools.package-data] "*" = ["py.typed", "**/*.j2"] +"openhands.tools.preset.subagents" = ["*.md"] diff --git a/tests/sdk/subagent/test_subagent_registry.py b/tests/sdk/subagent/test_subagent_registry.py index ec38c3090f..d4f61b582e 100644 --- a/tests/sdk/subagent/test_subagent_registry.py +++ b/tests/sdk/subagent/test_subagent_registry.py @@ -168,7 +168,7 @@ def test_agent_definition_to_factory_basic() -> None: name="test-agent", description="A test agent", model="inherit", - tools=["ReadTool", "GlobTool"], + tools=[], system_prompt="You are a test agent.", ) @@ -177,10 +177,8 @@ def test_agent_definition_to_factory_basic() -> None: agent = factory(llm) assert isinstance(agent, Agent) - # Check tools - tool_names = [t.name for t in agent.tools] - assert "ReadTool" in tool_names - assert "GlobTool" in 
tool_names + # Check tools are empty + assert agent.tools == [] # Check skill (system prompt as always-active skill) assert agent.agent_context is not None assert agent.agent_context.system_message_suffix == "You are a test agent." @@ -230,7 +228,6 @@ def test_agent_definition_to_factory_no_system_prompt() -> None: name="no-prompt-agent", description="No prompt", model="inherit", - tools=["ReadTool"], system_prompt="", ) @@ -323,9 +320,6 @@ def test_end_to_end_md_to_factory_to_registry(tmp_path: Path) -> None: "name: e2e-test-agent\n" "description: End-to-end test agent\n" "model: inherit\n" - "tools:\n" - " - ReadTool\n" - " - GrepTool\n" "---\n\n" "You are a test agent for end-to-end testing.\n" "Focus on correctness and clarity.\n" @@ -335,7 +329,6 @@ def test_end_to_end_md_to_factory_to_registry(tmp_path: Path) -> None: agent_def = AgentDefinition.load(md_file) assert agent_def.name == "e2e-test-agent" assert agent_def.description == "End-to-end test agent" - assert agent_def.tools == ["ReadTool", "GrepTool"] # Convert to factory factory = agent_definition_to_factory(agent_def) @@ -360,6 +353,3 @@ def test_end_to_end_md_to_factory_to_registry(tmp_path: Path) -> None: ) agent = retrieved.factory_func(test_llm) assert isinstance(agent, Agent) - tool_names = [t.name for t in agent.tools] - assert "ReadTool" in tool_names - assert "GrepTool" in tool_names diff --git a/tests/tools/delegate/test_delegation.py b/tests/tools/delegate/test_delegation.py index 0dd87bbc5f..033bfa1e5c 100644 --- a/tests/tools/delegate/test_delegation.py +++ b/tests/tools/delegate/test_delegation.py @@ -10,12 +10,12 @@ from openhands.sdk.conversation.conversation_stats import ConversationStats from openhands.sdk.conversation.state import ConversationExecutionStatus from openhands.sdk.llm import LLM, TextContent -from openhands.sdk.subagent.registry import register_builtins_agents from openhands.tools.delegate import ( DelegateExecutor, DelegateObservation, ) from 
openhands.tools.delegate.definition import DelegateAction +from openhands.tools.preset import register_builtins_agents def create_test_executor_and_parent(): diff --git a/tests/tools/task/test_task_manager.py b/tests/tools/task/test_task_manager.py index 4755b9b98f..571f65dc1a 100644 --- a/tests/tools/task/test_task_manager.py +++ b/tests/tools/task/test_task_manager.py @@ -10,8 +10,8 @@ from openhands.sdk.subagent.registry import ( _reset_registry_for_tests, register_agent, - register_builtins_agents, ) +from openhands.tools.preset import register_builtins_agents from openhands.tools.task.manager import ( Task, TaskManager, diff --git a/tests/tools/test_builtin_agents.py b/tests/tools/test_builtin_agents.py new file mode 100644 index 0000000000..57b9338d12 --- /dev/null +++ b/tests/tools/test_builtin_agents.py @@ -0,0 +1,85 @@ +"""Tests for built-in subagents definitions.""" + +from collections.abc import Iterator +from pathlib import Path +from typing import Final + +import pytest +from pydantic import SecretStr + +import openhands.tools.preset.default as _preset_default +from openhands.sdk import LLM, Agent +from openhands.sdk.subagent.load import load_agents_from_dir +from openhands.sdk.subagent.registry import ( + _reset_registry_for_tests, + get_agent_factory, +) +from openhands.tools.preset.default import register_builtins_agents + + +# Resolve once from the installed package — works regardless of cwd. 
+SUBAGENTS_DIR: Final[Path] = Path(_preset_default.__file__).parent / "subagents" + + +@pytest.fixture(autouse=True) +def _clean_registry() -> Iterator[None]: + """Reset the agent registry before and after every test.""" + _reset_registry_for_tests() + yield + _reset_registry_for_tests() + + +def _make_test_llm() -> LLM: + return LLM(model="gpt-4o", api_key=SecretStr("test-key"), usage_id="test-llm") + + +def test_builtins_contains_expected_agents() -> None: + md_files = {f.stem for f in SUBAGENTS_DIR.glob("*.md")} + assert {"default", "explore", "bash"}.issubset(md_files) + + +def test_load_all_builtins() -> None: + """Every .md file in subagents/ should parse without errors.""" + agents = load_agents_from_dir(SUBAGENTS_DIR) + names = {a.name for a in agents} + assert {"default", "explore", "bash"}.issubset(names) + + +@pytest.mark.parametrize( + "cli_mode, agent_names", + [ + (False, ["default", "explore", "bash"]), + (True, ["default cli mode", "explore", "bash"]), + ], +) +def test_register_builtins_agents_registers_expected_factories( + cli_mode: bool, agent_names: list[str] +) -> None: + register_builtins_agents(cli_mode=cli_mode) + + llm = _make_test_llm() + agent_tool_names: dict[str, list[str]] = {} + for name in agent_names: + factory = get_agent_factory(name) + agent = factory.factory_func(llm) + assert isinstance(agent, Agent) + agent_tool_names[name] = [t.name for t in agent.tools] + + assert len(agent_tool_names) == 3 + + if cli_mode: + assert agent_tool_names["default cli mode"] == [ + "terminal", + "file_editor", + "task_tracker", + ] + else: + assert agent_tool_names["default"] == [ + "terminal", + "file_editor", + "task_tracker", + "browser_tool_set", + ] + + assert agent_tool_names["explore"] == ["terminal"] + assert agent_tool_names["bash"] == ["terminal"]