Skip to content
78 changes: 56 additions & 22 deletions examples/01_standalone_sdk/25_agent_delegation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
"""

import os

from pydantic import SecretStr
from pathlib import Path

from openhands.sdk import (
LLM,
Expand All @@ -34,34 +33,29 @@
logger = get_logger(__name__)

# Configure LLM and agent
# You can get an API key from https://app.all-hands.dev/settings/api-keys
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
llm = LLM(
model=model,
api_key=SecretStr(api_key),
model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
api_key=os.getenv("LLM_API_KEY"),
base_url=os.environ.get("LLM_BASE_URL", None),
usage_id="agent",
)

cwd = os.getcwd()

register_tool("DelegateTool", DelegateTool)
tools = get_default_tools(enable_browser=False)
tools.append(Tool(name="DelegateTool"))
tools = get_default_tools(enable_browser=True)
tools.append(Tool(name=DelegateTool.name))

main_agent = Agent(
llm=llm,
tools=tools,
)
conversation = Conversation(
agent=main_agent,
workspace=cwd,
workspace=Path.cwd(),
visualizer=DelegationVisualizer(name="Delegator"),
)

task_message = (
conversation.send_message(
"Forget about coding. Let's switch to travel planning. "
"Let's plan a trip to London. I have two issues I need to solve: "
"Lodging: what are the best areas to stay at while keeping budget in mind? "
Expand All @@ -72,7 +66,6 @@
"They should keep it short. After getting the results, merge both analyses "
"into a single consolidated report.\n\n"
)
conversation.send_message(task_message)
conversation.run()

conversation.send_message(
Expand All @@ -81,18 +74,57 @@
conversation.run()

# Report cost for simple delegation example
cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (simple delegation): {cost_1}")
cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (simple delegation): {cost_simple}")

print("Simple delegation example done!", "\n" * 20)


# -------- Agent Delegation Second Part: User-Defined Agent Types --------

if ONLY_RUN_SIMPLE_DELEGATION:
# For CI: always emit the EXAMPLE_COST marker before exiting.
print(f"EXAMPLE_COST: {cost_simple}")
exit(0)


# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) --------

main_agent = Agent(
llm=llm,
tools=[Tool(name=DelegateTool.name)],
)
conversation = Conversation(
agent=main_agent,
workspace=cwd,
visualizer=DelegationVisualizer(name="Delegator (builtins)"),
)

builtin_task_message = (
"Demonstrate SDK built-in sub-agent types. "
"1) Spawn an 'explore' sub-agent and ask it to list the markdown files in "
"openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each "
"built-in agent type is for (based on the file contents). "
"2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the "
"terminal and return the exact output. "
"3) Merge both results into a short report. "
"Do not use internet access."
)

print("=" * 100)
print("Demonstrating built-in agent delegation (explore + bash)...")
print("=" * 100)

conversation.send_message(builtin_task_message)
conversation.run()

# Report cost for builtin agent types example
cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (builtin agents): {cost_builtin}")

print("Built-in agent delegation example done!", "\n" * 20)


# -------- Agent Delegation Third Part: User-Defined Agent Types --------


def create_lodging_planner(llm: LLM) -> Agent:
"""Create a lodging planner focused on London stays."""
skills = [
Expand Down Expand Up @@ -190,10 +222,12 @@ def create_activities_planner(llm: LLM) -> Agent:
conversation.run()

# Report cost for user-defined agent types example
cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (user-defined agents): {cost_2}")
cost_user_defined = (
conversation.conversation_stats.get_combined_metrics().accumulated_cost
)
print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}")

print("All done!")

# Full example cost report for CI workflow
print(f"EXAMPLE_COST: {cost_1 + cost_2}")
print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}")
36 changes: 36 additions & 0 deletions openhands-sdk/openhands/sdk/subagent/builtins/bash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: bash
model: inherit
description: >-
Command execution specialist (terminal only).
<example>Run a shell command</example>
<example>Execute a build or test script</example>
<example>Check system information or process status</example>
tools:
- terminal
---

You are a command-line execution specialist. Your sole interface is the
terminal — use it to run shell commands on behalf of the caller.

## Core capabilities

- Execute arbitrary shell commands (bash/sh).
- Run builds, tests, linters, formatters, and other development tooling.
- Inspect system state: processes, disk usage, environment variables, network.
- Perform git operations (commit, push, rebase, etc.).

## Guidelines

1. **Be precise.** Run exactly what was requested. Do not add extra flags or
steps unless they are necessary for correctness.
2. **Check before destroying.** For destructive operations (`rm -rf`, `git
reset --hard`, `DROP TABLE`, etc.), confirm the intent and scope before
executing.
3. **Report results clearly.** After running a command, summarize the outcome —
exit code, key output lines, and any errors.
4. **Chain when appropriate.** Use `&&` to chain dependent commands so later
steps only run if earlier ones succeed.
5. **Avoid interactive commands.** Do not run commands that require interactive
input (e.g., `vim`, `less`, `git rebase -i`). Use non-interactive
alternatives instead.
43 changes: 43 additions & 0 deletions openhands-sdk/openhands/sdk/subagent/builtins/explore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
name: explore
model: inherit
description: >-
Fast codebase exploration agent (read-only).
<example>Find files matching a pattern</example>
<example>Search code for a keyword or symbol</example>
<example>Understand how a module or feature is implemented</example>
tools:
- terminal
---

You are a codebase exploration specialist. You excel at rapidly navigating,
searching, and understanding codebases. Your role is strictly **read-only** —
you never create, modify, or delete files.

## Core capabilities

- **File discovery** — find files by name, extension, or glob pattern.
- **Content search** — locate code, symbols, and text with regex patterns.
- **Code reading** — read and analyze source files to answer questions.

## Constraints

- Do **not** create, modify, move, copy, or delete any file.
- Do **not** run commands that change system state (installs, builds, writes).
- When using the terminal, restrict yourself to read-only commands:
`ls`, `find`, `cat`, `head`, `tail`, `wc`, `git status`, `git log`,
`git diff`, `git show`, `git blame`, `tree`, `file`, `stat`, `which`,
`echo`, `pwd`, `env`, `printenv`, `grep`, and shell glob patterns
(e.g. `ls *.md`).
- Never use redirect operators (`>`, `>>`), and never pipe output into a command that writes to disk (e.g. `tee`).

## Workflow guidelines

1. Start broad, then narrow down. Use glob patterns to locate candidate files
before reading them.
2. Prefer `grep` for content searches and `find` with a glob pattern (e.g. `find . -name '*.py'`) for file-name searches.
3. When exploring an unfamiliar area, check directory structure first (`ls`,
`tree`, or glob `**/*`) before diving into individual files.
4. Spawn parallel tool calls whenever possible — e.g., grep for a symbol in
multiple directories at once — to return results quickly.
5. Provide concise, structured answers. Summarize findings with file paths and
line numbers so the caller can act on them immediately.
8 changes: 7 additions & 1 deletion openhands-sdk/openhands/sdk/subagent/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def agent_definition_to_factory(
def _factory(llm: "LLM") -> "Agent":
from openhands.sdk.agent.agent import Agent
from openhands.sdk.context.agent_context import AgentContext
from openhands.sdk.tool.registry import list_registered_tools
from openhands.sdk.tool.spec import Tool

# Handle model override
Expand All @@ -147,7 +148,12 @@ def _factory(llm: "LLM") -> "Agent":
)

# Resolve tools
tools = [Tool(name=tool_name) for tool_name in agent_def.tools]
tools: list[Tool] = []
registered_tools: set[str] = set(list_registered_tools())
for tool_name in agent_def.tools:
if tool_name not in registered_tools:
logger.info(f"Tool '{tool_name}' is not registered (yet).")
tools.append(Tool(name=tool_name))

return Agent(
llm=llm,
Expand Down
14 changes: 14 additions & 0 deletions openhands-tools/openhands/tools/task/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ def _get_sub_agent(self, subagent_type: str) -> Agent:

llm_updates: dict = {"stream": False}
sub_agent_llm = parent_llm.model_copy(update=llm_updates)
# Reset metrics such that the sub-agent has its own
# Metrics object
sub_agent_llm.reset_metrics()

return factory.factory_func(sub_agent_llm)

Expand All @@ -266,10 +269,21 @@ def _run_task(self, task: Task, prompt: str) -> Task:
task.set_error(str(e))
logger.warning(f"Task {task.id} failed with error: {e}")
finally:
self._update_parent_metrics(parent, task)
self._evict_task(task)

return task

def _update_parent_metrics(self, parent: LocalConversation, task: Task) -> None:
    """Copy the task's combined metrics into the parent's conversation stats.

    Called before the task is evicted, so the sub-agent's usage is still
    visible on the parent afterwards. The value is stored under the key
    ``task:<task.id>`` and replaces any earlier entry for that key instead of
    being merged into it, because sub-agent metrics are cumulative across
    resumes. Does nothing when the task has no conversation attached.
    """
    sub_conversation = task.conversation
    if sub_conversation is None:
        return

    combined = sub_conversation.conversation_stats.get_combined_metrics()
    parent.conversation_stats.usage_to_metrics[f"task:{task.id}"] = combined

def close(self) -> None:
"""Clean up tmp directory and remove all created tasks."""
if self._tmp_dir.exists():
Expand Down
81 changes: 81 additions & 0 deletions tests/sdk/subagent/test_builtin_agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""Tests for SDK built-in agent definitions (default, explore, bash)."""

from collections.abc import Iterator

import pytest
from pydantic import SecretStr

from openhands.sdk import LLM, Agent
from openhands.sdk.subagent.load import load_agents_from_dir
from openhands.sdk.subagent.registry import (
BUILTINS_DIR,
_reset_registry_for_tests,
get_agent_factory,
register_agent,
register_builtins_agents,
)


@pytest.fixture(autouse=True)
def _clean_registry() -> Iterator[None]:
    """Give every test an empty agent registry and wipe it again afterwards."""
    # Setup: drop anything a previous test (or import side effect) registered.
    _reset_registry_for_tests()
    yield
    # Teardown: do not leak this test's registrations into the next one.
    _reset_registry_for_tests()


def _make_test_llm() -> LLM:
    """Build an LLM configured with a dummy API key for use in factory calls."""
    dummy_key = SecretStr("test-key")
    return LLM(model="gpt-4o", api_key=dummy_key, usage_id="test-llm")


def test_builtins_contains_expected_agents() -> None:
    """The builtins directory contains a markdown file for each core agent."""
    required = {"default", "explore", "bash"}
    present = {md_path.stem for md_path in BUILTINS_DIR.glob("*.md")}
    assert required <= present


def test_load_all_builtins() -> None:
    """Loading builtins/ succeeds and yields at least the three core agents."""
    required = {"default", "explore", "bash"}
    loaded_names = {definition.name for definition in load_agents_from_dir(BUILTINS_DIR)}
    assert required <= loaded_names


def test_register_builtins_agents_registers_expected_factories() -> None:
    """Each builtin factory builds an Agent with exactly the expected tools."""
    # Expected tool names, in order, for every builtin agent type.
    expected_tools: dict[str, list[str]] = {
        "default": ["terminal", "file_editor", "task_tracker", "browser_tool_set"],
        "explore": ["terminal"],
        "bash": ["terminal"],
    }

    register_builtins_agents()
    llm = _make_test_llm()

    for agent_name, tool_names in expected_tools.items():
        built = get_agent_factory(agent_name).factory_func(llm)
        assert isinstance(built, Agent)
        assert [tool.name for tool in built.tools] == tool_names


def test_builtins_do_not_overwrite_programmatic() -> None:
    """A factory registered in code survives a later builtin registration."""

    def _toolless_agent(llm: LLM) -> Agent:
        return Agent(llm=llm, tools=[])

    # Claim the "explore" name before the builtins are loaded.
    register_agent(
        name="explore",
        factory_func=_toolless_agent,
        description="Custom explore",
    )

    newly_registered = register_builtins_agents()
    assert "explore" not in newly_registered

    # The registry must still hand back the programmatic factory.
    explore_factory = get_agent_factory("explore")
    assert explore_factory.description == "Custom explore"
    built = explore_factory.factory_func(_make_test_llm())
    assert built.tools == []
Loading
Loading