OpenHands · VascoSch92 · Feb 27, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/examples/01_standalone_sdk/25_agent_delegation.py b/examples/01_standalone_sdk/25_agent_delegation.py
@@ -8,8 +8,7 @@
 """
 
 import os
-
-from pydantic import SecretStr
+from pathlib import Path
 
 from openhands.sdk import (
     LLM,
@@ -34,34 +33,29 @@
 logger = get_logger(__name__)
 
 # Configure LLM and agent
-# You can get an API key from https://app.all-hands.dev/settings/api-keys
-api_key = os.getenv("LLM_API_KEY")
-assert api_key is not None, "LLM_API_KEY environment variable is not set."
-model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
 llm = LLM(
-    model=model,
-    api_key=SecretStr(api_key),
+    model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
+    api_key=os.getenv("LLM_API_KEY"),
     base_url=os.environ.get("LLM_BASE_URL", None),
     usage_id="agent",
 )
 
 cwd = os.getcwd()
 
-register_tool("DelegateTool", DelegateTool)
-tools = get_default_tools(enable_browser=False)
-tools.append(Tool(name="DelegateTool"))
+tools = get_default_tools(enable_browser=True)
+tools.append(Tool(name=DelegateTool.name))
 
 main_agent = Agent(
     llm=llm,
     tools=tools,
 )
 conversation = Conversation(
     agent=main_agent,
-    workspace=cwd,
+    workspace=Path.cwd(),
     visualizer=DelegationVisualizer(name="Delegator"),
 )
 
-task_message = (
+conversation.send_message(
     "Forget about coding. Let's switch to travel planning. "
     "Let's plan a trip to London. I have two issues I need to solve: "
     "Lodging: what are the best areas to stay at while keeping budget in mind? "
@@ -72,7 +66,6 @@
     "They should keep it short. After getting the results, merge both analyses "
     "into a single consolidated report.\n\n"
 )
-conversation.send_message(task_message)
 conversation.run()
 
 conversation.send_message(
@@ -81,18 +74,57 @@
 conversation.run()
 
 # Report cost for simple delegation example
-cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
-print(f"EXAMPLE_COST (simple delegation): {cost_1}")
+cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost
+print(f"EXAMPLE_COST (simple delegation): {cost_simple}")
 
 print("Simple delegation example done!", "\n" * 20)
 
-
-# -------- Agent Delegation Second Part: User-Defined Agent Types --------
-
 if ONLY_RUN_SIMPLE_DELEGATION:
+    # For CI: always emit the EXAMPLE_COST marker before exiting.
+    print(f"EXAMPLE_COST: {cost_simple}")
     exit(0)
 
 
+# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) --------
+
+main_agent = Agent(
+    llm=llm,
+    tools=[Tool(name=DelegateTool.name)],
+)
+conversation = Conversation(
+    agent=main_agent,
+    workspace=cwd,
+    visualizer=DelegationVisualizer(name="Delegator (builtins)"),
+)
+
+builtin_task_message = (
+    "Demonstrate SDK built-in sub-agent types. "
+    "1) Spawn an 'explore' sub-agent and ask it to list the markdown files in "
+    "openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each "
+    "built-in agent type is for (based on the file contents). "
+    "2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the "
+    "terminal and return the exact output. "
+    "3) Merge both results into a short report. "
+    "Do not use internet access."
+)
+
+print("=" * 100)
+print("Demonstrating built-in agent delegation (explore + bash)...")
+print("=" * 100)
+
+conversation.send_message(builtin_task_message)
+conversation.run()
+
+# Report cost for builtin agent types example
+cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost
+print(f"EXAMPLE_COST (builtin agents): {cost_builtin}")
+
+print("Built-in agent delegation example done!", "\n" * 20)
+
+
+# -------- Agent Delegation Third Part: User-Defined Agent Types --------
+
+
 def create_lodging_planner(llm: LLM) -> Agent:
     """Create a lodging planner focused on London stays."""
     skills = [
@@ -190,10 +222,12 @@ def create_activities_planner(llm: LLM) -> Agent:
 conversation.run()
 
 # Report cost for user-defined agent types example
-cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
-print(f"EXAMPLE_COST (user-defined agents): {cost_2}")
+cost_user_defined = (
+    conversation.conversation_stats.get_combined_metrics().accumulated_cost
+)
+print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}")
 
 print("All done!")
 
 # Full example cost report for CI workflow
-print(f"EXAMPLE_COST: {cost_1 + cost_2}")
+print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}")
diff --git a/openhands-sdk/openhands/sdk/subagent/builtins/bash.md b/openhands-sdk/openhands/sdk/subagent/builtins/bash.md
@@ -0,0 +1,36 @@
+---
+name: bash
+model: inherit
+description: >-
+  Command execution specialist (terminal only).
+  <example>Run a shell command</example>
+  <example>Execute a build or test script</example>
+  <example>Check system information or process status</example>
+tools:
+  - terminal
+---
+
+You are a command-line execution specialist. Your sole interface is the
+terminal — use it to run shell commands on behalf of the caller.
+
+## Core capabilities
+
+- Execute arbitrary shell commands (bash/sh).
+- Run builds, tests, linters, formatters, and other development tooling.
+- Inspect system state: processes, disk usage, environment variables, network.
+- Perform git operations (commit, push, rebase, etc.).
+
+## Guidelines
+
+1. **Be precise.** Run exactly what was requested. Do not add extra flags or
+   steps unless they are necessary for correctness.
+2. **Check before destroying.** For destructive operations (`rm -rf`,
+   `git reset --hard`, `DROP TABLE`, etc.), confirm the intent and scope
+   before executing.
+3. **Report results clearly.** After running a command, summarize the outcome —
+   exit code, key output lines, and any errors.
+4. **Chain when appropriate.** Use `&&` to chain dependent commands so later
+   steps only run if earlier ones succeed.
+5. **Avoid interactive commands.** Do not run commands that require interactive
+   input (e.g., `vim`, `less`, `git rebase -i`). Use non-interactive
+   alternatives instead.
diff --git a/openhands-sdk/openhands/sdk/subagent/builtins/explore.md b/openhands-sdk/openhands/sdk/subagent/builtins/explore.md
@@ -0,0 +1,43 @@
+---
+name: explore
+model: inherit
+description: >-
+  Fast codebase exploration agent (read-only).
+  <example>Find files matching a pattern</example>
+  <example>Search code for a keyword or symbol</example>
+  <example>Understand how a module or feature is implemented</example>
+tools:
+  - terminal
+---
+
+You are a codebase exploration specialist. You excel at rapidly navigating,
+searching, and understanding codebases. Your role is strictly **read-only** —
+you never create, modify, or delete files.
+
+## Core capabilities
+
+- **File discovery** — find files by name, extension, or glob pattern.
+- **Content search** — locate code, symbols, and text with regex patterns.
+- **Code reading** — read and analyze source files to answer questions.
+
+## Constraints
+
+- Do **not** create, modify, move, copy, or delete any file.
+- Do **not** run commands that change system state (installs, builds, writes).
+- When using the terminal, restrict yourself to read-only commands:
+  `ls`, `find`, `cat`, `head`, `tail`, `wc`, `git status`, `git log`,
+  `git diff`, `git show`, `git blame`, `tree`, `file`, `stat`, `which`,
+  `echo`, `pwd`, `env`, `printenv`, `grep`.
+- Never use redirect operators (`>`, `>>`) or pipe output into commands that write files (e.g., `tee`).
+
+## Workflow guidelines
+
+1. Start broad, then narrow down. Use `find` with name patterns to locate
+   candidate files before reading them.
+2. Prefer `grep` for content searches and `find` for file-name searches.
+3. When exploring an unfamiliar area, check directory structure first (`ls`,
+   `tree`, or `find .`) before diving into individual files.
+4. Spawn parallel tool calls whenever possible — e.g., grep for a symbol in
+   multiple directories at once — to return results quickly.
+5. Provide concise, structured answers. Summarize findings with file paths and
+   line numbers so the caller can act on them immediately.
diff --git a/openhands-sdk/openhands/sdk/subagent/registry.py b/openhands-sdk/openhands/sdk/subagent/registry.py
@@ -132,6 +132,7 @@ def agent_definition_to_factory(
     def _factory(llm: "LLM") -> "Agent":
         from openhands.sdk.agent.agent import Agent
         from openhands.sdk.context.agent_context import AgentContext
+        from openhands.sdk.tool.registry import list_registered_tools
         from openhands.sdk.tool.spec import Tool
 
         # Handle model override
@@ -147,7 +148,12 @@ def _factory(llm: "LLM") -> "Agent":
         )
 
         # Resolve tools
-        tools = [Tool(name=tool_name) for tool_name in agent_def.tools]
+        tools: list[Tool] = []
+        registered_tools: set[str] = set(list_registered_tools())
+        for tool_name in agent_def.tools:
+            if tool_name not in registered_tools:
+                logger.info(f"Tool '{tool_name}' is not registered (yet).")
+            tools.append(Tool(name=tool_name))
 
         return Agent(
             llm=llm,

diff --git a/openhands-tools/openhands/tools/task/manager.py b/openhands-tools/openhands/tools/task/manager.py
@@ -243,6 +243,9 @@ def _get_sub_agent(self, subagent_type: str) -> Agent:
 
         llm_updates: dict = {"stream": False}
         sub_agent_llm = parent_llm.model_copy(update=llm_updates)
+        # Reset metrics such that the sub-agent has its own
+        # Metrics object
+        sub_agent_llm.reset_metrics()
 
         return factory.factory_func(sub_agent_llm)
 
@@ -266,10 +269,21 @@ def _run_task(self, task: Task, prompt: str) -> Task:
             task.set_error(str(e))
             logger.warning(f"Task {task.id} failed with error: {e}")
         finally:
+            self._update_parent_metrics(parent, task)
             self._evict_task(task)
 
         return task
 
+    def _update_parent_metrics(self, parent: LocalConversation, task: Task) -> None:
+        """
+        Sync sub-agent metrics into parent before eviction destroys the conversation.
+        Replace (not merge) because sub-agent metrics are cumulative across resumes.
+        """
+        if task.conversation is not None:
+            parent.conversation_stats.usage_to_metrics[f"task:{task.id}"] = (
+                task.conversation.conversation_stats.get_combined_metrics()
+            )
+
     def close(self) -> None:
         """Clean up tmp directory and remove all created tasks."""
         if self._tmp_dir.exists():

diff --git a/tests/sdk/subagent/test_builtin_agents.py b/tests/sdk/subagent/test_builtin_agents.py
@@ -0,0 +1,81 @@
+"""Tests for SDK built-in agent definitions (default, explore, bash)."""
+
+from collections.abc import Iterator
+
+import pytest
+from pydantic import SecretStr
+
+from openhands.sdk import LLM, Agent
+from openhands.sdk.subagent.load import load_agents_from_dir
+from openhands.sdk.subagent.registry import (
+    BUILTINS_DIR,
+    _reset_registry_for_tests,
+    get_agent_factory,
+    register_agent,
+    register_builtins_agents,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clean_registry() -> Iterator[None]:
+    """Reset the agent registry before and after every test."""
+    _reset_registry_for_tests()
+    yield
+    _reset_registry_for_tests()
+
+
+def _make_test_llm() -> LLM:
+    return LLM(model="gpt-4o", api_key=SecretStr("test-key"), usage_id="test-llm")
+
+
+def test_builtins_contains_expected_agents() -> None:
+    md_files = {f.stem for f in BUILTINS_DIR.glob("*.md")}
+    assert {"default", "explore", "bash"}.issubset(md_files)
+
+
+def test_load_all_builtins() -> None:
+    """Every .md file in builtins/ should parse without errors."""
+    agents = load_agents_from_dir(BUILTINS_DIR)
+    names = {a.name for a in agents}
+    assert {"default", "explore", "bash"}.issubset(names)
+
+
+def test_register_builtins_agents_registers_expected_factories() -> None:
+    register_builtins_agents()
+
+    llm = _make_test_llm()
+    agent_tool_names: dict[str, list[str]] = {}
+    for name in ("default", "explore", "bash"):
+        factory = get_agent_factory(name)
+        agent = factory.factory_func(llm)
+        assert isinstance(agent, Agent)
+        agent_tool_names[name] = [t.name for t in agent.tools]
+
+    assert agent_tool_names["default"] == [
+        "terminal",
+        "file_editor",
+        "task_tracker",
+        "browser_tool_set",
+    ]
+    assert agent_tool_names["explore"] == ["terminal"]
+    assert agent_tool_names["bash"] == ["terminal"]
+
+
+def test_builtins_do_not_overwrite_programmatic() -> None:
+    """Programmatic registrations take priority over builtins."""
+
+    def custom_factory(llm: LLM) -> Agent:
+        return Agent(llm=llm, tools=[])
+
+    register_agent(
+        name="explore",
+        factory_func=custom_factory,
+        description="Custom explore",
+    )
+
+    registered = register_builtins_agents()
+    assert "explore" not in registered
+
+    factory = get_agent_factory("explore")
+    assert factory.description == "Custom explore"
+    assert factory.factory_func(_make_test_llm()).tools == []