Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 56 additions & 22 deletions examples/01_standalone_sdk/25_agent_delegation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
"""

import os

from pydantic import SecretStr
from pathlib import Path

from openhands.sdk import (
LLM,
Expand All @@ -34,34 +33,29 @@
logger = get_logger(__name__)

# Configure LLM and agent
# You can get an API key from https://app.all-hands.dev/settings/api-keys
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
llm = LLM(
model=model,
api_key=SecretStr(api_key),
model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
api_key=os.getenv("LLM_API_KEY"),
base_url=os.environ.get("LLM_BASE_URL", None),
usage_id="agent",
)

cwd = os.getcwd()

register_tool("DelegateTool", DelegateTool)
tools = get_default_tools(enable_browser=False)
tools.append(Tool(name="DelegateTool"))
tools = get_default_tools(enable_browser=True)
tools.append(Tool(name=DelegateTool.name))

main_agent = Agent(
llm=llm,
tools=tools,
)
conversation = Conversation(
agent=main_agent,
workspace=cwd,
workspace=Path.cwd(),
visualizer=DelegationVisualizer(name="Delegator"),
)

task_message = (
conversation.send_message(
"Forget about coding. Let's switch to travel planning. "
"Let's plan a trip to London. I have two issues I need to solve: "
"Lodging: what are the best areas to stay at while keeping budget in mind? "
Expand All @@ -72,7 +66,6 @@
"They should keep it short. After getting the results, merge both analyses "
"into a single consolidated report.\n\n"
)
conversation.send_message(task_message)
conversation.run()

conversation.send_message(
Expand All @@ -81,18 +74,57 @@
conversation.run()

# Report cost for simple delegation example
cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (simple delegation): {cost_1}")
cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (simple delegation): {cost_simple}")

print("Simple delegation example done!", "\n" * 20)


# -------- Agent Delegation Second Part: User-Defined Agent Types --------

if ONLY_RUN_SIMPLE_DELEGATION:
# For CI: always emit the EXAMPLE_COST marker before exiting.
print(f"EXAMPLE_COST: {cost_simple}")
exit(0)


# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) --------

main_agent = Agent(
llm=llm,
tools=[Tool(name=DelegateTool.name)],
)
conversation = Conversation(
agent=main_agent,
workspace=cwd,
visualizer=DelegationVisualizer(name="Delegator (builtins)"),
)

builtin_task_message = (
"Demonstrate SDK built-in sub-agent types. "
"1) Spawn an 'explore' sub-agent and ask it to list the markdown files in "
"openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each "
"built-in agent type is for (based on the file contents). "
"2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the "
"terminal and return the exact output. "
"3) Merge both results into a short report. "
"Do not use internet access."
)

print("=" * 100)
print("Demonstrating built-in agent delegation (explore + bash)...")
print("=" * 100)

conversation.send_message(builtin_task_message)
conversation.run()

# Report cost for builtin agent types example
cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (builtin agents): {cost_builtin}")

print("Built-in agent delegation example done!", "\n" * 20)


# -------- Agent Delegation Third Part: User-Defined Agent Types --------


def create_lodging_planner(llm: LLM) -> Agent:
"""Create a lodging planner focused on London stays."""
skills = [
Expand Down Expand Up @@ -190,10 +222,12 @@ def create_activities_planner(llm: LLM) -> Agent:
conversation.run()

# Report cost for user-defined agent types example
cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"EXAMPLE_COST (user-defined agents): {cost_2}")
cost_user_defined = (
conversation.conversation_stats.get_combined_metrics().accumulated_cost
)
print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}")

print("All done!")

# Full example cost report for CI workflow
print(f"EXAMPLE_COST: {cost_1 + cost_2}")
print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}")
36 changes: 36 additions & 0 deletions openhands-sdk/openhands/sdk/subagent/builtins/bash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: bash
model: inherit
description: >-
Command execution specialist (terminal only).
<example>Run a shell command</example>
<example>Execute a build or test script</example>
<example>Check system information or process status</example>
tools:
- terminal
---

You are a command-line execution specialist. Your sole interface is the
terminal — use it to run shell commands on behalf of the caller.

## Core capabilities

- Execute arbitrary shell commands (bash/sh).
- Run builds, tests, linters, formatters, and other development tooling.
- Inspect system state: processes, disk usage, environment variables, network.
- Perform git operations (commit, push, rebase, etc.).

## Guidelines

1. **Be precise.** Run exactly what was requested. Do not add extra flags or
steps unless they are necessary for correctness.
2. **Check before destroying.** For destructive operations (`rm -rf`, `git
reset --hard`, `DROP TABLE`, etc.), confirm the intent and scope before
executing.
3. **Report results clearly.** After running a command, summarize the outcome —
exit code, key output lines, and any errors.
4. **Chain when appropriate.** Use `&&` to chain dependent commands so later
steps only run if earlier ones succeed.
5. **Avoid interactive commands.** Do not run commands that require interactive
input (e.g., `vim`, `less`, `git rebase -i`). Use non-interactive
alternatives instead.
43 changes: 43 additions & 0 deletions openhands-sdk/openhands/sdk/subagent/builtins/explore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
name: explore
model: inherit
description: >-
Fast codebase exploration agent (read-only).
<example>Find files matching a pattern</example>
<example>Search code for a keyword or symbol</example>
<example>Understand how a module or feature is implemented</example>
tools:
- terminal
---

You are a codebase exploration specialist. You excel at rapidly navigating,
searching, and understanding codebases. Your role is strictly **read-only** —
you never create, modify, or delete files.

## Core capabilities

- **File discovery** — find files by name, extension, or glob pattern.
- **Content search** — locate code, symbols, and text with regex patterns.
- **Code reading** — read and analyze source files to answer questions.

## Constraints

- Do **not** create, modify, move, copy, or delete any file.
- Do **not** run commands that change system state (installs, builds, writes).
- When using the terminal, restrict yourself to read-only commands:
`ls`, `find`, `cat`, `head`, `tail`, `wc`, `git status`, `git log`,
`git diff`, `git show`, `git blame`, `tree`, `file`, `stat`, `which`,
`echo`, `pwd`, `env`, `printenv`, `grep`. For file-name matching, use
`find -name '<pattern>'` or shell glob patterns.
- Never use redirect operators (`>`, `>>`), and never pipe output into
commands that write files (e.g. `tee`).

## Workflow guidelines

1. Start broad, then narrow down. Use glob patterns to locate candidate files
before reading them.
2. Prefer `grep` for content searches and `find` (with glob patterns) for file-name searches.
3. When exploring an unfamiliar area, check directory structure first (`ls`,
`tree`, or glob `**/*`) before diving into individual files.
4. Spawn parallel tool calls whenever possible — e.g., grep for a symbol in
multiple directories at once — to return results quickly.
5. Provide concise, structured answers. Summarize findings with file paths and
line numbers so the caller can act on them immediately.
8 changes: 7 additions & 1 deletion openhands-sdk/openhands/sdk/subagent/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def agent_definition_to_factory(
def _factory(llm: "LLM") -> "Agent":
from openhands.sdk.agent.agent import Agent
from openhands.sdk.context.agent_context import AgentContext
from openhands.sdk.tool.registry import list_registered_tools
from openhands.sdk.tool.spec import Tool

# Handle model override
Expand All @@ -147,7 +148,12 @@ def _factory(llm: "LLM") -> "Agent":
)

# Resolve tools
tools = [Tool(name=tool_name) for tool_name in agent_def.tools]
tools: list[Tool] = []
registered_tools: set[str] = set(list_registered_tools())
for tool_name in agent_def.tools:
if tool_name not in registered_tools:
logger.info(f"Tool '{tool_name}' is not registered (yet).")
tools.append(Tool(name=tool_name))

return Agent(
llm=llm,
Expand Down
78 changes: 41 additions & 37 deletions openhands-sdk/openhands/sdk/tool/tool.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import threading
from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import (
Expand Down Expand Up @@ -42,6 +43,7 @@
ObservationT = TypeVar("ObservationT", bound=Observation)
_action_types_with_risk: dict[type, type] = {}
_action_types_with_summary: dict[type, type] = {}
_action_type_lock = threading.Lock()


def _camel_to_snake(name: str) -> str:
Expand Down Expand Up @@ -477,25 +479,26 @@ def resolve_kind(cls, kind: str) -> type:


def create_action_type_with_risk(action_type: type[Schema]) -> type[Schema]:
    """Return a subclass of ``action_type`` with a required ``security_risk`` field.

    The derived type is named ``<ActionName>WithRisk`` and is cached per
    ``action_type`` in ``_action_types_with_risk``, so repeated calls with the
    same input return the same class object.

    Args:
        action_type: The action schema class to extend.

    Returns:
        The cached or newly created subclass carrying the extra field.
    """
    # Hold the lock across the lookup *and* the insert so that concurrent
    # callers cannot each build a distinct class for the same action type.
    with _action_type_lock:
        cached = _action_types_with_risk.get(action_type)
        if cached is not None:
            return cached

        action_type_with_risk = type(
            f"{action_type.__name__}WithRisk",
            (action_type,),
            {
                "security_risk": Field(
                    # We do NOT add default value to make it an required field
                    # default=risk.SecurityRisk.UNKNOWN
                    description=(
                        "The LLM's assessment of the safety risk of this action."
                    ),
                ),
                "__annotations__": {"security_risk": risk.SecurityRisk},
            },
        )
        _action_types_with_risk[action_type] = action_type_with_risk
        return action_type_with_risk


def _create_action_type_with_summary(action_type: type[Schema]) -> type[Schema]:
"""Create a new action type with summary field for LLM to predict.
Expand All @@ -509,24 +512,25 @@ def _create_action_type_with_summary(action_type: type[Schema]) -> type[Schema]:
Returns:
A new type that includes the summary field
"""
action_type_with_summary = _action_types_with_summary.get(action_type)
if action_type_with_summary:
return action_type_with_summary

action_type_with_summary = type(
f"{action_type.__name__}WithSummary",
(action_type,),
{
"summary": Field(
default=None,
description=(
"A concise summary (approximately 10 words) describing what "
"this specific action does. Focus on the key operation and target. "
"Example: 'List all Python files in current directory'"
with _action_type_lock:
action_type_with_summary = _action_types_with_summary.get(action_type)
if action_type_with_summary:
return action_type_with_summary

action_type_with_summary = type(
f"{action_type.__name__}WithSummary",
(action_type,),
{
"summary": Field(
default=None,
description=(
"A concise summary (approximately 10 words) describing what "
"this specific action does. Focus on the key operation and target. " # noqa:E501
"Example: 'List all Python files in current directory'"
),
),
),
"__annotations__": {"summary": str | None},
},
)
_action_types_with_summary[action_type] = action_type_with_summary
return action_type_with_summary
"__annotations__": {"summary": str | None},
},
)
_action_types_with_summary[action_type] = action_type_with_summary
return action_type_with_summary
Loading
Loading