Skip to content

Commit 2bade83

Browse files
feat: add nemotron tool preset (bash + str_replace aliases for Anthropic schema compatibility)
Add a new 'nemotron' tool preset for Nemotron-3 Super (nvidia/nemotron-3-super-120b-a12b) which was fine-tuned on trajectories using Anthropic's tool schema. The preset exposes: - BashTool: A tool named 'bash' (instead of 'terminal') that wraps TerminalExecutor - StrReplaceTool: A tool named 'str_replace' (instead of 'file_editor') that wraps FileEditorExecutor This fixes the 63-67% conversation error rate observed in Nemotron evaluations, caused entirely by tool name mismatches where the model called tools like 'bash', 'str_replace', 'command', 'execute' that don't exist in the default OpenHands schema. New files: - openhands-tools/openhands/tools/nemotron/bash/ - BashTool implementation - openhands-tools/openhands/tools/nemotron/str_replace/ - StrReplaceTool implementation - openhands-tools/openhands/tools/preset/nemotron.py - Preset configuration - tests/tools/nemotron/ - Test coverage for new tools and preset Exports added: - get_nemotron_agent, get_nemotron_tools from openhands.tools.preset Fixes #2553 Co-authored-by: openhands <openhands@all-hands.dev>
1 parent d061ff0 commit 2bade83

15 files changed

Lines changed: 1221 additions & 0 deletions

File tree

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Nemotron-compatible tools (Anthropic bash/str_replace schema).
2+
3+
Nemotron-3 Super (nvidia/nemotron-3-super-120b-a12b) was fine-tuned on
4+
trajectories that use the Anthropic str_replace_based_edit_tool / bash
5+
tool schema. This module exposes those exact tool names so the model's
6+
calls succeed without any prompt engineering or model-side changes.
7+
8+
bash → BashExecutor (model calls "bash", not "terminal")
9+
str_replace → StrReplaceExecutor (model calls "str_replace", not "file_editor")
10+
11+
Tools:
12+
- bash: Run shell commands (Anthropic-compatible)
13+
- str_replace: File viewing and editing operations (Anthropic-compatible)
14+
15+
Usage:
16+
To use Nemotron-compatible tools instead of the standard terminal/file_editor:
17+
18+
```python
19+
from openhands.tools.nemotron import NEMOTRON_TOOLS
20+
21+
agent = Agent(
22+
llm=llm,
23+
tools=[
24+
*NEMOTRON_TOOLS,
25+
Tool(name=TaskTrackerTool.name),
26+
],
27+
)
28+
```
29+
30+
Or use the preset:
31+
32+
```python
33+
from openhands.tools.preset.nemotron import get_nemotron_agent
34+
35+
agent = get_nemotron_agent(llm=llm)
36+
```
37+
"""
38+
39+
from openhands.sdk import Tool
40+
from openhands.tools.nemotron.bash import (
41+
BashAction,
42+
BashExecutor,
43+
BashObservation,
44+
BashTool,
45+
)
46+
from openhands.tools.nemotron.str_replace import (
47+
StrReplaceAction,
48+
StrReplaceExecutor,
49+
StrReplaceObservation,
50+
StrReplaceTool,
51+
)
52+
53+
54+
# Drop-in tool specs to use in place of the standard terminal/file_editor pair.
NEMOTRON_TOOLS: list[Tool] = [
    Tool(name=tool_cls.name) for tool_cls in (BashTool, StrReplaceTool)
]

# Public surface of the package, grouped by origin.
__all__ = [
    # Ready-made spec list
    "NEMOTRON_TOOLS",
    # Re-exported from openhands.tools.nemotron.bash
    "BashTool",
    "BashAction",
    "BashObservation",
    "BashExecutor",
    # Re-exported from openhands.tools.nemotron.str_replace
    "StrReplaceTool",
    "StrReplaceAction",
    "StrReplaceObservation",
    "StrReplaceExecutor",
]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Core tool interface
2+
from openhands.tools.nemotron.bash.definition import (
3+
BashAction,
4+
BashObservation,
5+
BashTool,
6+
)
7+
from openhands.tools.nemotron.bash.impl import BashExecutor
8+
9+
10+
# Names re-exported by the bash sub-package: the tool definition, its
# action/observation schemas, and the executor.
__all__ = ["BashTool", "BashAction", "BashObservation", "BashExecutor"]
Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
"""Bash tool definition (Nemotron/Anthropic-compatible).
2+
3+
This is a thin wrapper around TerminalExecutor that exposes the tool as "bash"
4+
instead of "terminal", matching Anthropic's bash tool schema exactly.
5+
"""
6+
7+
import os
8+
from collections.abc import Sequence
9+
from typing import TYPE_CHECKING
10+
11+
from pydantic import Field
12+
from rich.text import Text
13+
14+
from openhands.sdk.tool import (
15+
Action,
16+
Observation,
17+
ToolAnnotations,
18+
ToolDefinition,
19+
register_tool,
20+
)
21+
from openhands.tools.terminal.constants import (
22+
MAX_CMD_OUTPUT_SIZE,
23+
NO_CHANGE_TIMEOUT_SECONDS,
24+
)
25+
from openhands.tools.terminal.metadata import CmdOutputMetadata
26+
27+
28+
if TYPE_CHECKING:
29+
from openhands.sdk.conversation.state import ConversationState
30+
31+
from openhands.sdk.llm import ImageContent, TextContent
32+
from openhands.sdk.utils import maybe_truncate
33+
34+
35+
class BashAction(Action):
    """Schema for bash command execution (Anthropic-compatible).

    This matches the Anthropic bash tool schema exactly:
    - command: str (required)
    """

    # NOTE(review): the class docstring and the Field description are kept
    # verbatim; pydantic presumably surfaces both in the generated tool schema,
    # so they are treated as runtime text rather than as comments.
    command: str = Field(
        description=(
            "The bash command to execute. Can be empty string to view additional "
            "logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to "
            "interrupt the currently running process. Note: You can only execute "
            "one bash command at a time. If you need to run multiple commands "
            "sequentially, you can use `&&` or `;` to chain them together."
        )
    )

    @property
    def visualize(self) -> Text:
        """Render the command behind a green ``$`` prompt.

        Returns:
            Rich ``Text`` holding the prompt followed by the command in white,
            or by an italic ``[empty command]`` placeholder when the command
            string is empty.
        """
        rendered = Text()
        rendered.append("$ ", style="bold green")

        # Guard clause: an empty command gets a visible placeholder.
        if not self.command:
            rendered.append("[empty command]", style="italic")
            return rendered

        rendered.append(self.command, style="white")
        return rendered
62+
63+
64+
class BashObservation(Observation):
    """A ToolResult that can be rendered as a CLI output."""

    # NOTE(review): the class docstring and Field descriptions are reproduced
    # verbatim; pydantic presumably exposes them through the model's schema.
    command: str | None = Field(
        description=(
            "The bash command that was executed. Can be empty string if the "
            "observation is from a previous command that hit soft timeout and "
            "is not yet finished."
        ),
    )
    exit_code: int | None = Field(
        default=None,
        description=(
            "The exit code of the command. -1 indicates the process hit the "
            "soft timeout and is not yet finished."
        ),
    )
    timeout: bool = Field(
        default=False, description="Whether the command execution timed out."
    )
    metadata: CmdOutputMetadata = Field(
        default_factory=CmdOutputMetadata,
        description="Additional metadata captured from PS1 after command execution.",
    )
    full_output_save_dir: str | None = Field(
        default=None,
        description="Directory where full output files are saved",
    )

    @property
    def command_id(self) -> int | None:
        """Return the ``pid`` recorded in the observation's metadata."""
        return self.metadata.pid

    @property
    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
        """Build the content items handed back to the LLM.

        Returns:
            A list that starts with the error header when ``is_error`` is set,
            followed by one text item made of the metadata prefix, the output
            text and the metadata suffix, plus bracketed footers for the
            working directory, the Python interpreter and (unless the exit
            code is ``-1``) the exit code. The text is passed through
            ``maybe_truncate`` with ``MAX_CMD_OUTPUT_SIZE`` before it is added.
        """
        items: list[TextContent | ImageContent] = []
        if self.is_error:
            items.append(TextContent(text=self.ERROR_MESSAGE_HEADER))

        meta = self.metadata
        pieces = [f"{meta.prefix}{self.text}{meta.suffix}"]
        if meta.working_dir:
            pieces.append(f"\n[Current working directory: {meta.working_dir}]")
        if meta.py_interpreter_path:
            pieces.append(f"\n[Python interpreter: {meta.py_interpreter_path}]")
        # -1 is the "still running" marker, so no exit-code footer is added.
        if meta.exit_code != -1:
            pieces.append(f"\n[Command finished with exit code {meta.exit_code}]")

        body = maybe_truncate(
            content="".join(pieces),
            truncate_after=MAX_CMD_OUTPUT_SIZE,
            save_dir=self.full_output_save_dir,
            tool_prefix="bash",
        )
        items.append(TextContent(text=body))
        return items

    @property
    def visualize(self) -> Text:
        """Render the observation as colour-coded Rich text.

        Returns:
            Rich ``Text`` with an optional error header, the output lines
            coloured by a keyword heuristic, and a footer describing the
            working directory, Python interpreter and exit status.
        """
        rendered = Text()

        if self.is_error:
            rendered.append("❌ ", style="red bold")
            rendered.append(self.ERROR_MESSAGE_HEADER, style="bold red")

        error_words = ("error", "failed", "exception", "traceback")
        warning_words = ("warning", "warn")

        output = self.text
        if output:
            for raw_line in output.split("\n"):
                if raw_line.strip():
                    lowered = raw_line.lower()
                    if any(word in lowered for word in error_words):
                        colour = "red"
                    elif any(word in lowered for word in warning_words):
                        colour = "yellow"
                    elif raw_line.startswith("+ "):
                        colour = "cyan"
                    else:
                        colour = "white"
                    rendered.append(raw_line, style=colour)
                # NOTE(review): the nesting of this newline could not be
                # recovered from the stripped indentation in the reviewed
                # view; it is emitted for every line, blank ones included --
                # confirm against the original file.
                rendered.append("\n")

        meta = getattr(self, "metadata", None)
        if meta:
            if meta.working_dir:
                rendered.append("\n📁 ", style="blue")
                rendered.append(
                    f"Working directory: {meta.working_dir}", style="blue"
                )

            if meta.py_interpreter_path:
                rendered.append("\n🐍 ", style="green")
                rendered.append(
                    f"Python interpreter: {meta.py_interpreter_path}",
                    style="green",
                )

            status = getattr(meta, "exit_code", None)
            if status is not None:
                if status == 0:
                    rendered.append("\n✅ ", style="green")
                    rendered.append(f"Exit code: {status}", style="green")
                elif status == -1:
                    rendered.append("\n⏳ ", style="yellow")
                    rendered.append(
                        "Process still running (soft timeout)", style="yellow"
                    )
                else:
                    rendered.append("\n❌ ", style="red")
                    rendered.append(f"Exit code: {status}", style="red")

        return rendered
184+
185+
186+
# Description text attached to the bash tool. Built from adjacent string
# literals; only the soft-timeout bullet interpolates a value.
TOOL_DESCRIPTION = (
    "Run a shell command and return stdout/stderr.\n"
    "\n"
    "### Command Execution\n"
    "* One command at a time: You can only execute one bash command at a time. "
    "If you need to run multiple commands sequentially, use `&&` or `;` to chain "
    "them together.\n"
    "* Persistent session: Commands execute in a persistent shell session where "
    "environment variables, virtual environments, and working directory persist "
    "between commands.\n"
    f"* Soft timeout: Commands have a soft timeout of {NO_CHANGE_TIMEOUT_SECONDS} "
    "seconds, once that's reached, you have the option to continue or interrupt the "
    "command.\n"
    "\n"
    "### Long-running Commands\n"
    "* For commands that may run indefinitely, run them in the background and "
    "redirect output to a file, e.g. `python3 app.py > server.log 2>&1 &`.\n"
    "* If a bash command returns exit code `-1`, this means the process hit the "
    "soft timeout and is not yet finished. Send empty `command` to retrieve "
    "additional logs or send `C-c` to interrupt.\n"
    "\n"
    "### Output Handling\n"
    "* Output truncation: If the output exceeds a maximum length, it will be "
    "truncated before being returned.\n"
)
210+
211+
212+
class BashTool(ToolDefinition[BashAction, BashObservation]):
    """Bash tool (Anthropic-compatible) that wraps TerminalExecutor."""

    @classmethod
    def create(
        cls,
        conv_state: "ConversationState",
    ) -> Sequence["BashTool"]:
        """Build the bash tool for a conversation.

        Args:
            conv_state: Conversation state; supplies the workspace working
                directory and the directory used to persist full outputs.

        Returns:
            A one-element list holding the configured tool.

        Raises:
            ValueError: If the workspace working directory is not an existing
                directory.
        """
        # Imported inside the method rather than at module top level.
        from openhands.tools.nemotron.bash.impl import BashExecutor

        workdir = conv_state.workspace.working_dir
        if not os.path.isdir(workdir):
            raise ValueError(f"working_dir '{workdir}' is not a valid directory")

        # The executor is constructed first, before the annotations.
        bash_executor = BashExecutor(
            working_dir=workdir,
            full_output_save_dir=conv_state.env_observation_persistence_dir,
        )
        hints = ToolAnnotations(
            title="bash",
            readOnlyHint=False,
            destructiveHint=True,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            action_type=BashAction,
            observation_type=BashObservation,
            description=TOOL_DESCRIPTION,
            annotations=hints,
            executor=bash_executor,
        )
        return [tool]
251+
252+
253+
# Register BashTool under its own name when this module is imported
# (a module-level side effect).
register_tool(BashTool.name, BashTool)

0 commit comments

Comments
 (0)