Skip to content

Commit f13a4ba

Browse files
author
Debug Agent
committed
Merge main into feat/acp-discriminated-union-v2 (picks up #2869)
2 parents 98db65c + e9fb43d commit f13a4ba

File tree

8 files changed

+733
-42
lines changed

8 files changed

+733
-42
lines changed

openhands-sdk/openhands/sdk/agent/acp_agent.py

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -752,12 +752,22 @@ def init_state(
752752

753753
self._initialized = True
754754

755-
# Store agent info in agent_state so it's accessible from remote
756-
# conversations (PrivateAttrs aren't serialized in state updates).
755+
# Persist agent info + the ACP session id + its cwd in agent_state.
756+
# Keeping these here (rather than on the frozen ACPAgent model) means
757+
# ConversationState's existing base_state.json persistence carries
758+
# them across agent-server restarts, and ``_start_acp_server`` on the
759+
# next launch reads them back to call ``load_session`` instead of
760+
# starting from scratch. We record ``acp_session_cwd`` alongside the
761+
# id because ACP servers key their persistence by ``cwd``: resuming
762+
# in a different working directory would at best silently miss the
763+
# prior session and at worst load a different session that happens to
764+
# exist at the new cwd.
757765
state.agent_state = {
758766
**state.agent_state,
759767
"acp_agent_name": self._agent_name,
760768
"acp_agent_version": self._agent_version,
769+
"acp_session_id": self._session_id,
770+
"acp_session_cwd": self._working_dir,
761771
}
762772

763773
def _start_acp_server(self, state: ConversationState) -> None:
@@ -777,6 +787,27 @@ def _start_acp_server(self, state: ConversationState) -> None:
777787

778788
working_dir = str(state.workspace.working_dir)
779789

790+
# Prior ACP session id — survives agent-server restarts via
791+
# ConversationState.agent_state (serialized into base_state.json).
792+
# Its presence is the signal to resume; its absence means fresh start.
793+
# ACP servers key persistence by ``cwd``; if the workspace moved we
794+
# drop the id so we don't accidentally resume (or silently load) a
795+
# session the server associates with a different directory.
796+
prior_session_id: str | None = state.agent_state.get("acp_session_id")
797+
prior_session_cwd: str | None = state.agent_state.get("acp_session_cwd")
798+
if prior_session_id is not None and prior_session_cwd not in (
799+
None,
800+
working_dir,
801+
):
802+
logger.warning(
803+
"ACP session %s was created with cwd=%s; current cwd=%s differs, "
804+
"starting a fresh session instead of resuming",
805+
prior_session_id,
806+
prior_session_cwd,
807+
working_dir,
808+
)
809+
prior_session_id = None
810+
780811
async def _init() -> tuple[Any, Any, Any, str, str, str]:
781812
# Spawn the subprocess directly so we can install a
782813
# filtering reader that skips non-JSON-RPC lines some
@@ -845,14 +876,43 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]:
845876
[m.id for m in auth_methods],
846877
)
847878

848-
# Build _meta content for session options (e.g. model selection).
849-
# Extra kwargs to new_session() become the _meta dict in the
850-
# JSON-RPC request — do NOT wrap in _meta= (that double-nests).
851-
session_meta = _build_session_meta(agent_name, self.acp_model)
879+
# Resume the prior ACP session if we have its id. If the server
880+
# has forgotten it (state wiped, new host, etc.) fall through to
881+
# new_session so the conversation still starts cleanly.
882+
#
883+
# We only swallow ACPRequestError here: that is the protocol-level
884+
# "I don't know this session" signal and is recoverable by
885+
# starting fresh. Transport failures (broken pipe, EOF, timeout,
886+
# subprocess crash) propagate — there is no working connection to
887+
# fall back on, and the outer init_state handler cleans up.
888+
session_id: str | None = None
889+
if prior_session_id is not None:
890+
try:
891+
await conn.load_session(
892+
cwd=working_dir,
893+
session_id=prior_session_id,
894+
mcp_servers=[],
895+
)
896+
session_id = prior_session_id
897+
logger.info(
898+
"Resumed ACP session: %s (cwd=%s)",
899+
session_id,
900+
working_dir,
901+
)
902+
except ACPRequestError as e:
903+
logger.warning(
904+
"ACP load_session(%s) failed (%s); starting a fresh session",
905+
prior_session_id,
906+
e,
907+
)
852908

853-
# Create a new session
854-
response = await conn.new_session(cwd=working_dir, **session_meta)
855-
session_id = response.session_id
909+
if session_id is None:
910+
# Build _meta content for session options (e.g. model selection).
911+
# Extra kwargs to new_session() become the _meta dict in the
912+
# JSON-RPC request — do NOT wrap in _meta= (that double-nests).
913+
session_meta = _build_session_meta(agent_name, self.acp_model)
914+
response = await conn.new_session(cwd=working_dir, **session_meta)
915+
session_id = response.session_id
856916
await _maybe_set_session_model(
857917
conn,
858918
agent_name,

openhands-tools/openhands/tools/terminal/definition.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,17 @@ class TerminalAction(Action):
3333
"""Schema for bash command execution."""
3434

3535
command: str = Field(
36-
description="The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together." # noqa
36+
description=(
37+
"The bash command to execute. Can be empty string to view"
38+
" additional logs when previous exit code is `-1`. Can be a"
39+
" special key name when `is_input` is True: `C-c` (Ctrl+C),"
40+
" `C-d` (Ctrl+D/EOF), `C-z` (Ctrl+Z), or any `C-<letter>`"
41+
" for Ctrl sequences; navigation keys `UP`, `DOWN`, `LEFT`,"
42+
" `RIGHT`, `HOME`, `END`, `PGUP`, `PGDN`; and `TAB`, `ESC`,"
43+
" `BS` (Backspace), `ENTER`. Note: You can only execute one"
44+
" bash command at a time. If you need to run multiple commands"
45+
" sequentially, you can use `&&` or `;` to chain them together."
46+
)
3747
)
3848
is_input: bool = Field(
3949
default=False,
@@ -217,6 +227,8 @@ def visualize(self) -> Text:
217227
- Send empty `command` to retrieve additional logs
218228
- Send text (set `command` to the text) to STDIN of the running process
219229
- Send control commands like `C-c` (Ctrl+C), `C-d` (Ctrl+D), or `C-z` (Ctrl+Z) to interrupt the process
230+
- Send navigation keys like `UP`, `DOWN`, `LEFT`, `RIGHT`, `TAB`, `ESC`, `BS` (Backspace), `HOME`, `END`, `PGUP`, `PGDN`
231+
- Any `C-<letter>` Ctrl sequence is supported (e.g. `C-a`, `C-e`, `C-l`)
220232
- If you do C-c, you can re-start the process with a longer "timeout" parameter to let it run to completion
221233
222234
### Best Practices

openhands-tools/openhands/tools/terminal/terminal/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33

44
from openhands.tools.terminal.terminal.factory import create_terminal_session
55
from openhands.tools.terminal.terminal.interface import (
6+
SUPPORTED_SPECIAL_KEYS,
67
TerminalInterface,
78
TerminalSessionBase,
9+
parse_ctrl_key,
810
)
911
from openhands.tools.terminal.terminal.terminal_session import (
1012
TerminalCommandStatus,
@@ -27,11 +29,13 @@
2729

2830

2931
__all__ = [
32+
"SUPPORTED_SPECIAL_KEYS",
3033
"TerminalInterface",
3134
"TerminalSessionBase",
3235
"TmuxTerminal",
3336
"SubprocessTerminal",
3437
"TerminalSession",
3538
"TerminalCommandStatus",
3639
"create_terminal_session",
40+
"parse_ctrl_key",
3741
]

openhands-tools/openhands/tools/terminal/terminal/interface.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,50 @@
1212
)
1313

1414

15+
# Canonical set of named special keys that all TerminalInterface
16+
# implementations must support. Each backend maps these to its own
17+
# representation (ANSI escape bytes for PTY, tmux key names for tmux).
18+
SUPPORTED_SPECIAL_KEYS: frozenset[str] = frozenset(
19+
{
20+
"ENTER",
21+
"TAB",
22+
"BS",
23+
"ESC",
24+
"UP",
25+
"DOWN",
26+
"LEFT",
27+
"RIGHT",
28+
"HOME",
29+
"END",
30+
"PGUP",
31+
"PGDN",
32+
"C-L",
33+
"C-D",
34+
"C-C",
35+
}
36+
)
37+
38+
39+
def parse_ctrl_key(text: str) -> str | None:
40+
"""Parse a Ctrl-<letter> token and return the normalized form ``C-x``.
41+
42+
Accepts ``C-x``, ``CTRL-x``, and ``CTRL+x`` (case-insensitive)
43+
where *x* is a single ASCII letter. Returns ``None`` when *text*
44+
is not a recognized Ctrl sequence.
45+
"""
46+
upper = text.strip().upper()
47+
key: str | None = None
48+
if upper.startswith("C-"):
49+
key = upper[2:]
50+
elif upper.startswith("CTRL-"):
51+
key = upper[5:]
52+
elif upper.startswith("CTRL+"):
53+
key = upper[5:]
54+
if key and len(key) == 1 and "A" <= key <= "Z":
55+
return f"C-{key.lower()}"
56+
return None
57+
58+
1559
class TerminalInterface(ABC):
1660
"""Abstract interface for terminal backends.
1761
@@ -63,9 +107,17 @@ def close(self) -> None:
63107
def send_keys(self, text: str, enter: bool = True) -> None:
64108
"""Send text/keys to the terminal.
65109
110+
All implementations must support:
111+
- Plain text (sent verbatim)
112+
- Named specials: ENTER, TAB, BS, ESC, UP, DOWN, LEFT, RIGHT,
113+
HOME, END, PGUP, PGDN, C-L, C-D, C-C
114+
- Generic Ctrl sequences: ``C-<letter>``, ``CTRL-<letter>``,
115+
``CTRL+<letter>`` (case-insensitive, a-z)
116+
66117
Args:
67118
text: Text or key sequence to send to the terminal.
68-
enter: Whether to send Enter key after the text. Defaults to True.
119+
enter: Whether to send Enter key after the text.
120+
Defaults to True. Ignored for special/ctrl keys.
69121
"""
70122

71123
@abstractmethod

openhands-tools/openhands/tools/terminal/terminal/subprocess_terminal.py

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,32 @@
3030
)
3131
from openhands.tools.terminal.metadata import CmdOutputMetadata
3232
from openhands.tools.terminal.terminal import TerminalInterface
33+
from openhands.tools.terminal.terminal.interface import parse_ctrl_key
3334

3435

3536
logger = get_logger(__name__)
3637

3738
ENTER = b"\n"
3839

40+
# Map normalized special key names to ANSI escape bytes for PTY.
41+
_SUBPROCESS_SPECIALS: dict[str, bytes] = {
42+
"ENTER": ENTER,
43+
"TAB": b"\t",
44+
"BS": b"\x7f", # Backspace (DEL)
45+
"ESC": b"\x1b",
46+
"UP": b"\x1b[A",
47+
"DOWN": b"\x1b[B",
48+
"RIGHT": b"\x1b[C",
49+
"LEFT": b"\x1b[D",
50+
"HOME": b"\x1b[H",
51+
"END": b"\x1b[F",
52+
"PGUP": b"\x1b[5~",
53+
"PGDN": b"\x1b[6~",
54+
"C-L": b"\x0c", # Ctrl+L
55+
"C-D": b"\x04", # Ctrl+D (EOF)
56+
"C-C": b"\x03", # Ctrl+C (SIGINT)
57+
}
58+
3959

4060
def _normalize_eols(raw: bytes) -> bytes:
4161
# CRLF/LF/CR -> CR, so each logical line is terminated with \r for the TTY
@@ -341,7 +361,7 @@ def send_keys(self, text: str, enter: bool = True) -> None:
341361
- Plain text
342362
- Ctrl sequences: 'C-a'..'C-z' (Ctrl+C sends ^C byte)
343363
- Special names: 'ENTER','TAB','BS','ESC','UP','DOWN','LEFT','RIGHT',
344-
'HOME','END','PGUP','PGDN','C-L','C-D'
364+
'HOME','END','PGUP','PGDN','C-L','C-D','C-C'
345365
346366
For multi-line commands exceeding _MULTILINE_THRESHOLD lines, sends
347367
line-by-line with pacing to prevent overwhelming the shell's input
@@ -350,40 +370,19 @@ def send_keys(self, text: str, enter: bool = True) -> None:
350370
if not self._initialized:
351371
raise RuntimeError("PTY terminal is not initialized")
352372

353-
specials = {
354-
"ENTER": ENTER,
355-
"TAB": b"\t",
356-
"BS": b"\x7f", # Backspace (DEL)
357-
"ESC": b"\x1b",
358-
"UP": b"\x1b[A",
359-
"DOWN": b"\x1b[B",
360-
"RIGHT": b"\x1b[C",
361-
"LEFT": b"\x1b[D",
362-
"HOME": b"\x1b[H",
363-
"END": b"\x1b[F",
364-
"PGUP": b"\x1b[5~",
365-
"PGDN": b"\x1b[6~",
366-
"C-L": b"\x0c", # Ctrl+L
367-
"C-D": b"\x04", # Ctrl+D (EOF)
368-
}
369-
370373
upper = text.upper().strip()
371374
payload: bytes | None = None
372375

373376
# Named specials
374-
if upper in specials:
375-
payload = specials[upper]
377+
if upper in _SUBPROCESS_SPECIALS:
378+
payload = _SUBPROCESS_SPECIALS[upper]
376379
# Do NOT auto-append another EOL; special already includes it when needed.
377380
append_eol = False
378-
# Generic Ctrl-<letter>, including C-C (preferred over sending SIGINT directly)
379-
elif upper.startswith(("C-", "CTRL-", "CTRL+")):
380-
# last char after dash/plus is the key
381-
key = upper.split("-", 1)[-1].split("+", 1)[-1]
382-
if len(key) == 1 and "A" <= key <= "Z":
383-
payload = bytes([ord(key) & 0x1F])
384-
else:
385-
# Unknown form; fall back to raw text
386-
payload = text.encode("utf-8", "ignore")
381+
# Generic Ctrl-<letter>
382+
elif (ctrl := parse_ctrl_key(text)) is not None:
383+
# ctrl is "C-x" — extract the letter
384+
key_char = ctrl[-1].upper()
385+
payload = bytes([ord(key_char) & 0x1F])
387386
append_eol = False # ctrl combos are "instant"
388387
else:
389388
# Check if this is a long multi-line command that needs chunked sending

openhands-tools/openhands/tools/terminal/terminal/tmux_terminal.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,30 @@
1515
)
1616
from openhands.tools.terminal.metadata import CmdOutputMetadata
1717
from openhands.tools.terminal.terminal import TerminalInterface
18+
from openhands.tools.terminal.terminal.interface import parse_ctrl_key
1819

1920

2021
logger = get_logger(__name__)
2122

23+
# Map normalized special key names to tmux key names.
24+
_TMUX_SPECIALS: dict[str, str] = {
25+
"ENTER": "Enter",
26+
"TAB": "Tab",
27+
"BS": "BSpace",
28+
"ESC": "Escape",
29+
"UP": "Up",
30+
"DOWN": "Down",
31+
"LEFT": "Left",
32+
"RIGHT": "Right",
33+
"HOME": "Home",
34+
"END": "End",
35+
"PGUP": "PPage",
36+
"PGDN": "NPage",
37+
"C-L": "C-l",
38+
"C-D": "C-d",
39+
"C-C": "C-c",
40+
}
41+
2242

2343
class TmuxTerminal(TerminalInterface):
2444
"""Tmux-based terminal backend.
@@ -114,14 +134,39 @@ def close(self) -> None:
114134
def send_keys(self, text: str, enter: bool = True) -> None:
115135
"""Send text/keys to the tmux pane.
116136
137+
Supports:
138+
- Plain text (uses literal paste; preserves spaces/newlines)
139+
- Named specials: ENTER, TAB, BS, ESC, UP, DOWN, LEFT, RIGHT,
140+
HOME, END, PGUP, PGDN, C-L, C-D, C-C
141+
- Generic Ctrl sequences: C-a..C-z, CTRL-x, CTRL+x
142+
117143
Args:
118144
text: Text or key sequence to send
119-
enter: Whether to send Enter key after the text
145+
enter: Whether to send Enter key after the text.
146+
Ignored for special/ctrl keys.
120147
"""
121148
if not self._initialized or not isinstance(self.pane, libtmux.Pane):
122149
raise RuntimeError("Tmux terminal is not initialized")
123150

124-
self.pane.send_keys(text, enter=enter)
151+
# Map normalized names to tmux key names
152+
upper = text.strip().upper()
153+
154+
# 1) Named specials
155+
if upper in _TMUX_SPECIALS:
156+
self.pane.send_keys(_TMUX_SPECIALS[upper], enter=False)
157+
return
158+
159+
# 2) Generic Ctrl-<letter>
160+
ctrl = parse_ctrl_key(text)
161+
if ctrl is not None:
162+
self.pane.send_keys(ctrl, enter=False)
163+
return
164+
165+
# 3) Plain text — use literal=True so tmux doesn't split on
166+
# whitespace or interpret special tokens.
167+
self.pane.send_keys(text, enter=False, literal=True)
168+
if enter and not text.endswith("\n"):
169+
self.pane.send_keys("Enter", enter=False)
125170

126171
def read_screen(self) -> str:
127172
"""Read the current tmux pane content.

0 commit comments

Comments
 (0)