Skip to content
61 changes: 61 additions & 0 deletions .pr/test_real_world.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""Real-world test for terminal query filtering fix.

Tests that terminal query sequences (DSR, OSC 11, etc.) are filtered from
captured terminal output before display, preventing visible escape code garbage.

Usage:
# With All-Hands LLM proxy:
LLM_BASE_URL="https://llm-proxy.eval.all-hands.dev" LLM_API_KEY="$LLM_API_KEY" \
uv run python .pr/test_real_world.py

# With direct API:
LLM_API_KEY="your-key" uv run python .pr/test_real_world.py

See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""

import os
import sys

from openhands.sdk import LLM, Agent, Conversation, Tool
from openhands.tools.terminal import TerminalTool


# Manual smoke test: drive a real agent through the terminal tool and let a
# human inspect the rendered output for leaked terminal-query escape codes.

print("=" * 60)
print("REAL-WORLD TEST: Terminal Query Filtering Fix")
print("=" * 60)
# Whether stdin/stdout are attached to a TTY is printed so the person running
# the script can see the conditions the run happened under.
print(f"stdin.isatty(): {sys.stdin.isatty()}")
print(f"stdout.isatty(): {sys.stdout.isatty()}")
print()

# Fail with a readable message instead of a bare KeyError traceback when the
# mandatory credential is missing.
api_key = os.environ.get("LLM_API_KEY")
if not api_key:
    sys.exit("LLM_API_KEY is not set; export it before running this script.")

llm = LLM(
    model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"),
    api_key=api_key,
    base_url=os.environ.get("LLM_BASE_URL"),
)

agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)])
conversation = Conversation(agent=agent, workspace="/tmp")

# Commands with spinners (like gh) send terminal queries that would
# cause visible garbage if not filtered
print(">>> Sending message to agent...")
print(">>> The gh command sends terminal queries - these should be filtered")
print()

try:
    conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3")
    conversation.run()
finally:
    # Close the conversation even when the run raises (network error,
    # Ctrl-C, ...); previously an exception skipped close() entirely.
    conversation.close()

print()
print("=" * 60)
print("TEST COMPLETE")
print("=" * 60)
print()
print("SUCCESS CRITERIA:")
print(" 1. NO visible escape codes (^[[...R, rgb:...) in the output above")
print(" 2. NO garbage on the shell prompt after this script exits")
print(" 3. Colors in the gh output should still be visible (if terminal supports)")
print()
print("The fix filters terminal QUERY sequences while preserving formatting.")
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
escape_bash_special_chars,
split_bash_commands,
)
from openhands.tools.terminal.utils.escape_filter import TerminalQueryFilter


logger = get_logger(__name__)
Expand Down Expand Up @@ -79,6 +80,8 @@ def __init__(
# Store the last command for interactive input handling
self.prev_status = None
self.prev_output = ""
# Stateful filter for terminal query sequences (handles split sequences)
self._query_filter = TerminalQueryFilter()

def initialize(self) -> None:
"""Initialize the terminal backend."""
Expand Down Expand Up @@ -119,8 +122,22 @@ def _get_command_output(
raw_command_output: str,
metadata: CmdOutputMetadata,
continue_prefix: str = "",
is_final: bool = False,
) -> str:
"""Get the command output with the previous command output removed."""
"""Get the command output with the previous command output removed.

Also filters terminal query sequences that could cause visible escape
code garbage when the output is displayed. Uses stateful filtering to
handle escape sequences that may be split across incremental outputs.
See: https://github.com/OpenHands/software-agent-sdk/issues/2244

Args:
command: The command being executed
raw_command_output: Raw output from terminal
metadata: Output metadata to populate
continue_prefix: Prefix for continuation output
is_final: If True, flush any pending filter state (command completed)
"""
# remove the previous command output from the new output if any
if self.prev_output:
command_output = raw_command_output.removeprefix(self.prev_output)
Expand All @@ -129,6 +146,15 @@ def _get_command_output(
command_output = raw_command_output
self.prev_output = raw_command_output # update current command output anyway
command_output = _remove_command_prefix(command_output, command)

# Filter terminal query sequences that would cause the terminal to
# respond when displayed, producing visible garbage.
# The filter is stateful to handle sequences split across chunks.
command_output = self._query_filter.filter(command_output)
if is_final:
# Flush any pending bytes when command completes
command_output += self._query_filter.flush()

return command_output.rstrip()

def _handle_completed_command(
Expand Down Expand Up @@ -184,13 +210,15 @@ def _handle_completed_command(
command,
raw_command_output,
metadata,
is_final=True, # Command completed, flush filter state
)
command_output = maybe_truncate(
command_output, truncate_after=MAX_CMD_OUTPUT_SIZE
)

self.prev_status = TerminalCommandStatus.COMPLETED
self.prev_output = "" # Reset previous command output
self._query_filter.reset() # Reset filter for next command
self._ready_for_next_command()
return TerminalObservation.from_text(
command=command,
Expand Down
18 changes: 18 additions & 0 deletions openhands-tools/openhands/tools/terminal/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Terminal tool utilities."""

from openhands.tools.terminal.utils.command import (
escape_bash_special_chars,
split_bash_commands,
)
from openhands.tools.terminal.utils.escape_filter import (
TerminalQueryFilter,
filter_terminal_queries,
)


# Names exported by `from ... import *`; each one is imported above from the
# `command` or `escape_filter` submodule.
__all__ = [
"escape_bash_special_chars",
"split_bash_commands",
"filter_terminal_queries",
"TerminalQueryFilter",
]
201 changes: 201 additions & 0 deletions openhands-tools/openhands/tools/terminal/utils/escape_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
"""Filter terminal query sequences from captured output.

When CLI tools (like `gh`, `npm`, etc.) run inside a PTY, they may send
terminal query sequences as part of their progress/spinner UI. These queries
get captured as output. When displayed, the terminal processes them and
responds, causing visible escape code garbage.

This module provides filtering to remove these query sequences while
preserving legitimate formatting escape codes (colors, bold, etc.).

NOTE: This module only handles queries captured from PTY output (commands
run via the terminal tool). SDK-side queries (e.g., Rich library capability
detection) are not addressed here and would require filtering at the
conversation/visualizer boundary.

See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""

import re


# Terminal query sequences that trigger responses (and cause visible garbage)
# These should be stripped from captured output before display.
#
# Reference: ECMA-48, XTerm Control Sequences
# https://invisible-island.net/xterm/ctlseqs/ctlseqs.html

# DSR (Device Status Report) - cursor position query
# Format: ESC [ 6 n  ->  Response: ESC [ row ; col R
_DSR_PATTERN = re.compile(rb"\x1b\[6n")

# OSC (Operating System Command) queries
# Format: ESC ] Ps ; ? (BEL | ST)
# The ";?" pattern indicates a QUERY (vs SET which has actual values)
# Examples:
#   OSC 10 ; ?          - foreground color query
#   OSC 11 ; ?          - background color query
#   OSC 4 ; index ; ?   - palette color query
#   OSC 12 ; ?          - cursor color query
#   OSC 17 ; ?          - highlight background query
# Terminators: BEL (\x07) or ST (ESC \)
#
# This pattern matches ANY OSC query (ending with ;?) rather than
# specific codes, making it future-proof for other query types.
_OSC_QUERY_PATTERN = re.compile(
    rb"\x1b\]"  # OSC introducer
    rb"\d+"  # Parameter number (10, 11, 4, 12, etc.)
    rb"(?:;[^;\x07\x1b]*)?"  # Optional sub-parameter (e.g., palette index)
    rb";\?"  # Query marker - the key indicator this is a query
    rb"(?:\x07|\x1b\\)"  # BEL or ST terminator
)

# DA (Device Attributes) primary query
# Format: ESC [ c  or  ESC [ 0 c
_DA_PATTERN = re.compile(rb"\x1b\[0?c")

# DA2 (Secondary Device Attributes) query
# Format: ESC [ > c  or  ESC [ > 0 c
_DA2_PATTERN = re.compile(rb"\x1b\[>0?c")

# DECRQSS (Request Selection or Setting) - various terminal state queries
# Format: ESC P $ q <setting> ST
_DECRQSS_PATTERN = re.compile(
    rb"\x1bP\$q"  # DCS introducer + DECRQSS
    rb"[^\x1b]*"  # Setting identifier
    rb"\x1b\\"  # ST terminator
)

# Pattern to detect an incomplete escape sequence at the very end of a chunk,
# i.e. a possible query prefix whose remainder may arrive in the next chunk.
#
# Design notes:
#   - OSC/DCS payloads stop at the first ESC. An ESC followed by "\" is the ST
#     terminator, so a sequence that already contains ST is complete and must
#     NOT be held back (otherwise everything after it would be buffered until
#     a BEL shows up or the filter is flushed).
#   - A payload may be followed by one trailing ESC: that is the first half of
#     an ST split across chunks, and the whole sequence has to be held so the
#     query can still be recognised once the "\" arrives.
#   - The anchor is \Z rather than $, because $ also matches just before a
#     trailing newline and would wrongly treat "ESC \n" as incomplete.
_INCOMPLETE_ESC_PATTERN = re.compile(
    rb"(?:"
    rb"\x1b\[[0-9;>]*"  # CSI with parameters but no final byte yet
    rb"|\x1b\][^\x07\x1b]*\x1b?"  # OSC with no BEL/ST yet (maybe half an ST)
    rb"|\x1bP[^\x1b]*\x1b?"  # DCS with no ST yet (maybe half an ST)
    rb"|\x1b"  # Bare ESC: could be the start of any sequence
    rb")\Z"
)


def _filter_complete_queries(output_bytes: bytes) -> bytes:
    """Remove every complete terminal query sequence from ``output_bytes``.

    The patterns are applied in a fixed order: DSR, OSC query, DA, DA2,
    DECRQSS. Anything that is not one of these queries is left untouched.
    """
    for pattern in (
        _DSR_PATTERN,
        _OSC_QUERY_PATTERN,
        _DA_PATTERN,
        _DA2_PATTERN,
        _DECRQSS_PATTERN,
    ):
        output_bytes = pattern.sub(b"", output_bytes)
    return output_bytes


class TerminalQueryFilter:
    """Stateful filter for terminal query sequences.

    This filter maintains state across calls to handle escape sequences that
    may be split across multiple output chunks (which happens with long-running
    commands surfaced incrementally).

    Usage:
        filter = TerminalQueryFilter()
        filtered1 = filter.filter(chunk1)
        filtered2 = filter.filter(chunk2)
        # ... and so on

        # When the output is complete, emit whatever is still held back:
        tail = filter.flush()

        # Before starting the next command, drop any leftover state:
        filter.reset()
    """

    def __init__(self) -> None:
        # Trailing bytes of the previous chunk that looked like the start of
        # an escape sequence and are waiting for the next chunk.
        self._pending: bytes = b""

    def reset(self) -> None:
        """Reset filter state between commands (held bytes are discarded)."""
        self._pending = b""

    def filter(self, output: str) -> str:
        """Filter terminal query sequences from captured terminal output.

        Removes escape sequences that would cause the terminal to respond
        when the output is displayed, while preserving legitimate formatting
        sequences (colors, cursor movement, etc.).

        This method is stateful: an incomplete escape sequence at the end of
        a chunk is held until the next chunk arrives, so split sequences
        are properly detected and filtered.

        Args:
            output: Raw terminal output that may contain query sequences.

        Returns:
            Filtered output with query sequences removed. A trailing
            incomplete escape sequence is withheld until the next call to
            ``filter`` or ``flush``.
        """
        # Convert to bytes for regex matching (escape sequences are byte-level)
        output_bytes = output.encode("utf-8", errors="surrogateescape")

        # Prepend any pending bytes from previous call
        if self._pending:
            output_bytes = self._pending + output_bytes
            self._pending = b""

        # Hold back an incomplete escape sequence at the end, if there is one
        match = _INCOMPLETE_ESC_PATTERN.search(output_bytes)
        if match:
            self._pending = output_bytes[match.start() :]
            output_bytes = output_bytes[: match.start()]

        # Filter complete query sequences
        output_bytes = _filter_complete_queries(output_bytes)

        # Convert back to string
        return output_bytes.decode("utf-8", errors="surrogateescape")

    def flush(self) -> str:
        """Flush any pending bytes that weren't part of a query.

        Call this when output is complete to emit any trailing bytes that
        turned out not to be query sequences.

        Returns:
            Any pending bytes as a string, filtered for queries; the empty
            string when nothing is pending.
        """
        if not self._pending:
            return ""
        pending = self._pending
        self._pending = b""
        # Filter the pending bytes in case they form a complete query
        filtered = _filter_complete_queries(pending)
        return filtered.decode("utf-8", errors="surrogateescape")


# Singleton instance for simple stateless usage (backward compatibility)
# NOTE(review): filter_terminal_queries() in this module builds a fresh
# TerminalQueryFilter per call and does not use this instance; confirm whether
# anything else still references it.
_default_filter = TerminalQueryFilter()


def filter_terminal_queries(output: str) -> str:
    """Strip terminal query sequences from a complete piece of output.

    One-shot convenience wrapper: each call works on its own throwaway
    TerminalQueryFilter, so no state is carried from one call to the next.
    For incremental output, where a sequence may be split across chunks,
    use the TerminalQueryFilter class directly.

    Escape sequences that would make the terminal respond when the text is
    displayed are removed; ordinary formatting sequences (colors, cursor
    movement, etc.) are kept.

    Args:
        output: Raw terminal output that may contain query sequences.

    Returns:
        Filtered output with query sequences removed.
    """
    one_shot = TerminalQueryFilter()
    head = one_shot.filter(output)
    # The input is treated as complete, so anything the filter held back as a
    # possible sequence prefix is emitted right away.
    tail = one_shot.flush()
    return head + tail
Loading
Loading