OpenHands · jpshackelford · Mar 5, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/.pr/test_real_world.py b/.pr/test_real_world.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Real-world test for terminal query filtering fix.
+
+Tests that terminal query sequences (DSR, OSC 11, etc.) are filtered from
+captured terminal output before display, preventing visible escape code garbage.
+
+Usage:
+    # With All-Hands LLM proxy:
+    LLM_BASE_URL="https://llm-proxy.eval.all-hands.dev" LLM_API_KEY="$LLM_API_KEY" \
+        uv run python .pr/test_real_world.py
+
+    # With direct API:
+    LLM_API_KEY="your-key" uv run python .pr/test_real_world.py
+
+See: https://github.com/OpenHands/software-agent-sdk/issues/2244
+"""
+
+import os
+import sys
+
+from openhands.sdk import LLM, Agent, Conversation, Tool
+from openhands.tools.terminal import TerminalTool
+
+
+print("=" * 60)
+print("REAL-WORLD TEST: Terminal Query Filtering Fix")
+print("=" * 60)
+print(f"stdin.isatty(): {sys.stdin.isatty()}")
+print(f"stdout.isatty(): {sys.stdout.isatty()}")
+print()
+
+llm = LLM(
+    model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"),
+    api_key=os.environ["LLM_API_KEY"],
+    base_url=os.environ.get("LLM_BASE_URL"),
+)
+
+agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)])
+conversation = Conversation(agent=agent, workspace="/tmp")
+
+# Commands with spinners (like gh) send terminal queries that would
+# cause visible garbage if not filtered
+print(">>> Sending message to agent...")
+print(">>> The gh command sends terminal queries - these should be filtered")
+print()
+
+conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3")
+conversation.run()
+conversation.close()
+
+print()
+print("=" * 60)
+print("TEST COMPLETE")
+print("=" * 60)
+print()
+print("SUCCESS CRITERIA:")
+print("  1. NO visible escape codes (^[[...R, rgb:...) in the output above")
+print("  2. NO garbage on the shell prompt after this script exits")
+print("  3. Colors in the gh output should still be visible (if terminal supports)")
+print()
+print("The fix filters terminal QUERY sequences while preserving formatting.")
diff --git a/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py b/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py
@@ -26,6 +26,7 @@
     escape_bash_special_chars,
     split_bash_commands,
 )
+from openhands.tools.terminal.utils.escape_filter import TerminalQueryFilter
 
 
 logger = get_logger(__name__)
@@ -79,6 +80,8 @@ def __init__(
         # Store the last command for interactive input handling
         self.prev_status = None
         self.prev_output = ""
+        # Stateful filter for terminal query sequences (handles split sequences)
+        self._query_filter = TerminalQueryFilter()
 
     def initialize(self) -> None:
         """Initialize the terminal backend."""
@@ -119,8 +122,22 @@ def _get_command_output(
         raw_command_output: str,
         metadata: CmdOutputMetadata,
         continue_prefix: str = "",
+        is_final: bool = False,
     ) -> str:
-        """Get the command output with the previous command output removed."""
+        """Get the command output with the previous command output removed.
+
+        Also filters terminal query sequences that could cause visible escape
+        code garbage when the output is displayed. Uses stateful filtering to
+        handle escape sequences that may be split across incremental outputs.
+        See: https://github.com/OpenHands/software-agent-sdk/issues/2244
+
+        Args:
+            command: The command being executed
+            raw_command_output: Raw output from terminal
+            metadata: Output metadata to populate
+            continue_prefix: Prefix for continuation output
+            is_final: If True, flush any pending filter state (command completed)
+        """
         # remove the previous command output from the new output if any
         if self.prev_output:
             command_output = raw_command_output.removeprefix(self.prev_output)
@@ -129,6 +146,15 @@ def _get_command_output(
             command_output = raw_command_output
         self.prev_output = raw_command_output  # update current command output anyway
         command_output = _remove_command_prefix(command_output, command)
+
+        # Filter terminal query sequences that would cause the terminal to
+        # respond when displayed, producing visible garbage.
+        # The filter is stateful to handle sequences split across chunks.
+        command_output = self._query_filter.filter(command_output)
+        if is_final:
+            # Flush any pending bytes when command completes
+            command_output += self._query_filter.flush()
+
         return command_output.rstrip()
 
     def _handle_completed_command(
@@ -184,13 +210,15 @@ def _handle_completed_command(
             command,
             raw_command_output,
             metadata,
+            is_final=True,  # Command completed, flush filter state
         )
         command_output = maybe_truncate(
             command_output, truncate_after=MAX_CMD_OUTPUT_SIZE
         )
 
         self.prev_status = TerminalCommandStatus.COMPLETED
         self.prev_output = ""  # Reset previous command output
+        self._query_filter.reset()  # Reset filter for next command
         self._ready_for_next_command()
         return TerminalObservation.from_text(
             command=command,

diff --git a/openhands-tools/openhands/tools/terminal/utils/__init__.py b/openhands-tools/openhands/tools/terminal/utils/__init__.py
@@ -0,0 +1,18 @@
+"""Terminal tool utilities."""
+
+from openhands.tools.terminal.utils.command import (
+    escape_bash_special_chars,
+    split_bash_commands,
+)
+from openhands.tools.terminal.utils.escape_filter import (
+    TerminalQueryFilter,
+    filter_terminal_queries,
+)
+
+
+__all__ = [
+    "escape_bash_special_chars",
+    "split_bash_commands",
+    "filter_terminal_queries",
+    "TerminalQueryFilter",
+]
diff --git a/openhands-tools/openhands/tools/terminal/utils/escape_filter.py b/openhands-tools/openhands/tools/terminal/utils/escape_filter.py
@@ -0,0 +1,201 @@
+"""Filter terminal query sequences from captured output.
+
+When CLI tools (like `gh`, `npm`, etc.) run inside a PTY, they may send
+terminal query sequences as part of their progress/spinner UI. These queries
+get captured as output. When displayed, the terminal processes them and
+responds, causing visible escape code garbage.
+
+This module provides filtering to remove these query sequences while
+preserving legitimate formatting escape codes (colors, bold, etc.).
+
+NOTE: This module only handles queries captured from PTY output (commands
+run via the terminal tool). SDK-side queries (e.g., Rich library capability
+detection) are not addressed here and would require filtering at the
+conversation/visualizer boundary.
+
+See: https://github.com/OpenHands/software-agent-sdk/issues/2244
+"""
+
+import re
+
+
+# Terminal query sequences that trigger responses (and cause visible garbage)
+# These should be stripped from captured output before display.
+#
+# Reference: ECMA-48, XTerm Control Sequences
+# https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
+
+# DSR (Device Status Report) - cursor position query
+# Format: ESC [ 6 n  ->  Response: ESC [ row ; col R
+_DSR_PATTERN = re.compile(rb"\x1b\[6n")
+
+# OSC (Operating System Command) queries
+# Format: ESC ] Ps ; ? (BEL | ST)
+# The ";?" pattern indicates a QUERY (vs SET which has actual values)
+# Examples:
+#   OSC 10 ; ? - foreground color query
+#   OSC 11 ; ? - background color query
+#   OSC 4 ; index ; ? - palette color query
+#   OSC 12 ; ? - cursor color query
+#   OSC 17 ; ? - highlight background query
+# Terminators: BEL (\x07) or ST (ESC \)
+#
+# This pattern matches any numeric OSC code with at most one sub-parameter
+# before ";?" (not a fixed list of codes), so new query types are covered.
+_OSC_QUERY_PATTERN = re.compile(
+    rb"\x1b\]"  # OSC introducer
+    rb"\d+"  # Parameter number (10, 11, 4, 12, etc.)
+    rb"(?:;[^;\x07\x1b]*)?"  # Optional sub-parameter (e.g., palette index)
+    rb";\?"  # Query marker - the key indicator this is a query
+    rb"(?:\x07|\x1b\\)"  # BEL or ST terminator
+)
+
+# DA (Device Attributes) primary query
+# Format: ESC [ c  or  ESC [ 0 c
+_DA_PATTERN = re.compile(rb"\x1b\[0?c")
+
+# DA2 (Secondary Device Attributes) query
+# Format: ESC [ > c  or  ESC [ > 0 c
+_DA2_PATTERN = re.compile(rb"\x1b\[>0?c")
+
+# DECRQSS (Request Selection or Setting) - various terminal state queries
+# Format: ESC P $ q <setting> ST
+_DECRQSS_PATTERN = re.compile(
+    rb"\x1bP\$q"  # DCS introducer + DECRQSS
+    rb"[^\x1b]*"  # Setting identifier
+    rb"\x1b\\"  # ST terminator
+)
+
+# Pattern to detect a possibly incomplete escape sequence at the end of a chunk.
+# These are potential query-sequence prefixes that may complete in the next chunk.
+# Each alternative is anchored with $ to the end of the chunk:
+#   - \x1b alone (could start a CSI, OSC, or DCS sequence)
+#   - \x1b[ followed by only digits, ";" or ">" (no final command character yet)
+#   - \x1b] followed by any bytes other than BEL (so an ST-terminated OSC matches too)
+#   - \x1bP followed by any bytes other than ESC (ST has not started yet)
+_INCOMPLETE_ESC_PATTERN = re.compile(
+    rb"(?:"
+    rb"\x1b$|"  # ESC at end (might be start of any sequence)
+    rb"\x1b\[[0-9;>]*$|"  # CSI without command char
+    rb"\x1b\][^\x07]*$|"  # OSC without BEL terminator (ST needs \x1b\)
+    rb"\x1bP[^\x1b]*$"  # DCS without ST terminator
+    rb")"
+)
+
+
+def _filter_complete_queries(output_bytes: bytes) -> bytes:
+    """Filter complete terminal query sequences from output bytes."""
+    output_bytes = _DSR_PATTERN.sub(b"", output_bytes)
+    output_bytes = _OSC_QUERY_PATTERN.sub(b"", output_bytes)
+    output_bytes = _DA_PATTERN.sub(b"", output_bytes)
+    output_bytes = _DA2_PATTERN.sub(b"", output_bytes)
+    output_bytes = _DECRQSS_PATTERN.sub(b"", output_bytes)
+    return output_bytes
+
+
+class TerminalQueryFilter:
+    """Stateful filter for terminal query sequences.
+
+    This filter maintains state across calls to handle escape sequences that
+    may be split across multiple output chunks (which happens with long-running
+    commands surfaced incrementally).
+
+    Usage:
+        query_filter = TerminalQueryFilter()
+        filtered1 = query_filter.filter(chunk1)
+        filtered2 = query_filter.filter(chunk2)
+        # ... and so on
+        tail = query_filter.flush()  # emit bytes still held at end of output
+        # When the command completes, reset for the next command:
+        query_filter.reset()
+    """
+
+    def __init__(self) -> None:
+        self._pending: bytes = b""
+
+    def reset(self) -> None:
+        """Reset filter state between commands."""
+        self._pending = b""
+
+    def filter(self, output: str) -> str:
+        """Filter terminal query sequences from captured terminal output.
+
+        Removes escape sequences that would cause the terminal to respond
+        when the output is displayed, while preserving legitimate formatting
+        sequences (colors, cursor movement, etc.).
+
+        This method is stateful: incomplete escape sequences at the end of
+        a chunk are held until the next chunk arrives, so split sequences
+        are properly detected and filtered.
+
+        Args:
+            output: Raw terminal output that may contain query sequences.
+
+        Returns:
+            Filtered output with query sequences removed.
+        """
+        # Convert to bytes for regex matching (escape sequences are byte-level)
+        output_bytes = output.encode("utf-8", errors="surrogateescape")
+
+        # Prepend any pending bytes from previous call
+        if self._pending:
+            output_bytes = self._pending + output_bytes
+            self._pending = b""
+
+        # Check for incomplete escape sequence at end
+        match = _INCOMPLETE_ESC_PATTERN.search(output_bytes)
+        if match:
+            # Hold the incomplete sequence for the next chunk
+            self._pending = output_bytes[match.start() :]
+            output_bytes = output_bytes[: match.start()]
+
+        # Filter complete query sequences
+        output_bytes = _filter_complete_queries(output_bytes)
+
+        # Convert back to string
+        return output_bytes.decode("utf-8", errors="surrogateescape")
+
+    def flush(self) -> str:
+        """Flush any pending bytes that weren't part of a query.
+
+        Call this when output is complete to emit any trailing bytes that
+        turned out not to be query sequences.
+
+        Returns:
+            Any pending bytes as a string, filtered for queries.
+        """
+        if not self._pending:
+            return ""
+        pending = self._pending
+        self._pending = b""
+        # Filter the pending bytes in case they form a complete query
+        filtered = _filter_complete_queries(pending)
+        return filtered.decode("utf-8", errors="surrogateescape")
+
+
+# Module-level instance (backward compatibility); unused within this module.
+_default_filter = TerminalQueryFilter()
+
+
+def filter_terminal_queries(output: str) -> str:
+    """Filter terminal query sequences from captured terminal output.
+
+    This is a stateless convenience function. For handling incremental output
+    where sequences may be split across chunks, use TerminalQueryFilter class.
+
+    Removes escape sequences that would cause the terminal to respond
+    when the output is displayed, while preserving legitimate formatting
+    sequences (colors, cursor movement, etc.).
+
+    Args:
+        output: Raw terminal output that may contain query sequences.
+
+    Returns:
+        Filtered output with query sequences removed.
+    """
+    # Use a fresh filter for stateless behavior
+    temp_filter = TerminalQueryFilter()
+    result = temp_filter.filter(output)
+    # Emit anything filter() held back as a possibly incomplete trailing sequence
+    result += temp_filter.flush()
+    return result