review: close pending ACP tool cards on retry / abort

Debug Agent · claude · Debug Agent · commit 282b87817d28 · 2026-04-17T12:03:04.000-03:00
ACP servers mint fresh ``tool_call_id``s on a retried prompt(), so live-emitted pending events from the failed attempt would otherwise be orphaned on state.events — consumers that dedupe by tool_call_id and take the last-seen status as authoritative would keep those cards spinning forever. Introduces ``_cancel_inflight_tool_calls(on_event)`` which walks the accumulator and emits a terminal ``ACPToolCallEvent(status="failed", is_error=True)`` for every entry that hasn't reached a terminal status. Called before ``_reset_client_for_turn`` in both retry branches, and also before the error MessageEvent in the TimeoutError and outer-exception paths so aborted turns don't leave ghost cards behind either. Also documents the concurrency model on the bridge: on_event / on_token / on_activity all fire synchronously from the portal thread while the caller thread is blocked in portal.call(), so they do not race with the final MessageEvent / FinishAction emitted by the caller thread. Review feedback from VascoSch92 on PR #2868. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -109,6 +109,12 @@
 # well below the ~20 min runtime-api kill threshold.
 _ACTIVITY_SIGNAL_INTERVAL: float = 30.0
 
+# ACP tool-call statuses that represent a terminal outcome.  Non-terminal
+# statuses (``pending``, ``in_progress``) mean the call is still in flight;
+# if the turn aborts before such a call reaches a terminal status, its
+# live-emitted event on state.events stays orphaned unless we close it.
+_TERMINAL_TOOL_CALL_STATUSES: frozenset[str] = frozenset({"completed", "failed"})
+
 
 def _make_dummy_llm() -> LLM:
     """Create a dummy LLM that should never be called directly."""
@@ -286,6 +292,16 @@ class _OpenHandsACPBridge:
     """Bridge between OpenHands and ACP that accumulates session updates.
 
     Implements the ``Client`` protocol from ``agent_client_protocol``.
+
+    Concurrency model — ``on_event`` / ``on_token`` / ``on_activity`` are
+    fired synchronously from ``session_update``, which runs on the
+    ``AsyncExecutor`` portal thread.  The caller thread driving
+    ``ACPAgent.step()`` is blocked inside ``portal.call()`` for the entire
+    ``prompt()`` round-trip, so these callbacks do not race with the final
+    ``MessageEvent`` / ``FinishAction`` emitted by the caller thread after
+    ``prompt()`` returns.  Consumers that keep cross-callback state (e.g.
+    hook processors reading-then-writing, visualizers) can therefore treat
+    each callback as sequential within a single turn.
     """
 
     def __init__(self) -> None:
@@ -939,6 +955,45 @@ def _reset_client_for_turn(
         self._client.on_event = on_event
         self._client.on_activity = self._on_activity
 
+    def _cancel_inflight_tool_calls(self, on_event: ConversationCallbackType) -> None:
+        """Emit a terminal ``failed`` ACPToolCallEvent for every tool call
+        in the accumulator that has not reached a terminal status yet.
+
+        ACP servers mint fresh ``tool_call_id``s on a retried turn, so any
+        ``pending`` / ``in_progress`` events already streamed during the
+        failed attempt would otherwise be orphaned on ``state.events`` —
+        no later notification reuses their id, and consumers that dedupe
+        by ``tool_call_id`` + "last-seen status wins" would keep them
+        spinning forever.  This method closes those cards before we wipe
+        the in-memory accumulator on retry / turn abort.
+
+        Called with ``on_event`` passed in explicitly because the bridge's
+        ``on_event`` attribute is about to be cleared by ``reset()``.
+        """
+        for tc in self._client.accumulated_tool_calls:
+            status = tc.get("status")
+            if status in _TERMINAL_TOOL_CALL_STATUSES:
+                continue
+            try:
+                on_event(
+                    ACPToolCallEvent(
+                        tool_call_id=tc["tool_call_id"],
+                        title=tc["title"],
+                        status="failed",
+                        tool_kind=tc.get("tool_kind"),
+                        raw_input=tc.get("raw_input"),
+                        raw_output=tc.get("raw_output"),
+                        content=tc.get("content"),
+                        is_error=True,
+                    )
+                )
+            except Exception:
+                logger.debug(
+                    "Failed to emit supersede event for %s",
+                    tc.get("tool_call_id"),
+                    exc_info=True,
+                )
+
     @observe(name="acp_agent.step", ignore_inputs=["conversation", "on_event"])
     def step(
         self,
@@ -1024,6 +1079,7 @@ async def _prompt() -> PromptResponse:
                             e,
                         )
                         time.sleep(delay)
+                        self._cancel_inflight_tool_calls(on_event)
                         self._reset_client_for_turn(on_token, on_event)
                     else:
                         raise
@@ -1048,6 +1104,7 @@ async def _prompt() -> PromptResponse:
                             e,
                         )
                         time.sleep(delay)
+                        self._cancel_inflight_tool_calls(on_event)
                         self._reset_client_for_turn(on_token, on_event)
                     else:
                         raise
@@ -1144,12 +1201,17 @@ async def _prompt() -> PromptResponse:
                     )
                 ],
             )
+            # Close any tool cards left in flight from the timed-out attempt.
+            self._cancel_inflight_tool_calls(on_event)
             on_event(MessageEvent(source="agent", llm_message=error_message))
             state.execution_status = ConversationExecutionStatus.ERROR
         except Exception as e:
             logger.error("ACP prompt failed: %s", e, exc_info=True)
             error_str = str(e)
 
+            # Close any tool cards left in flight before surfacing the error.
+            self._cancel_inflight_tool_calls(on_event)
+
             # Emit error as an agent message (existing behavior, preserved for
             # consumers that inspect MessageEvents)
             error_message = Message(
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
@@ -1401,6 +1401,172 @@ def test_reset_clears_on_event(self):
         assert client.on_event is None
 
 
+class TestACPCancelInflightToolCalls:
+    """Tests for _cancel_inflight_tool_calls — ensures ghost tool cards are
+    closed on retry / abort so the live-emission stream cannot leave an
+    orphaned pending event on ``state.events``.
+
+    Raised in PR review on #2866: ACP servers mint fresh ``tool_call_id``s
+    when the prompt is retried, so any pending event already fired for the
+    failed attempt would otherwise spin forever under dedup-by-id consumers.
+    """
+
+    @staticmethod
+    def _push_entry(
+        client: _OpenHandsACPBridge, tool_call_id: str, status: str
+    ) -> None:
+        client.accumulated_tool_calls.append(
+            {
+                "tool_call_id": tool_call_id,
+                "title": f"Tool {tool_call_id}",
+                "tool_kind": "read",
+                "status": status,
+                "raw_input": {"k": "v"},
+                "raw_output": None,
+                "content": None,
+            }
+        )
+
+    def test_emits_failed_event_for_pending_entries(self, tmp_path):
+        """Pending / in_progress entries get a terminal failed ACPToolCallEvent."""
+        agent = _make_agent()
+        agent._client = _OpenHandsACPBridge()
+        self._push_entry(agent._client, "tc-1", "pending")
+        self._push_entry(agent._client, "tc-2", "in_progress")
+
+        emitted: list = []
+        agent._cancel_inflight_tool_calls(emitted.append)
+
+        assert len(emitted) == 2
+        assert all(isinstance(e, ACPToolCallEvent) for e in emitted)
+        assert [e.tool_call_id for e in emitted] == ["tc-1", "tc-2"]
+        assert all(e.status == "failed" and e.is_error for e in emitted)
+
+    def test_skips_already_terminal_entries(self, tmp_path):
+        """completed / failed entries are left alone — they already closed."""
+        agent = _make_agent()
+        agent._client = _OpenHandsACPBridge()
+        self._push_entry(agent._client, "tc-done", "completed")
+        self._push_entry(agent._client, "tc-bad", "failed")
+        self._push_entry(agent._client, "tc-live", "pending")
+
+        emitted: list = []
+        agent._cancel_inflight_tool_calls(emitted.append)
+
+        # Only the pending one gets a synthetic terminal event.
+        assert [e.tool_call_id for e in emitted] == ["tc-live"]
+
+    def test_callback_errors_are_swallowed(self):
+        """A raising on_event during cancellation must not break the retry path."""
+        agent = _make_agent()
+        agent._client = _OpenHandsACPBridge()
+        self._push_entry(agent._client, "tc-1", "pending")
+        self._push_entry(agent._client, "tc-2", "pending")
+
+        seen: list = []
+
+        def flaky(event) -> None:
+            seen.append(event)
+            raise RuntimeError("boom")
+
+        agent._cancel_inflight_tool_calls(flaky)  # must not raise
+        # Both entries still attempted even though the first raised.
+        assert len(seen) == 2
+
+    def test_retry_cancels_pending_events_before_reset(self, tmp_path):
+        """Full step() retry path closes pending cards before the new attempt."""
+        from acp.schema import ToolCallStart
+
+        agent = _make_agent()
+        state = _make_state(tmp_path)
+        state.events.append(
+            SystemPromptEvent(
+                source="agent",
+                system_prompt=TextContent(text="sys"),
+                tools=[],
+            )
+        )
+        state.events.append(
+            MessageEvent(
+                source="user",
+                llm_message=Message(role="user", content=[TextContent(text="go")]),
+            )
+        )
+        conversation = MagicMock()
+        conversation.state = state
+
+        mock_client = _OpenHandsACPBridge()
+        agent._client = mock_client
+        agent._conn = MagicMock()
+        agent._session_id = "test-session"
+
+        events: list = []
+        call_count = 0
+
+        def _fake_run_async(_coro, **_kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                # First attempt: stream a pending tool call, then fail
+                start = MagicMock(spec=ToolCallStart)
+                start.tool_call_id = "toolu_AAA"
+                start.title = "Read file"
+                start.kind = "read"
+                start.status = "pending"
+                start.raw_input = {"path": "/tmp/x"}
+                start.raw_output = None
+                start.content = None
+                asyncio.run(mock_client.session_update("sess", start))
+                raise ConnectionError("reset by peer")
+            # Retry: fresh tool call id reaches terminal state
+            start = MagicMock(spec=ToolCallStart)
+            start.tool_call_id = "toolu_BBB"
+            start.title = "Read file"
+            start.kind = "read"
+            start.status = "completed"
+            start.raw_input = {"path": "/tmp/x"}
+            start.raw_output = "ok"
+            start.content = None
+            asyncio.run(mock_client.session_update("sess", start))
+            mock_client.accumulated_text.append("done")
+            return MagicMock(usage=None)
+
+        mock_executor = MagicMock()
+        mock_executor.run_async = _fake_run_async
+        agent._executor = mock_executor
+
+        with patch("openhands.sdk.agent.acp_agent.time.sleep"):
+            agent.step(conversation, on_event=events.append)
+
+        assert call_count == 2
+        tool_events = [e for e in events if isinstance(e, ACPToolCallEvent)]
+        # Expected sequence:
+        #   toolu_AAA(pending)  — live-emitted during attempt 1
+        #   toolu_AAA(failed)   — synthetic cancellation before retry reset
+        #   toolu_BBB(completed) — attempt 2
+        by_id: dict[str, list[ACPToolCallEvent]] = {}
+        for e in tool_events:
+            by_id.setdefault(e.tool_call_id, []).append(e)
+
+        assert "toolu_AAA" in by_id
+        aaa_events = by_id["toolu_AAA"]
+        # Must end in a terminal status so consumer dedupe-by-id closes the card.
+        assert aaa_events[-1].status == "failed"
+        assert aaa_events[-1].is_error is True
+
+        assert "toolu_BBB" in by_id
+        assert by_id["toolu_BBB"][-1].status == "completed"
+
+        # The toolu_AAA cancellation comes before any toolu_BBB event.
+        aaa_idx = max(
+            i for i, e in enumerate(tool_events) if e.tool_call_id == "toolu_AAA"
+        )
+        bbb_idx = min(
+            i for i, e in enumerate(tool_events) if e.tool_call_id == "toolu_BBB"
+        )
+        assert aaa_idx < bbb_idx
+
+
 class TestACPToolCallEmission:
     """Tests for ACPToolCallEvent emission in step()."""