review: unwire per-turn callbacks after step() to close late-update race

Debug Agent · claude · Debug Agent · commit 9740e56d91ee · 2026-04-17T19:11:09.000-03:00
Prevents a trailing ACP session_update arriving between turns from
firing a stale on_event on the portal thread with no FIFOLock held by
anyone.  Also documents two invariants callers rely on: on_event
handlers must not acquire the state lock, and tool-call→final-message
ordering assumes the server drains session_update notifications before
the prompt response.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py
@@ -317,6 +317,21 @@ class _OpenHandsACPBridge:
     elsewhere) from racing with either phase.  The ordering between the
     two phases is what keeps a single consumer's cross-callback state
     (e.g. hook processors that read-then-write) consistent.
+
+    Two invariants callers rely on:
+
+    * ``on_event`` handlers MUST NOT acquire the conversation state lock
+      (``with conversation.state:``).  The bridge fires them on the portal
+      thread while the caller thread is parked inside ``portal.call()``
+      owning that lock, and ``FIFOLock`` is thread-bound — a lock-acquire
+      on the portal thread would deadlock rather than re-enter.
+    * Tool-call → final-message ordering depends on the ACP server
+      draining every ``session_update`` notification for a turn *before*
+      the prompt response returns.  Verified against
+      ``claude-agent-acp@0.29.0``; servers that interleave trailing
+      ``ToolCallProgress`` after the prompt response would invert the
+      order a consumer sees, and a dedupe-by-id, "last-seen wins" policy
+      would treat the post-message event as authoritative.
     """
 
     def __init__(self) -> None:
@@ -1261,6 +1276,17 @@ async def _prompt() -> PromptResponse:
             # breaks the loop, emits ConversationErrorEvent, and raises
             # ConversationRunError — matching how the regular Agent works
             raise
+        finally:
+            # Unwire the per-turn callbacks now that this step has finished
+            # emitting everything it's going to emit.  If the ACP subprocess
+            # later dispatches a trailing ``session_update`` (e.g. between
+            # turns), it fires on the portal thread with no FIFOLock held
+            # by anyone — firing a stale ``on_event`` there would race
+            # with other threads mutating ``state.events``.  Clearing the
+            # callbacks turns any such late update into a no-op emit.
+            self._client.on_event = None
+            self._client.on_token = None
+            self._client.on_activity = None
 
     def ask_agent(self, question: str) -> str | None:
         """Fork the ACP session, prompt the fork, and return the response."""
diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py
@@ -459,8 +459,18 @@ def test_step_wires_on_activity(self, tmp_path):
 
         # Mock the internals so step() doesn't actually call the ACP server
         agent._client = _OpenHandsACPBridge()
+
+        # Capture on_activity while prompt() is still "running" — step()
+        # unwires the bridge callbacks in its finally block once the turn
+        # completes, so the post-return value is None by design.
+        wired_during_prompt: list = []
+
+        def _capture_run_async(_coro, **_kwargs):
+            wired_during_prompt.append(agent._client.on_activity)
+            return MagicMock(usage=None)
+
         agent._executor = MagicMock()
-        agent._executor.run_async = MagicMock(return_value=MagicMock(usage=None))
+        agent._executor.run_async = _capture_run_async
         agent._session_id = "sess-1"
         agent._initialized = True
 
@@ -470,8 +480,11 @@ def test_step_wires_on_activity(self, tmp_path):
 
         agent.step(conversation, on_event=events.append)
 
-        # Verify on_activity was wired to the bridge
-        assert agent._client.on_activity is activity_fn
+        # Verify on_activity was wired to the bridge during the turn.
+        assert wired_during_prompt == [activity_fn]
+        # And that it was cleared afterward so a late session_update
+        # cannot fire the per-turn heartbeat callback out-of-band.
+        assert agent._client.on_activity is None
 
 
 # ---------------------------------------------------------------------------
@@ -643,7 +656,12 @@ def test_step_passes_on_token(self, tmp_path):
         agent._conn = MagicMock()
         agent._session_id = "test-session"
 
+        # Capture on_token while prompt() is still running — step() clears
+        # the per-turn callbacks in its finally block once the turn ends.
+        wired_during_prompt: list = []
+
         def _fake_run_async(_coro, **_kwargs):
+            wired_during_prompt.append(mock_client.on_token)
             mock_client.accumulated_text.append("ok")
 
         mock_executor = MagicMock()
@@ -654,8 +672,10 @@ def _fake_run_async(_coro, **_kwargs):
 
         agent.step(conversation, on_event=lambda _: None, on_token=on_token)
 
-        # Verify on_token was passed to the client
-        assert mock_client.on_token == on_token
+        # Verify on_token was wired during the turn.
+        assert wired_during_prompt == [on_token]
+        # And unwired afterward so a late token chunk is a no-op.
+        assert mock_client.on_token is None
 
 
 # ---------------------------------------------------------------------------
@@ -1669,6 +1689,77 @@ def _fake_run_async(_coro, **_kwargs):
         assert events[1].tool_call_id == "tc-2"
         assert events[1].is_error is True
 
+    def test_step_clears_live_callbacks_on_return(self, tmp_path):
+        """After step() returns, bridge callbacks are unwired.
+
+        A trailing ``session_update`` that lands between turns (the ACP
+        subprocess sending a late ``ToolCallProgress`` after its prompt
+        response) would otherwise fire the previous step's ``on_event``
+        on the portal thread with no FIFOLock held by anyone, racing
+        other threads appending to ``state.events``.
+        """
+        from acp.schema import ToolCallStart
+
+        agent = _make_agent()
+        conversation = self._make_conversation_with_message(tmp_path)
+        events: list = []
+
+        mock_client = _OpenHandsACPBridge()
+        agent._client = mock_client
+        agent._conn = MagicMock()
+        agent._session_id = "test-session"
+
+        def _fake_run_async(_coro, **_kwargs):
+            mock_client.accumulated_text.append("done")
+
+        mock_executor = MagicMock()
+        mock_executor.run_async = _fake_run_async
+        agent._executor = mock_executor
+
+        agent.step(conversation, on_event=events.append, on_token=lambda _: None)
+
+        # Callbacks unwired — a late session_update is a safe no-op emit.
+        assert mock_client.on_event is None
+        assert mock_client.on_token is None
+        assert mock_client.on_activity is None
+
+        pre_count = len(events)
+        trailing = MagicMock(spec=ToolCallStart)
+        trailing.tool_call_id = "tc-late"
+        trailing.title = "Late arrival"
+        trailing.kind = "read"
+        trailing.status = "completed"
+        trailing.raw_input = None
+        trailing.raw_output = None
+        trailing.content = None
+        asyncio.run(mock_client.session_update("sess", trailing))
+        assert len(events) == pre_count  # nothing reached the stale callback
+
+    def test_step_clears_live_callbacks_on_error(self, tmp_path):
+        """Callback unwire also runs when step() raises (finally block)."""
+        agent = _make_agent()
+        conversation = self._make_conversation_with_message(tmp_path)
+        events: list = []
+
+        mock_client = _OpenHandsACPBridge()
+        agent._client = mock_client
+        agent._conn = MagicMock()
+        agent._session_id = "test-session"
+
+        def _fake_run_async(_coro, **_kwargs):
+            raise RuntimeError("boom")
+
+        mock_executor = MagicMock()
+        mock_executor.run_async = _fake_run_async
+        agent._executor = mock_executor
+
+        with pytest.raises(RuntimeError):
+            agent.step(conversation, on_event=events.append)
+
+        assert mock_client.on_event is None
+        assert mock_client.on_token is None
+        assert mock_client.on_activity is None
+
     def test_step_emits_no_tool_call_events_when_none(self, tmp_path):
         """step() emits only MessageEvent when no tool calls accumulated."""
         agent = _make_agent()