fix: #2211 Move nested handoffs to opt-in feature (#2272)

seratch · web-flow · commit 6ab83d438247 · 2026-01-20T12:08:56.000+09:00
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -210,10 +210,10 @@ class RunConfig:
     agent. See the documentation in `Handoff.input_filter` for more details.
     """
 
-    nest_handoff_history: bool = True
-    """Wrap prior run history in a single assistant message before handing off when no custom
-    input filter is set. Set to False to preserve the raw transcript behavior from previous
-    releases.
+    nest_handoff_history: bool = False
+    """Opt-in beta: wrap prior run history in a single assistant message before handing off when no
+    custom input filter is set. This is disabled by default while we stabilize nested handoffs; set
+    to True to enable the collapsed transcript behavior.
     """
 
     handoff_history_mapper: HandoffHistoryMapper | None = None
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -288,6 +288,7 @@ async def test_structured_output():
             get_text_input_item("user_message"),
             get_text_input_item("another_message"),
         ],
+        run_config=RunConfig(nest_handoff_history=True),
     )
 
     assert result.final_output == Foo(bar="baz")
@@ -345,7 +346,36 @@ async def test_handoff_filters():
 
 
 @pytest.mark.asyncio
-async def test_default_handoff_history_nested_and_filters_respected():
+async def test_handoff_history_not_nested_by_default():
+    triage_model = FakeModel()
+    delegate_model = FakeModel()
+
+    delegate = Agent(name="delegate", model=delegate_model)
+    triage = Agent(name="triage", model=triage_model, handoffs=[delegate])
+
+    triage_model.add_multiple_turn_outputs(
+        [[get_text_message("triage summary"), get_handoff_tool_call(delegate)]]
+    )
+    delegate_model.add_multiple_turn_outputs([[get_text_message("resolution")]])
+
+    result = await Runner.run(triage, input="user_message")  # no run_config: defaults apply
+
+    assert result.final_output == "resolution"
+    assert delegate_model.first_turn_args is not None
+    delegate_input = delegate_model.first_turn_args["input"]
+    assert isinstance(delegate_input, list)
+    delegate_messages = [item for item in delegate_input if isinstance(item, dict)]
+    assert delegate_messages
+    assert _as_message(delegate_messages[0])["role"] == "user"
+    assert not any(  # marker must be absent from every string content
+        "<CONVERSATION HISTORY>" in str(item.get("content", ""))
+        for item in delegate_messages
+        if isinstance(item.get("content"), str)  # non-string content is skipped, not inspected
+    )
+
+
+@pytest.mark.asyncio
+async def test_handoff_history_nested_and_filters_respected_when_enabled():
     model = FakeModel()
     agent_1 = Agent(
         name="delegate",
@@ -364,7 +394,9 @@ async def test_default_handoff_history_nested_and_filters_respected():
         ]
     )
 
-    result = await Runner.run(agent_2, input="user_message")
+    result = await Runner.run(
+        agent_2, input="user_message", run_config=RunConfig(nest_handoff_history=True)
+    )
 
     assert isinstance(result.input, list)
     assert len(result.input) == 1
@@ -395,14 +427,16 @@ def passthrough_filter(data: HandoffInputData) -> HandoffInputData:
         ]
     )
 
-    filtered_result = await Runner.run(triage_with_filter, input="user_message")
+    filtered_result = await Runner.run(
+        triage_with_filter, input="user_message", run_config=RunConfig(nest_handoff_history=True)
+    )
 
     assert isinstance(filtered_result.input, str)
     assert filtered_result.input == "user_message"
 
 
 @pytest.mark.asyncio
-async def test_default_handoff_history_accumulates_across_multiple_handoffs():
+async def test_handoff_history_accumulates_across_multiple_handoffs_when_enabled():
     triage_model = FakeModel()
     delegate_model = FakeModel()
     closer_model = FakeModel()
@@ -419,7 +453,9 @@ async def test_default_handoff_history_accumulates_across_multiple_handoffs():
     )
     closer_model.add_multiple_turn_outputs([[get_text_message("resolution")]])
 
-    result = await Runner.run(triage, input="user_question")
+    result = await Runner.run(
+        triage, input="user_question", run_config=RunConfig(nest_handoff_history=True)
+    )
 
     assert result.final_output == "resolution"
     assert closer_model.first_turn_args is not None
diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py
@@ -289,6 +289,7 @@ async def test_structured_output():
             get_text_input_item("user_message"),
             get_text_input_item("another_message"),
         ],
+        run_config=RunConfig(nest_handoff_history=True),
     )
     async for _ in result.stream_events():
         pass
@@ -771,6 +772,7 @@ async def test_streaming_events():
             get_text_input_item("user_message"),
             get_text_input_item("another_message"),
         ],
+        run_config=RunConfig(nest_handoff_history=True),
     )
     async for event in result.stream_events():
         event_counts[event.type] = event_counts.get(event.type, 0) + 1
diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py
@@ -211,6 +211,102 @@ async def test_handoffs_parsed_correctly():
     assert handoff_agent == agent_1
 
 
+@pytest.mark.asyncio
+async def test_history_nesting_disabled_by_default(monkeypatch: pytest.MonkeyPatch):
+    source_agent = Agent(name="source")
+    target_agent = Agent(name="target")
+    default_handoff = handoff(target_agent)
+    tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent))
+    run_handoffs = [ToolRunHandoff(handoff=default_handoff, tool_call=tool_call)]
+    run_config = RunConfig()  # no overrides, so every field keeps its default
+    context_wrapper = RunContextWrapper(context=None)
+    hooks = RunHooks()
+    original_input = [get_text_input_item("hello")]
+    pre_step_items: list[RunItem] = []
+    new_step_items: list[RunItem] = []
+    new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None)
+
+    def fail_if_called(  # stand-in that fails the test if nesting is attempted
+        _handoff_input_data: HandoffInputData,
+        *,
+        history_mapper: Any,
+    ) -> HandoffInputData:
+        _ = history_mapper  # deliberately unused
+        raise AssertionError("nest_handoff_history should be opt-in.")
+
+    monkeypatch.setattr("agents._run_impl.nest_handoff_history", fail_if_called)
+
+    result = await RunImpl.execute_handoffs(
+        agent=source_agent,
+        original_input=list(original_input),  # pass a copy; the original is compared below
+        pre_step_items=pre_step_items,
+        new_step_items=new_step_items,
+        new_response=new_response,
+        run_handoffs=run_handoffs,
+        hooks=hooks,
+        context_wrapper=context_wrapper,
+        run_config=run_config,
+    )
+
+    assert result.original_input == original_input  # must equal what went in
+
+
+@pytest.mark.asyncio
+async def test_run_level_history_nesting_can_be_enabled(monkeypatch: pytest.MonkeyPatch):
+    source_agent = Agent(name="source")
+    target_agent = Agent(name="target")
+    default_handoff = handoff(target_agent)
+    tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent))
+    run_handoffs = [ToolRunHandoff(handoff=default_handoff, tool_call=tool_call)]
+    run_config = RunConfig(nest_handoff_history=True)  # explicit opt-in
+    context_wrapper = RunContextWrapper(context=None)
+    hooks = RunHooks()
+    original_input = [get_text_input_item("hello")]
+    pre_step_items: list[RunItem] = []
+    new_step_items: list[RunItem] = []
+    new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None)
+
+    calls: list[HandoffInputData] = []
+
+    def fake_nest(  # stand-in that records its argument and returns a fixed history
+        handoff_input_data: HandoffInputData,
+        *,
+        history_mapper: Any,
+    ) -> HandoffInputData:
+        _ = history_mapper  # deliberately unused
+        calls.append(handoff_input_data)
+        return handoff_input_data.clone(
+            input_history=(
+                {
+                    "role": "assistant",
+                    "content": "nested",
+                },
+            )
+        )
+
+    monkeypatch.setattr("agents._run_impl.nest_handoff_history", fake_nest)
+
+    result = await RunImpl.execute_handoffs(
+        agent=source_agent,
+        original_input=list(original_input),
+        pre_step_items=pre_step_items,
+        new_step_items=new_step_items,
+        new_response=new_response,
+        run_handoffs=run_handoffs,
+        hooks=hooks,
+        context_wrapper=context_wrapper,
+        run_config=run_config,
+    )
+
+    assert calls  # the stand-in was invoked at least once
+    assert result.original_input == [  # matches the history fake_nest returned
+        {
+            "role": "assistant",
+            "content": "nested",
+        }
+    ]
+
+
 @pytest.mark.asyncio
 async def test_handoff_can_disable_run_level_history_nesting(monkeypatch: pytest.MonkeyPatch):
     source_agent = Agent(name="source")
@@ -233,6 +329,7 @@ def fake_nest(
         *,
         history_mapper: Any,
     ) -> HandoffInputData:
+        _ = history_mapper
         calls.append(handoff_input_data)
         return handoff_input_data
 
@@ -274,6 +371,7 @@ def fake_nest(
         *,
         history_mapper: Any,
     ) -> HandoffInputData:
+        _ = history_mapper
         return handoff_input_data.clone(
             input_history=(
                 {