diff --git a/src/agents/run.py b/src/agents/run.py index 145169f6e9..acd2f211f9 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -203,10 +203,10 @@ class RunConfig: agent. See the documentation in `Handoff.input_filter` for more details. """ - nest_handoff_history: bool = True - """Wrap prior run history in a single assistant message before handing off when no custom - input filter is set. Set to False to preserve the raw transcript behavior from previous - releases. + nest_handoff_history: bool = False + """Opt-in beta: wrap prior run history in a single assistant message before handing off when no + custom input filter is set. This is disabled by default while we stabilize nested handoffs; set + to True to enable the collapsed transcript behavior. """ handoff_history_mapper: HandoffHistoryMapper | None = None diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 6dcfc06afe..1646d53d8c 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -222,6 +222,7 @@ async def test_structured_output(): get_text_input_item("user_message"), get_text_input_item("another_message"), ], + run_config=RunConfig(nest_handoff_history=True), ) assert result.final_output == Foo(bar="baz") @@ -279,7 +280,36 @@ async def test_handoff_filters(): @pytest.mark.asyncio -async def test_default_handoff_history_nested_and_filters_respected(): +async def test_handoff_history_not_nested_by_default(): + triage_model = FakeModel() + delegate_model = FakeModel() + + delegate = Agent(name="delegate", model=delegate_model) + triage = Agent(name="triage", model=triage_model, handoffs=[delegate]) + + triage_model.add_multiple_turn_outputs( + [[get_text_message("triage summary"), get_handoff_tool_call(delegate)]] + ) + delegate_model.add_multiple_turn_outputs([[get_text_message("resolution")]]) + + result = await Runner.run(triage, input="user_message") + + assert result.final_output == "resolution" + assert delegate_model.first_turn_args is not None + 
delegate_input = delegate_model.first_turn_args["input"] + assert isinstance(delegate_input, list) + delegate_messages = [item for item in delegate_input if isinstance(item, dict)] + assert delegate_messages + assert _as_message(delegate_messages[0])["role"] == "user" + assert not any( + "<CONVERSATION HISTORY>" in str(item.get("content", "")) + for item in delegate_messages + if isinstance(item.get("content"), str) + ) + + +@pytest.mark.asyncio +async def test_handoff_history_nested_and_filters_respected_when_enabled(): model = FakeModel() agent_1 = Agent( name="delegate", @@ -298,7 +328,9 @@ async def test_default_handoff_history_nested_and_filters_respected(): ] ) - result = await Runner.run(agent_2, input="user_message") + result = await Runner.run( + agent_2, input="user_message", run_config=RunConfig(nest_handoff_history=True) + ) assert isinstance(result.input, list) assert len(result.input) == 1 @@ -329,14 +361,16 @@ def passthrough_filter(data: HandoffInputData) -> HandoffInputData: ] ) - filtered_result = await Runner.run(triage_with_filter, input="user_message") + filtered_result = await Runner.run( + triage_with_filter, input="user_message", run_config=RunConfig(nest_handoff_history=True) + ) assert isinstance(filtered_result.input, str) assert filtered_result.input == "user_message" @pytest.mark.asyncio -async def test_default_handoff_history_accumulates_across_multiple_handoffs(): +async def test_handoff_history_accumulates_across_multiple_handoffs_when_enabled(): triage_model = FakeModel() delegate_model = FakeModel() closer_model = FakeModel() @@ -353,7 +387,9 @@ async def test_default_handoff_history_accumulates_across_multiple_handoffs(): ) closer_model.add_multiple_turn_outputs([[get_text_message("resolution")]]) - result = await Runner.run(triage, input="user_question") + result = await Runner.run( + triage, input="user_question", run_config=RunConfig(nest_handoff_history=True) + ) assert result.final_output == "resolution" assert closer_model.first_turn_args is not 
None diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py index 222afda78c..82af07d29e 100644 --- a/tests/test_agent_runner_streamed.py +++ b/tests/test_agent_runner_streamed.py @@ -225,6 +225,7 @@ async def test_structured_output(): get_text_input_item("user_message"), get_text_input_item("another_message"), ], + run_config=RunConfig(nest_handoff_history=True), ) async for _ in result.stream_events(): pass @@ -707,6 +708,7 @@ async def test_streaming_events(): get_text_input_item("user_message"), get_text_input_item("another_message"), ], + run_config=RunConfig(nest_handoff_history=True), ) async for event in result.stream_events(): event_counts[event.type] = event_counts.get(event.type, 0) + 1 diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index a9ae223575..afac02481d 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -211,6 +211,102 @@ async def test_handoffs_parsed_correctly(): assert handoff_agent == agent_1 +@pytest.mark.asyncio +async def test_history_nesting_disabled_by_default(monkeypatch: pytest.MonkeyPatch): + source_agent = Agent(name="source") + target_agent = Agent(name="target") + default_handoff = handoff(target_agent) + tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent)) + run_handoffs = [ToolRunHandoff(handoff=default_handoff, tool_call=tool_call)] + run_config = RunConfig() + context_wrapper = RunContextWrapper(context=None) + hooks = RunHooks() + original_input = [get_text_input_item("hello")] + pre_step_items: list[RunItem] = [] + new_step_items: list[RunItem] = [] + new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None) + + def fail_if_called( + _handoff_input_data: HandoffInputData, + *, + history_mapper: Any, + ) -> HandoffInputData: + _ = history_mapper + raise AssertionError("nest_handoff_history should be opt-in.") + + 
+ monkeypatch.setattr("agents._run_impl.nest_handoff_history", fail_if_called) + + result = await RunImpl.execute_handoffs( + agent=source_agent, + original_input=list(original_input), + pre_step_items=pre_step_items, + new_step_items=new_step_items, + new_response=new_response, + run_handoffs=run_handoffs, + hooks=hooks, + context_wrapper=context_wrapper, + run_config=run_config, + ) + + assert result.original_input == original_input + + +@pytest.mark.asyncio +async def test_run_level_history_nesting_can_be_enabled(monkeypatch: pytest.MonkeyPatch): + source_agent = Agent(name="source") + target_agent = Agent(name="target") + default_handoff = handoff(target_agent) + tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent)) + run_handoffs = [ToolRunHandoff(handoff=default_handoff, tool_call=tool_call)] + run_config = RunConfig(nest_handoff_history=True) + context_wrapper = RunContextWrapper(context=None) + hooks = RunHooks() + original_input = [get_text_input_item("hello")] + pre_step_items: list[RunItem] = [] + new_step_items: list[RunItem] = [] + new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None) + + calls: list[HandoffInputData] = [] + + def fake_nest( + handoff_input_data: HandoffInputData, + *, + history_mapper: Any, + ) -> HandoffInputData: + _ = history_mapper + calls.append(handoff_input_data) + return handoff_input_data.clone( + input_history=( + { + "role": "assistant", + "content": "nested", + }, + ) + ) + + monkeypatch.setattr("agents._run_impl.nest_handoff_history", fake_nest) + + result = await RunImpl.execute_handoffs( + agent=source_agent, + original_input=list(original_input), + pre_step_items=pre_step_items, + new_step_items=new_step_items, + new_response=new_response, + run_handoffs=run_handoffs, + hooks=hooks, + context_wrapper=context_wrapper, + run_config=run_config, + ) + + assert calls + assert result.original_input == [ + { + "role": "assistant", + "content": "nested", + } + ] + + 
@pytest.mark.asyncio async def test_handoff_can_disable_run_level_history_nesting(monkeypatch: pytest.MonkeyPatch): source_agent = Agent(name="source") @@ -233,6 +329,7 @@ def fake_nest( *, history_mapper: Any, ) -> HandoffInputData: + _ = history_mapper calls.append(handoff_input_data) return handoff_input_data @@ -274,6 +371,7 @@ def fake_nest( *, history_mapper: Any, ) -> HandoffInputData: + _ = history_mapper return handoff_input_data.clone( input_history=( {