Nest handoff history by default

jhills20 · jhills20 · commit e895a779c79d · 2025-10-27T13:51:25.000Z
diff --git a/docs/running_agents.md b/docs/running_agents.md
@@ -51,11 +51,14 @@ The `run_config` parameter lets you configure some global settings for the agent
 -   [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`.
 -   [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs.
 -   [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details.
+-   [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default), the runner wraps the prior transcript in a developer-role summary message and keeps the latest user turn separate before invoking the next agent. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`][agents.extensions.handoff_filters.nest_handoff_history] from your own filters to reuse the default behavior.
 -   [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run.
 -   [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs.
 -   [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs.
 -   [`trace_metadata`][agents.run.RunConfig.trace_metadata]: Metadata to include on all traces.
 
+By default, the SDK now nests prior turns inside a developer summary message whenever an agent hands off to another agent and no input filter applies to that handoff. This reduces repeated assistant messages and keeps the most recent user turn explicit for the receiving agent. If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)`, or supply a `handoff_input_filter` (or a per-handoff `input_filter`) that forwards the conversation exactly as you need.
+
 ## Conversations/chat threads
 
 Calling any of the run methods can result in one or more agents running (and hence one or more LLM calls), but it represents a single logical turn in a chat conversation. For example:
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -51,6 +51,7 @@
     ToolOutputGuardrailTripwireTriggered,
     UserError,
 )
+from .extensions.handoff_filters import nest_handoff_history
 from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult
 from .handoffs import Handoff, HandoffInputData
 from .items import (
@@ -998,8 +999,8 @@ async def execute_handoffs(
             input_filter = handoff.input_filter or (
                 run_config.handoff_input_filter if run_config else None
             )
-            if input_filter:
-                logger.debug("Filtering inputs for handoff")
+            handoff_input_data: HandoffInputData | None = None
+            if input_filter or run_config.nest_handoff_history:
                 handoff_input_data = HandoffInputData(
                     input_history=tuple(original_input)
                     if isinstance(original_input, list)
@@ -1008,6 +1009,9 @@ async def execute_handoffs(
                     new_items=tuple(new_step_items),
                     run_context=context_wrapper,
                 )
+
+            if input_filter and handoff_input_data is not None:
+                logger.debug("Filtering inputs for handoff")
                 if not callable(input_filter):
                     _error_tracing.attach_error_to_span(
                         span_handoff,
@@ -1037,6 +1041,15 @@ async def execute_handoffs(
                 )
                 pre_step_items = list(filtered.pre_handoff_items)
                 new_step_items = list(filtered.new_items)
+            elif run_config.nest_handoff_history and handoff_input_data is not None:
+                nested = nest_handoff_history(handoff_input_data)
+                original_input = (
+                    nested.input_history
+                    if isinstance(nested.input_history, str)
+                    else list(nested.input_history)
+                )
+                pre_step_items = list(nested.pre_handoff_items)
+                new_step_items = list(nested.new_items)
 
         return SingleStepResult(
             original_input=original_input,
diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py
@@ -1,9 +1,14 @@
 from __future__ import annotations
 
+import json
+from copy import deepcopy
+from typing import Any
+
 from ..handoffs import HandoffInputData
 from ..items import (
     HandoffCallItem,
     HandoffOutputItem,
+    ItemHelpers,
     ReasoningItem,
     RunItem,
     ToolCallItem,
@@ -34,6 +39,102 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData:
     )
 
 
+def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputData:
+    """Summarizes the previous transcript into a developer message for the next agent."""
+
+    normalized_history = _normalize_input_history(handoff_input_data.input_history)
+    pre_items_as_inputs = [
+        _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items
+    ]
+    new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items]
+    transcript = normalized_history + pre_items_as_inputs + new_items_as_inputs
+
+    developer_message = _build_developer_message(transcript)
+    latest_user = _find_latest_user_turn(transcript)
+    history_items: list[TResponseInputItem] = [developer_message]
+    if latest_user is not None:
+        history_items.append(latest_user)
+
+    filtered_pre_items = tuple(
+        item
+        for item in handoff_input_data.pre_handoff_items
+        if _get_run_item_role(item) != "assistant"
+    )
+
+    return handoff_input_data.clone(
+        input_history=tuple(history_items),
+        pre_handoff_items=filtered_pre_items,
+    )
+
+
+def _normalize_input_history(
+    input_history: str | tuple[TResponseInputItem, ...],
+) -> list[TResponseInputItem]:
+    if isinstance(input_history, str):
+        return ItemHelpers.input_to_new_input_list(input_history)
+    return [deepcopy(item) for item in input_history]
+
+
+def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem:
+    return deepcopy(run_item.to_input_item())
+
+
+def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseInputItem:
+    if transcript:
+        summary_lines = [
+            f"{idx + 1}. {_format_transcript_item(item)}" for idx, item in enumerate(transcript)
+        ]
+    else:
+        summary_lines = ["(no previous turns recorded)"]
+
+    content = "Previous conversation before this handoff:\n" + "\n".join(summary_lines)
+    return {"role": "developer", "content": content}
+
+
+def _format_transcript_item(item: TResponseInputItem) -> str:
+    role = item.get("role")
+    if isinstance(role, str):
+        prefix = role
+        name = item.get("name")
+        if isinstance(name, str) and name:
+            prefix = f"{prefix} ({name})"
+        content_str = _stringify_content(item.get("content"))
+        return f"{prefix}: {content_str}" if content_str else prefix
+
+    item_type = item.get("type", "item")
+    rest = {k: v for k, v in item.items() if k != "type"}
+    try:
+        serialized = json.dumps(rest, ensure_ascii=False, default=str)
+    except TypeError:
+        serialized = str(rest)
+    return f"{item_type}: {serialized}" if serialized else str(item_type)
+
+
+def _stringify_content(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    try:
+        return json.dumps(content, ensure_ascii=False, default=str)
+    except TypeError:
+        return str(content)
+
+
+def _find_latest_user_turn(
+    transcript: list[TResponseInputItem],
+) -> TResponseInputItem | None:
+    for item in reversed(transcript):
+        if item.get("role") == "user":
+            return deepcopy(item)
+    return None
+
+
+def _get_run_item_role(run_item: RunItem) -> str | None:
+    role_candidate = run_item.to_input_item().get("role")
+    return role_candidate if isinstance(role_candidate, str) else None
+
+
 def _remove_tools_from_items(items: tuple[RunItem, ...]) -> tuple[RunItem, ...]:
     filtered_items = []
     for item in items:
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -196,6 +196,11 @@ class RunConfig:
     agent. See the documentation in `Handoff.input_filter` for more details.
     """
 
+    nest_handoff_history: bool = True
+    """Wrap prior run history in a developer message before handing off when no custom input
+    filter is set. Set to False to preserve the raw transcript behavior from previous releases.
+    """
+
     input_guardrails: list[InputGuardrail[Any]] | None = None
     """A list of input guardrails to run on the initial run input."""
 
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -164,9 +164,9 @@ async def test_handoffs():
 
     assert result.final_output == "done"
     assert len(result.raw_responses) == 3, "should have three model responses"
-    assert len(result.to_input_list()) == 7, (
-        "should have 7 inputs: orig input, tool call, tool result, message, handoff, handoff"
-        "result, and done message"
+    assert len(result.to_input_list()) == 8, (
+        "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, "
+        "handoff, handoff result, and done message"
     )
     assert result.last_agent == agent_1, "should have handed off to agent_1"
 
@@ -270,6 +270,60 @@ async def test_handoff_filters():
     )
 
 
+@pytest.mark.asyncio
+async def test_default_handoff_history_nested_and_filters_respected():
+    model = FakeModel()
+    agent_1 = Agent(
+        name="delegate",
+        model=model,
+    )
+    agent_2 = Agent(
+        name="triage",
+        model=model,
+        handoffs=[agent_1],
+    )
+
+    model.add_multiple_turn_outputs(
+        [
+            [get_text_message("triage summary"), get_handoff_tool_call(agent_1)],
+            [get_text_message("resolution")],
+        ]
+    )
+
+    result = await Runner.run(agent_2, input="user_message")
+
+    assert isinstance(result.input, list)
+    assert result.input[0]["role"] == "developer"
+    assert "Previous conversation" in result.input[0]["content"]
+    assert "triage summary" in result.input[0]["content"]
+    assert result.input[1]["role"] == "user"
+    assert result.input[1]["content"] == "user_message"
+
+    passthrough_model = FakeModel()
+    delegate = Agent(name="delegate", model=passthrough_model)
+
+    def passthrough_filter(data: HandoffInputData) -> HandoffInputData:
+        return data
+
+    triage_with_filter = Agent(
+        name="triage",
+        model=passthrough_model,
+        handoffs=[handoff(delegate, input_filter=passthrough_filter)],
+    )
+
+    passthrough_model.add_multiple_turn_outputs(
+        [
+            [get_text_message("triage summary"), get_handoff_tool_call(delegate)],
+            [get_text_message("resolution")],
+        ]
+    )
+
+    filtered_result = await Runner.run(triage_with_filter, input="user_message")
+
+    assert isinstance(filtered_result.input, str)
+    assert filtered_result.input == "user_message"
+
+
 @pytest.mark.asyncio
 async def test_async_input_filter_supported():
     # DO NOT rename this without updating pyproject.toml
diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py
@@ -175,9 +175,9 @@ async def test_handoffs():
 
     assert result.final_output == "done"
     assert len(result.raw_responses) == 3, "should have three model responses"
-    assert len(result.to_input_list()) == 7, (
-        "should have 7 inputs: orig input, tool call, tool result, message, handoff, handoff"
-        "result, and done message"
+    assert len(result.to_input_list()) == 8, (
+        "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, "
+        "handoff, handoff result, and done message"
     )
     assert result.last_agent == agent_1, "should have handed off to agent_1"
 
diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py
@@ -2,7 +2,7 @@
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 
 from agents import Agent, HandoffInputData, RunContextWrapper
-from agents.extensions.handoff_filters import remove_all_tools
+from agents.extensions.handoff_filters import nest_handoff_history, remove_all_tools
 from agents.items import (
     HandoffOutputItem,
     MessageOutputItem,
@@ -25,6 +25,13 @@ def _get_message_input_item(content: str) -> TResponseInputItem:
     }
 
 
+def _get_user_input_item(content: str) -> TResponseInputItem:
+    return {
+        "role": "user",
+        "content": content,
+    }
+
+
 def _get_reasoning_input_item() -> TResponseInputItem:
     return {"id": "rid", "summary": [], "type": "reasoning"}
 
@@ -219,3 +226,41 @@ def test_removes_handoffs_from_history():
     assert len(filtered_data.input_history) == 1
     assert len(filtered_data.pre_handoff_items) == 1
     assert len(filtered_data.new_items) == 1
+
+
+def test_nest_handoff_history_wraps_transcript() -> None:
+    data = HandoffInputData(
+        input_history=(_get_user_input_item("Hello"),),
+        pre_handoff_items=(_get_message_output_run_item("Assist reply"),),
+        new_items=(
+            _get_message_output_run_item("Handoff request"),
+            _get_handoff_output_run_item("transfer"),
+        ),
+        run_context=RunContextWrapper(context=()),
+    )
+
+    nested = nest_handoff_history(data)
+
+    assert isinstance(nested.input_history, tuple)
+    assert nested.input_history[0]["role"] == "developer"
+    assert "Assist reply" in nested.input_history[0]["content"]
+    assert nested.input_history[1]["role"] == "user"
+    assert nested.input_history[1]["content"] == "Hello"
+    assert len(nested.pre_handoff_items) == 0
+    assert nested.new_items == data.new_items
+
+
+def test_nest_handoff_history_handles_missing_user() -> None:
+    data = HandoffInputData(
+        input_history=(),
+        pre_handoff_items=(_get_reasoning_output_run_item(),),
+        new_items=(),
+        run_context=RunContextWrapper(context=()),
+    )
+
+    nested = nest_handoff_history(data)
+
+    assert isinstance(nested.input_history, tuple)
+    assert len(nested.input_history) == 1
+    assert nested.input_history[0]["role"] == "developer"
+    assert "reasoning" in nested.input_history[0]["content"].lower()