Add suggested corrections

DanieleMorotti · web-flow · commit a228bdf567f2 · 2025-11-14T13:05:10.000+01:00
diff --git a/src/agents/items.py b/src/agents/items.py
@@ -3,7 +3,7 @@
 import abc
 import weakref
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union, cast
 
 import pydantic
 from openai.types.responses import (
@@ -92,15 +92,22 @@ class RunItemBase(Generic[T], abc.ABC):
     )
 
     def __post_init__(self) -> None:
-        # Store the producing agent weakly to avoid keeping it alive after the run.
+        # Store a weak reference so we can release the strong reference later if desired.
         self._agent_ref = weakref.ref(self.agent)
-        object.__delattr__(self, "agent")
 
     def __getattr__(self, name: str) -> Any:
         if name == "agent":
             return self._agent_ref() if self._agent_ref else None
         raise AttributeError(name)
 
+    def release_agent(self) -> None:
+        """Release the strong reference to the agent while keeping a weak reference."""
+        if "agent" not in self.__dict__:
+            return
+        agent = self.__dict__["agent"]
+        self._agent_ref = weakref.ref(agent) if agent is not None else None
+        object.__delattr__(self, "agent")
+
     def to_input_item(self) -> TResponseInputItem:
         """Converts this item into an input item suitable for passing to the model."""
         if isinstance(self.raw_item, dict):
@@ -161,11 +168,9 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
 
     def __post_init__(self) -> None:
         super().__post_init__()
-        # Handoff metadata should not hold strong references to the agents either.
+        # Maintain weak references so downstream code can release the strong references when safe.
         self._source_agent_ref = weakref.ref(self.source_agent)
         self._target_agent_ref = weakref.ref(self.target_agent)
-        object.__delattr__(self, "source_agent")
-        object.__delattr__(self, "target_agent")
 
     def __getattr__(self, name: str) -> Any:
         if name == "source_agent":
@@ -174,6 +179,17 @@ def __getattr__(self, name: str) -> Any:
             return self._target_agent_ref() if self._target_agent_ref else None
         return super().__getattr__(name)
 
+    def release_agent(self) -> None:
+        super().release_agent()
+        if "source_agent" in self.__dict__:
+            source_agent = self.__dict__["source_agent"]
+            self._source_agent_ref = weakref.ref(source_agent) if source_agent is not None else None
+            object.__delattr__(self, "source_agent")
+        if "target_agent" in self.__dict__:
+            target_agent = self.__dict__["target_agent"]
+            self._target_agent_ref = weakref.ref(target_agent) if target_agent is not None else None
+            object.__delattr__(self, "target_agent")
+
 
 ToolCallItemTypes: TypeAlias = Union[
     ResponseFunctionToolCall,
@@ -184,12 +200,13 @@ def __getattr__(self, name: str) -> Any:
     ResponseCodeInterpreterToolCall,
     ImageGenerationCall,
     LocalShellCall,
+    dict[str, Any],
 ]
 """A type that represents a tool call item."""
 
 
 @dataclass
-class ToolCallItem(RunItemBase[ToolCallItemTypes]):
+class ToolCallItem(RunItemBase[Any]):
     """Represents a tool call e.g. a function call or computer action call."""
 
     raw_item: ToolCallItemTypes
@@ -198,13 +215,19 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):
     type: Literal["tool_call_item"] = "tool_call_item"
 
 
+ToolCallOutputTypes: TypeAlias = Union[
+    FunctionCallOutput,
+    ComputerCallOutput,
+    LocalShellCallOutput,
+    dict[str, Any],
+]
+
+
 @dataclass
-class ToolCallOutputItem(
-    RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
-):
+class ToolCallOutputItem(RunItemBase[Any]):
     """Represents the output of a tool call."""
 
-    raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
+    raw_item: ToolCallOutputTypes
     """The raw item from the model."""
 
     output: Any
@@ -214,6 +237,25 @@ class ToolCallOutputItem(
 
     type: Literal["tool_call_output_item"] = "tool_call_output_item"
 
+    def to_input_item(self) -> TResponseInputItem:
+        """Converts the tool output into an input item for the next model turn.
+
+        Dict raw items are shallow-copied so the original raw item stays intact. For
+        `shell_call_output` payloads, `status` (SDK book-keeping the Responses API does not yet
+        accept) plus `shell_output` and `provider_data` are removed from the returned payload.
+        """
+
+        if isinstance(self.raw_item, dict):
+            payload = dict(self.raw_item)
+            payload_type = payload.get("type")
+            if payload_type == "shell_call_output":
+                payload.pop("status", None)
+                payload.pop("shell_output", None)
+                payload.pop("provider_data", None)
+            return cast(TResponseInputItem, payload)
+
+        return super().to_input_item()
+
 
 @dataclass
 class ReasoningItem(RunItemBase[ResponseReasoningItem]):
diff --git a/src/agents/result.py b/src/agents/result.py
@@ -2,6 +2,7 @@
 
 import abc
 import asyncio
+import weakref
 from collections.abc import AsyncIterator
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -74,6 +75,32 @@ class RunResultBase(abc.ABC):
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
 
+    def release_agents(self) -> None:
+        """
+        Release strong references to agents held by this result. After calling this method,
+        `item.agent` may return `None` and `last_agent` may raise `AgentsException` once the
+        agent has been garbage collected. Callers can use this when they are done inspecting
+        the result and want to eagerly drop any associated agent graph.
+        """
+        for item in self.new_items:
+            release = getattr(item, "release_agent", None)
+            if callable(release):
+                release()
+        self._release_last_agent_reference()
+
+    def __del__(self) -> None:
+        try:
+            # Fall back to releasing agents automatically in case the caller never invoked
+            # `release_agents()` explicitly. This keeps the no-leak guarantee confirmed by tests.
+            self.release_agents()
+        except Exception:
+            # Avoid raising from __del__.
+            pass
+
+    @abc.abstractmethod
+    def _release_last_agent_reference(self) -> None:
+        """Release stored agent reference specific to the concrete result type."""
+
     def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
         """A convenience method to cast the final output to a specific type. By default, the cast
         is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a
@@ -111,11 +138,33 @@ def last_response_id(self) -> str | None:
 @dataclass
 class RunResult(RunResultBase):
     _last_agent: Agent[Any]
+    _last_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        self._last_agent_ref = weakref.ref(self._last_agent)
 
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
-        return self._last_agent
+        agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if agent is not None:
+            return agent
+        if self._last_agent_ref:
+            agent = self._last_agent_ref()
+            if agent is not None:
+                return agent
+        raise AgentsException("Last agent reference is no longer available.")
+
+    def _release_last_agent_reference(self) -> None:
+        agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if agent is None:
+            return
+        self._last_agent_ref = weakref.ref(agent)
+        object.__delattr__(self, "_last_agent")
 
     def __str__(self) -> str:
         return pretty_print_result(self)
@@ -150,6 +199,12 @@ class RunResultStreaming(RunResultBase):
     is_complete: bool = False
     """Whether the agent has finished running."""
 
+    _current_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
     # Queues that the background run_loop writes to
     _event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field(
         default_factory=asyncio.Queue, repr=False
@@ -167,12 +222,29 @@ class RunResultStreaming(RunResultBase):
     # Soft cancel state
     _cancel_mode: Literal["none", "immediate", "after_turn"] = field(default="none", repr=False)
 
+    def __post_init__(self) -> None:
+        self._current_agent_ref = weakref.ref(self.current_agent)
+
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run. Updates as the agent run progresses, so the true last agent
         is only available after the agent run is complete.
         """
-        return self.current_agent
+        agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if agent is not None:
+            return agent
+        if self._current_agent_ref:
+            agent = self._current_agent_ref()
+            if agent is not None:
+                return agent
+        raise AgentsException("Last agent reference is no longer available.")
+
+    def _release_last_agent_reference(self) -> None:
+        agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if agent is None:
+            return
+        self._current_agent_ref = weakref.ref(agent)
+        object.__delattr__(self, "current_agent")
 
     def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None:
         """Cancel the streaming run.
diff --git a/tests/test_agent_memory_leak.py b/tests/test_agent_memory_leak.py
@@ -23,9 +23,10 @@ def _make_message(text: str) -> ResponseOutputMessage:
 @pytest.mark.asyncio
 async def test_agent_is_released_after_run() -> None:
     fake_model = FakeModel(initial_output=[_make_message("Paris")])
-    agent = Agent(name="leaker", instructions="Answer questions.", model=fake_model)
+    agent = Agent(name="leak-test-agent", instructions="Answer questions.", model=fake_model)
     agent_ref = weakref.ref(agent)
 
+    # Running the agent should not leave behind strong references once the result goes out of scope.
     await Runner.run(agent, "What is the capital of France?")
 
     del agent
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gc
 import json
 
 from openai.types.responses.response_computer_tool_call import (
@@ -57,16 +58,18 @@ def make_message(
 
 def test_extract_last_content_of_text_message() -> None:
     # Build a message containing two text segments.
-    content1 = ResponseOutputText(annotations=[], text="Hello ", type="output_text")
-    content2 = ResponseOutputText(annotations=[], text="world!", type="output_text")
+    content1 = ResponseOutputText(annotations=[], text="Hello ", type="output_text", logprobs=[])
+    content2 = ResponseOutputText(annotations=[], text="world!", type="output_text", logprobs=[])
     message = make_message([content1, content2])
     # Helpers should yield the last segment's text.
     assert ItemHelpers.extract_last_content(message) == "world!"
 
 
 def test_extract_last_content_of_refusal_message() -> None:
     # Build a message whose last content entry is a refusal.
-    content1 = ResponseOutputText(annotations=[], text="Before refusal", type="output_text")
+    content1 = ResponseOutputText(
+        annotations=[], text="Before refusal", type="output_text", logprobs=[]
+    )
     refusal = ResponseOutputRefusal(refusal="I cannot do that", type="refusal")
     message = make_message([content1, refusal])
     # Helpers should extract the refusal string when last content is a refusal.
@@ -87,8 +90,8 @@ def test_extract_last_content_non_message_returns_empty() -> None:
 
 def test_extract_last_text_returns_text_only() -> None:
     # A message whose last segment is text yields the text.
-    first_text = ResponseOutputText(annotations=[], text="part1", type="output_text")
-    second_text = ResponseOutputText(annotations=[], text="part2", type="output_text")
+    first_text = ResponseOutputText(annotations=[], text="part1", type="output_text", logprobs=[])
+    second_text = ResponseOutputText(annotations=[], text="part2", type="output_text", logprobs=[])
     message = make_message([first_text, second_text])
     assert ItemHelpers.extract_last_text(message) == "part2"
     # Whereas when last content is a refusal, extract_last_text returns None.
@@ -116,9 +119,9 @@ def test_input_to_new_input_list_deep_copies_lists() -> None:
 def test_text_message_output_concatenates_text_segments() -> None:
     # Build a message with both text and refusal segments, only text segments are concatenated.
     pieces: list[ResponseOutputText | ResponseOutputRefusal] = []
-    pieces.append(ResponseOutputText(annotations=[], text="a", type="output_text"))
+    pieces.append(ResponseOutputText(annotations=[], text="a", type="output_text", logprobs=[]))
     pieces.append(ResponseOutputRefusal(refusal="denied", type="refusal"))
-    pieces.append(ResponseOutputText(annotations=[], text="b", type="output_text"))
+    pieces.append(ResponseOutputText(annotations=[], text="b", type="output_text", logprobs=[]))
     message = make_message(pieces)
     # Wrap into MessageOutputItem to feed into text_message_output.
     item = MessageOutputItem(agent=Agent(name="test"), raw_item=message)
@@ -131,8 +134,12 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
     that only MessageOutputItem instances contribute any text. The non-message
     (ReasoningItem) should be ignored by Helpers.text_message_outputs.
     """
-    message1 = make_message([ResponseOutputText(annotations=[], text="foo", type="output_text")])
-    message2 = make_message([ResponseOutputText(annotations=[], text="bar", type="output_text")])
+    message1 = make_message(
+        [ResponseOutputText(annotations=[], text="foo", type="output_text", logprobs=[])]
+    )
+    message2 = make_message(
+        [ResponseOutputText(annotations=[], text="bar", type="output_text", logprobs=[])]
+    )
     item1: RunItem = MessageOutputItem(agent=Agent(name="test"), raw_item=message1)
     item2: RunItem = MessageOutputItem(agent=Agent(name="test"), raw_item=message2)
     # Create a non-message run item of a different type, e.g., a reasoning trace.
@@ -142,6 +149,19 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
     assert ItemHelpers.text_message_outputs([item1, non_message_item, item2]) == "foobar"
 
 
+def test_message_output_item_retains_agent_until_release() -> None:
+    # Build the item around an inline agent so the run item holds the only strong reference.
+    text = ResponseOutputText(annotations=[], text="hello", type="output_text", logprobs=[])
+    item = MessageOutputItem(agent=Agent(name="inline"), raw_item=make_message([text]))
+    assert item.agent is not None
+    assert item.agent.name == "inline"
+
+    # After explicitly releasing, the weak reference should drop once GC runs.
+    item.release_agent()
+    gc.collect()
+    assert item.agent is None
+
+
 def test_tool_call_output_item_constructs_function_call_output_dict():
     # Build a simple ResponseFunctionToolCall.
     call = ResponseFunctionToolCall(
@@ -171,7 +191,9 @@ def test_tool_call_output_item_constructs_function_call_output_dict():
 
 def test_to_input_items_for_message() -> None:
     """An output message should convert into an input dict matching the message's own structure."""
-    content = ResponseOutputText(annotations=[], text="hello world", type="output_text")
+    content = ResponseOutputText(
+        annotations=[], text="hello world", type="output_text", logprobs=[]
+    )
     message = ResponseOutputMessage(
         id="m1", content=[content], role="assistant", status="completed", type="message"
     )
@@ -184,6 +206,7 @@ def test_to_input_items_for_message() -> None:
         "content": [
             {
                 "annotations": [],
+                "logprobs": [],
                 "text": "hello world",
                 "type": "output_text",
             }
@@ -305,6 +328,7 @@ def test_input_to_new_input_list_copies_the_ones_produced_by_pydantic() -> None:
                 type="output_text",
                 text="Hey, what's up?",
                 annotations=[],
+                logprobs=[],
             )
         ],
         role="assistant",
diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py