diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 56784004c..a2d872bf1 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -330,43 +330,40 @@ async def execute_tools_and_side_effects( ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None ) - # There are two possibilities that lead to a final output: - # 1. Structured output schema => always leads to a final output - # 2. Plain text output schema => only leads to a final output if there are no tool calls - if output_schema and not output_schema.is_plain_text() and potential_final_output_text: - final_output = output_schema.validate_json(potential_final_output_text) - return await cls.execute_final_output( - agent=agent, - original_input=original_input, - new_response=new_response, - pre_step_items=pre_step_items, - new_step_items=new_step_items, - final_output=final_output, - hooks=hooks, - context_wrapper=context_wrapper, - ) - elif ( - not output_schema or output_schema.is_plain_text() - ) and not processed_response.has_tools_or_approvals_to_run(): - return await cls.execute_final_output( - agent=agent, - original_input=original_input, - new_response=new_response, - pre_step_items=pre_step_items, - new_step_items=new_step_items, - final_output=potential_final_output_text or "", - hooks=hooks, - context_wrapper=context_wrapper, - ) - else: - # If there's no final output, we can just run again - return SingleStepResult( - original_input=original_input, - model_response=new_response, - pre_step_items=pre_step_items, - new_step_items=new_step_items, - next_step=NextStepRunAgain(), - ) + # Generate final output only when there are no pending tool calls or approval requests. + if not processed_response.has_tools_or_approvals_to_run(): + if output_schema and not output_schema.is_plain_text() and potential_final_output_text: + final_output = output_schema.validate_json(potential_final_output_text) + return await cls.execute_final_output( + agent=agent, + original_input=original_input, + new_response=new_response, + pre_step_items=pre_step_items, + new_step_items=new_step_items, + final_output=final_output, + hooks=hooks, + context_wrapper=context_wrapper, + ) + elif not output_schema or output_schema.is_plain_text(): + return await cls.execute_final_output( + agent=agent, + original_input=original_input, + new_response=new_response, + pre_step_items=pre_step_items, + new_step_items=new_step_items, + final_output=potential_final_output_text or "", + hooks=hooks, + context_wrapper=context_wrapper, + ) + + # If there's no final output, we can just run again + return SingleStepResult( + original_input=original_input, + model_response=new_response, + pre_step_items=pre_step_items, + new_step_items=new_step_items, + next_step=NextStepRunAgain(), + ) @classmethod def maybe_reset_tool_choice( diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index c8ae5b5f2..3b6499ce4 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -192,11 +192,13 @@ async def test_structured_output(): [get_function_tool_call("foo", json.dumps({"bar": "baz"}))], # Second turn: a message and a handoff [get_text_message("a_message"), get_handoff_tool_call(agent_1)], - # Third turn: tool call and structured output + # Third turn: tool call with preamble message [ + get_text_message(json.dumps(Foo(bar="preamble"))), get_function_tool_call("bar", json.dumps({"bar": "baz"})), - get_final_output_message(json.dumps(Foo(bar="baz"))), ], + # Fourth turn: structured output + [get_final_output_message(json.dumps(Foo(bar="baz")))], ] ) @@ -209,10 +211,10 @@ async def test_structured_output(): ) assert result.final_output == Foo(bar="baz") - assert len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 10, ( + assert len(result.raw_responses) == 4, "should have four model responses" + assert len(result.to_input_list()) == 11, ( "should have input: 2 orig inputs, function call, function call result, message, handoff, " - "handoff output, tool call, tool call result, final output message" + "handoff output, preamble message, tool call, tool call result, final output" ) assert result.last_agent == agent_1, "should have handed off to agent_1" diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py index d4afbd2e0..ff807ca96 100644 --- a/tests/test_agent_runner_streamed.py +++ b/tests/test_agent_runner_streamed.py @@ -207,11 +207,13 @@ async def test_structured_output(): [get_function_tool_call("foo", json.dumps({"bar": "baz"}))], # Second turn: a message and a handoff [get_text_message("a_message"), get_handoff_tool_call(agent_1)], - # Third turn: tool call and structured output + # Third turn: tool call with preamble message [ + get_text_message(json.dumps(Foo(bar="preamble"))), get_function_tool_call("bar", json.dumps({"bar": "baz"})), - get_final_output_message(json.dumps(Foo(bar="baz"))), ], + # Fourth turn: structured output + [get_final_output_message(json.dumps(Foo(bar="baz")))], ] ) @@ -226,10 +228,10 @@ async def test_structured_output(): pass assert result.final_output == Foo(bar="baz") - assert len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 10, ( + assert len(result.raw_responses) == 4, "should have four model responses" + assert len(result.to_input_list()) == 11, ( "should have input: 2 orig inputs, function call, function call result, message, handoff, " - "handoff output, tool call, tool call result, final output" + "handoff output, preamble message, tool call, tool call result, final output" ) assert result.last_agent == agent_1, "should have handed off to agent_1" @@ -624,11 +626,10 @@ async def test_streaming_events(): [get_function_tool_call("foo", json.dumps({"bar": "baz"}))], # Second turn: a message and a handoff [get_text_message("a_message"), get_handoff_tool_call(agent_1)], - # Third turn: tool call and structured output - [ - get_function_tool_call("bar", json.dumps({"bar": "baz"})), - get_final_output_message(json.dumps(Foo(bar="baz"))), - ], + # Third turn: tool call + [get_function_tool_call("bar", json.dumps({"bar": "baz"}))], + # Fourth turn: structured output + [get_final_output_message(json.dumps(Foo(bar="baz")))], ] ) @@ -652,7 +653,7 @@ async def test_streaming_events(): agent_data.append(event) assert result.final_output == Foo(bar="baz") - assert len(result.raw_responses) == 3, "should have three model responses" + assert len(result.raw_responses) == 4, "should have four model responses" assert len(result.to_input_list()) == 10, ( "should have input: 2 orig inputs, function call, function call result, message, handoff, " "handoff output, tool call, tool call result, final output"