Skip to content

Commit 5a9cab8

Browse files
ihower and seratch authored
Prevent preamble messages from being treated as final output when tool calls are pending (openai#1689)
Co-authored-by: Kazuhiro Sera <[email protected]>
1 parent 9ad2949 commit 5a9cab8

File tree

3 files changed

+53
-53
lines changed

3 files changed

+53
-53
lines changed

src/agents/_run_impl.py

Lines changed: 34 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -330,43 +330,40 @@ async def execute_tools_and_side_effects(
330330
ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
331331
)
332332

333-
# There are two possibilities that lead to a final output:
334-
# 1. Structured output schema => always leads to a final output
335-
# 2. Plain text output schema => only leads to a final output if there are no tool calls
336-
if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
337-
final_output = output_schema.validate_json(potential_final_output_text)
338-
return await cls.execute_final_output(
339-
agent=agent,
340-
original_input=original_input,
341-
new_response=new_response,
342-
pre_step_items=pre_step_items,
343-
new_step_items=new_step_items,
344-
final_output=final_output,
345-
hooks=hooks,
346-
context_wrapper=context_wrapper,
347-
)
348-
elif (
349-
not output_schema or output_schema.is_plain_text()
350-
) and not processed_response.has_tools_or_approvals_to_run():
351-
return await cls.execute_final_output(
352-
agent=agent,
353-
original_input=original_input,
354-
new_response=new_response,
355-
pre_step_items=pre_step_items,
356-
new_step_items=new_step_items,
357-
final_output=potential_final_output_text or "",
358-
hooks=hooks,
359-
context_wrapper=context_wrapper,
360-
)
361-
else:
362-
# If there's no final output, we can just run again
363-
return SingleStepResult(
364-
original_input=original_input,
365-
model_response=new_response,
366-
pre_step_items=pre_step_items,
367-
new_step_items=new_step_items,
368-
next_step=NextStepRunAgain(),
369-
)
333+
# Generate final output only when there are no pending tool calls or approval requests.
334+
if not processed_response.has_tools_or_approvals_to_run():
335+
if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
336+
final_output = output_schema.validate_json(potential_final_output_text)
337+
return await cls.execute_final_output(
338+
agent=agent,
339+
original_input=original_input,
340+
new_response=new_response,
341+
pre_step_items=pre_step_items,
342+
new_step_items=new_step_items,
343+
final_output=final_output,
344+
hooks=hooks,
345+
context_wrapper=context_wrapper,
346+
)
347+
elif not output_schema or output_schema.is_plain_text():
348+
return await cls.execute_final_output(
349+
agent=agent,
350+
original_input=original_input,
351+
new_response=new_response,
352+
pre_step_items=pre_step_items,
353+
new_step_items=new_step_items,
354+
final_output=potential_final_output_text or "",
355+
hooks=hooks,
356+
context_wrapper=context_wrapper,
357+
)
358+
359+
# If there's no final output, we can just run again
360+
return SingleStepResult(
361+
original_input=original_input,
362+
model_response=new_response,
363+
pre_step_items=pre_step_items,
364+
new_step_items=new_step_items,
365+
next_step=NextStepRunAgain(),
366+
)
370367

371368
@classmethod
372369
def maybe_reset_tool_choice(

tests/test_agent_runner.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -196,11 +196,13 @@ async def test_structured_output():
196196
[get_function_tool_call("foo", json.dumps({"bar": "baz"}))],
197197
# Second turn: a message and a handoff
198198
[get_text_message("a_message"), get_handoff_tool_call(agent_1)],
199-
# Third turn: tool call and structured output
199+
# Third turn: tool call with preamble message
200200
[
201+
get_text_message(json.dumps(Foo(bar="preamble"))),
201202
get_function_tool_call("bar", json.dumps({"bar": "baz"})),
202-
get_final_output_message(json.dumps(Foo(bar="baz"))),
203203
],
204+
# Fourth turn: structured output
205+
[get_final_output_message(json.dumps(Foo(bar="baz")))],
204206
]
205207
)
206208

@@ -213,10 +215,10 @@ async def test_structured_output():
213215
)
214216

215217
assert result.final_output == Foo(bar="baz")
216-
assert len(result.raw_responses) == 3, "should have three model responses"
217-
assert len(result.to_input_list()) == 10, (
218+
assert len(result.raw_responses) == 4, "should have four model responses"
219+
assert len(result.to_input_list()) == 11, (
218220
"should have input: 2 orig inputs, function call, function call result, message, handoff, "
219-
"handoff output, tool call, tool call result, final output message"
221+
"handoff output, preamble message, tool call, tool call result, final output"
220222
)
221223

222224
assert result.last_agent == agent_1, "should have handed off to agent_1"

tests/test_agent_runner_streamed.py

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -207,11 +207,13 @@ async def test_structured_output():
207207
[get_function_tool_call("foo", json.dumps({"bar": "baz"}))],
208208
# Second turn: a message and a handoff
209209
[get_text_message("a_message"), get_handoff_tool_call(agent_1)],
210-
# Third turn: tool call and structured output
210+
# Third turn: tool call with preamble message
211211
[
212+
get_text_message(json.dumps(Foo(bar="preamble"))),
212213
get_function_tool_call("bar", json.dumps({"bar": "baz"})),
213-
get_final_output_message(json.dumps(Foo(bar="baz"))),
214214
],
215+
# Fourth turn: structured output
216+
[get_final_output_message(json.dumps(Foo(bar="baz")))],
215217
]
216218
)
217219

@@ -226,10 +228,10 @@ async def test_structured_output():
226228
pass
227229

228230
assert result.final_output == Foo(bar="baz")
229-
assert len(result.raw_responses) == 3, "should have three model responses"
230-
assert len(result.to_input_list()) == 10, (
231+
assert len(result.raw_responses) == 4, "should have four model responses"
232+
assert len(result.to_input_list()) == 11, (
231233
"should have input: 2 orig inputs, function call, function call result, message, handoff, "
232-
"handoff output, tool call, tool call result, final output"
234+
"handoff output, preamble message, tool call, tool call result, final output"
233235
)
234236

235237
assert result.last_agent == agent_1, "should have handed off to agent_1"
@@ -624,11 +626,10 @@ async def test_streaming_events():
624626
[get_function_tool_call("foo", json.dumps({"bar": "baz"}))],
625627
# Second turn: a message and a handoff
626628
[get_text_message("a_message"), get_handoff_tool_call(agent_1)],
627-
# Third turn: tool call and structured output
628-
[
629-
get_function_tool_call("bar", json.dumps({"bar": "baz"})),
630-
get_final_output_message(json.dumps(Foo(bar="baz"))),
631-
],
629+
# Third turn: tool call
630+
[get_function_tool_call("bar", json.dumps({"bar": "baz"}))],
631+
# Fourth turn: structured output
632+
[get_final_output_message(json.dumps(Foo(bar="baz")))],
632633
]
633634
)
634635

@@ -652,7 +653,7 @@ async def test_streaming_events():
652653
agent_data.append(event)
653654

654655
assert result.final_output == Foo(bar="baz")
655-
assert len(result.raw_responses) == 3, "should have three model responses"
656+
assert len(result.raw_responses) == 4, "should have four model responses"
656657
assert len(result.to_input_list()) == 10, (
657658
"should have input: 2 orig inputs, function call, function call result, message, handoff, "
658659
"handoff output, tool call, tool call result, final output"

0 commit comments

Comments
 (0)