Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/heavy-cobras-explode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@openai/agents-core": patch
---

fix: delay final output until tools complete
94 changes: 58 additions & 36 deletions packages/agents-core/src/runImplementation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,24 @@ export async function executeToolsAndSideEffects<TContext>(
);
}

// check if the agent produced any messages
// If the model issued any tool calls or handoffs in this turn,
// we must NOT treat any assistant message in the same turn as the final output.
// We should run the loop again so the model can see the tool results and respond.
const hadToolCallsOrActions =
(processedResponse.functions?.length ?? 0) > 0 ||
(processedResponse.computerActions?.length ?? 0) > 0 ||
(processedResponse.mcpApprovalRequests?.length ?? 0) > 0 ||
(processedResponse.handoffs?.length ?? 0) > 0;
if (hadToolCallsOrActions) {
return new SingleStepResult(
originalInput,
newResponse,
preStepItems,
newItems,
{ type: 'next_step_run_again' },
);
}
// No tool calls/actions in this turn; safe to consider a plain assistant message as final.
const messageItems = newItems.filter(
(item) => item instanceof RunMessageOutputItem,
);
Expand All @@ -583,44 +600,49 @@ export async function executeToolsAndSideEffects<TContext>(
);
}

if (
agent.outputType === 'text' &&
!processedResponse.hasToolsOrApprovalsToRun()
) {
return new SingleStepResult(
originalInput,
newResponse,
preStepItems,
newItems,
{
type: 'next_step_final_output',
output: potentialFinalOutput,
},
);
} else if (agent.outputType !== 'text' && potentialFinalOutput) {
// Structured output schema => always leads to a final output if we have text
const { parser } = getSchemaAndParserFromInputType(
agent.outputType,
'final_output',
);
const [error] = await safeExecute(() => parser(potentialFinalOutput));
if (error) {
addErrorToCurrentSpan({
message: 'Invalid output type',
data: {
error: String(error),
const hasPendingToolsOrApprovals = functionResults.some(
(result) => result.runItem instanceof RunToolApprovalItem,
);

if (!hasPendingToolsOrApprovals) {
if (agent.outputType === 'text') {
return new SingleStepResult(
originalInput,
newResponse,
preStepItems,
newItems,
{
type: 'next_step_final_output',
output: potentialFinalOutput,
},
});
throw new ModelBehaviorError('Invalid output type');
);
}

return new SingleStepResult(
originalInput,
newResponse,
preStepItems,
newItems,
{ type: 'next_step_final_output', output: potentialFinalOutput },
);
if (agent.outputType !== 'text' && potentialFinalOutput) {
// Structured output schema => always leads to a final output if we have text.
const { parser } = getSchemaAndParserFromInputType(
agent.outputType,
'final_output',
);
const [error] = await safeExecute(() => parser(potentialFinalOutput));
if (error) {
addErrorToCurrentSpan({
message: 'Invalid output type',
data: {
error: String(error),
},
});
throw new ModelBehaviorError('Invalid output type');
}

return new SingleStepResult(
originalInput,
newResponse,
preStepItems,
newItems,
{ type: 'next_step_final_output', output: potentialFinalOutput },
);
}
}

return new SingleStepResult(
Expand Down
245 changes: 242 additions & 3 deletions packages/agents-core/test/runImplementation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ import {
executeHandoffCalls,
executeToolsAndSideEffects,
} from '../src/runImplementation';
import { FunctionTool, FunctionToolResult, tool } from '../src/tool';
import {
FunctionTool,
FunctionToolResult,
tool,
computerTool,
hostedMcpTool,
} from '../src/tool';
import { handoff } from '../src/handoff';
import { ModelBehaviorError, UserError } from '../src/errors';
import { Computer } from '../src/computer';
Expand All @@ -41,8 +47,8 @@ import {
TEST_MODEL_RESPONSE_WITH_FUNCTION,
TEST_TOOL,
FakeModelProvider,
fakeModelMessage,
} from './stubs';
import { computerTool } from '../src/tool';
import * as protocol from '../src/types/protocol';
import { Runner } from '../src/run';
import { RunContext } from '../src/runContext';
Expand Down Expand Up @@ -877,7 +883,7 @@ describe('executeToolsAndSideEffects', () => {
state = new RunState(new RunContext(), 'test input', TEST_AGENT, 1);
});

it('continues execution when text agent has tools pending', async () => {
it('does not finalize when tools are used in the same turn (text output); runs again', async () => {
const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' });
const processedResponse = processModelResponse(
TEST_MODEL_RESPONSE_WITH_FUNCTION,
Expand All @@ -903,6 +909,53 @@ describe('executeToolsAndSideEffects', () => {
expect(result.nextStep.type).toBe('next_step_run_again');
});

it('does not finalize when tools are used in the same turn (structured output); runs again', async () => {
const structuredAgent = new Agent({
name: 'StructuredAgent',
outputType: z.object({
foo: z.string(),
}),
});

const structuredResponse: ModelResponse = {
output: [
{ ...TEST_MODEL_FUNCTION_CALL },
fakeModelMessage('{"foo":"bar"}'),
],
usage: new Usage(),
} as any;

const processedResponse = processModelResponse(
structuredResponse,
structuredAgent,
[TEST_TOOL],
[],
);

expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(true);

const structuredState = new RunState(
new RunContext(),
'test input',
structuredAgent,
1,
);

const result = await withTrace('test', () =>
executeToolsAndSideEffects(
structuredAgent,
'test input',
[],
structuredResponse,
processedResponse,
runner,
structuredState,
),
);

expect(result.nextStep.type).toBe('next_step_run_again');
});

it('returns final output when text agent has no tools pending', async () => {
const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' });
const response: ModelResponse = {
Expand Down Expand Up @@ -973,4 +1026,190 @@ describe('executeToolsAndSideEffects', () => {
expect(result.nextStep.output).toBe('');
}
});

it('does not finalize after computer actions in the same turn; runs again', async () => {
const computerAgent = new Agent({
name: 'ComputerAgent',
outputType: 'text',
});
const fakeComputer = {
environment: 'mac',
dimensions: [1, 1] as [number, number],
screenshot: vi.fn().mockResolvedValue('img'),
click: vi.fn(),
doubleClick: vi.fn(),
drag: vi.fn(),
keypress: vi.fn(),
move: vi.fn(),
scroll: vi.fn(),
type: vi.fn(),
wait: vi.fn(),
};
const computer = computerTool({
computer: fakeComputer as unknown as Computer,
});
const computerCall: protocol.ComputerUseCallItem = {
type: 'computer_call',
id: 'comp1',
callId: 'comp1',
status: 'completed',
action: { type: 'screenshot' },
} as protocol.ComputerUseCallItem;

const computerResponse: ModelResponse = {
output: [computerCall, { ...TEST_MODEL_MESSAGE }],
usage: new Usage(),
} as any;

const processedResponse = processModelResponse(
computerResponse,
computerAgent,
[computer],
[],
);

const computerState = new RunState(
new RunContext(),
'test input',
computerAgent,
1,
);

const result = await withTrace('test', () =>
executeToolsAndSideEffects(
computerAgent,
'test input',
[],
computerResponse,
processedResponse,
runner,
computerState,
),
);

expect(result.nextStep.type).toBe('next_step_run_again');
});

it('does not finalize when hosted MCP approval happens in the same turn; runs again', async () => {
const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' });
const mcpTool = hostedMcpTool({
serverLabel: 'demo_server',
serverUrl: 'https://example.com',
requireApproval: {
always: { toolNames: ['demo_tool'] },
},
onApproval: async () => ({ approve: true, reason: 'approved in test' }),
});

const approvalCall: protocol.HostedToolCallItem = {
type: 'hosted_tool_call',
id: 'approval1',
name: 'mcp_approval_request',
status: 'completed',
providerData: {
type: 'mcp_approval_request',
server_label: 'demo_server',
name: 'demo_tool',
id: 'approval1',
arguments: '{}',
},
} as protocol.HostedToolCallItem;

const approvalResponse: ModelResponse = {
output: [approvalCall, { ...TEST_MODEL_MESSAGE }],
usage: new Usage(),
} as any;

const processedResponse = processModelResponse(
approvalResponse,
approvalAgent,
[mcpTool],
[],
);

const approvalState = new RunState(
new RunContext(),
'test input',
approvalAgent,
1,
);

const result = await withTrace('test', () =>
executeToolsAndSideEffects(
approvalAgent,
'test input',
[],
approvalResponse,
processedResponse,
runner,
approvalState,
),
);

expect(result.nextStep.type).toBe('next_step_run_again');
});

it('returns interruption when hosted MCP approval requires user input', async () => {
const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' });
const mcpTool = hostedMcpTool({
serverLabel: 'demo_server',
serverUrl: 'https://example.com',
requireApproval: {
always: { toolNames: ['demo_tool'] },
},
});

const approvalCall: protocol.HostedToolCallItem = {
type: 'hosted_tool_call',
id: 'approval1',
name: 'mcp_approval_request',
status: 'completed',
providerData: {
type: 'mcp_approval_request',
server_label: 'demo_server',
name: 'demo_tool',
id: 'approval1',
arguments: '{}',
},
} as protocol.HostedToolCallItem;

const approvalResponse: ModelResponse = {
output: [approvalCall, { ...TEST_MODEL_MESSAGE }],
usage: new Usage(),
} as any;

const processedResponse = processModelResponse(
approvalResponse,
approvalAgent,
[mcpTool],
[],
);

const approvalState = new RunState(
new RunContext(),
'test input',
approvalAgent,
1,
);

const result = await withTrace('test', () =>
executeToolsAndSideEffects(
approvalAgent,
'test input',
[],
approvalResponse,
processedResponse,
runner,
approvalState,
),
);

expect(result.nextStep.type).toBe('next_step_interruption');
if (result.nextStep.type === 'next_step_interruption') {
expect(result.nextStep.data.interruptions).toHaveLength(1);
expect(result.nextStep.data.interruptions[0].rawItem).toMatchObject({
providerData: { id: 'approval1', type: 'mcp_approval_request' },
});
}
});
});