diff --git a/.changeset/heavy-cobras-explode.md b/.changeset/heavy-cobras-explode.md new file mode 100644 index 00000000..10c675b2 --- /dev/null +++ b/.changeset/heavy-cobras-explode.md @@ -0,0 +1,5 @@ +--- +"@openai/agents-core": patch +--- + +fix: delay final output until tools complete diff --git a/packages/agents-core/src/runImplementation.ts b/packages/agents-core/src/runImplementation.ts index 056ea15f..d62e3712 100644 --- a/packages/agents-core/src/runImplementation.ts +++ b/packages/agents-core/src/runImplementation.ts @@ -559,7 +559,24 @@ export async function executeToolsAndSideEffects( ); } - // check if the agent produced any messages + // If the model issued any tool calls or handoffs in this turn, + // we must NOT treat any assistant message in the same turn as the final output. + // We should run the loop again so the model can see the tool results and respond. + const hadToolCallsOrActions = + (processedResponse.functions?.length ?? 0) > 0 || + (processedResponse.computerActions?.length ?? 0) > 0 || + (processedResponse.mcpApprovalRequests?.length ?? 0) > 0 || + (processedResponse.handoffs?.length ?? 0) > 0; + if (hadToolCallsOrActions) { + return new SingleStepResult( + originalInput, + newResponse, + preStepItems, + newItems, + { type: 'next_step_run_again' }, + ); + } + // No tool calls/actions in this turn; safe to consider a plain assistant message as final. const messageItems = newItems.filter( (item) => item instanceof RunMessageOutputItem, ); @@ -583,44 +600,49 @@ export async function executeToolsAndSideEffects( ); } - if ( - agent.outputType === 'text' && - !processedResponse.hasToolsOrApprovalsToRun() - ) { - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { - type: 'next_step_final_output', - output: potentialFinalOutput, - }, - ); - } else if (agent.outputType !== 'text' && potentialFinalOutput) { - // Structured output schema => always leads to a final output if we have text - const { parser } = getSchemaAndParserFromInputType( - agent.outputType, - 'final_output', - ); - const [error] = await safeExecute(() => parser(potentialFinalOutput)); - if (error) { - addErrorToCurrentSpan({ - message: 'Invalid output type', - data: { - error: String(error), + const hasPendingToolsOrApprovals = functionResults.some( + (result) => result.runItem instanceof RunToolApprovalItem, + ); + + if (!hasPendingToolsOrApprovals) { + if (agent.outputType === 'text') { + return new SingleStepResult( + originalInput, + newResponse, + preStepItems, + newItems, + { + type: 'next_step_final_output', + output: potentialFinalOutput, }, - }); - throw new ModelBehaviorError('Invalid output type'); + ); } - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { type: 'next_step_final_output', output: potentialFinalOutput }, - ); + if (agent.outputType !== 'text' && potentialFinalOutput) { + // Structured output schema => always leads to a final output if we have text. + const { parser } = getSchemaAndParserFromInputType( + agent.outputType, + 'final_output', + ); + const [error] = await safeExecute(() => parser(potentialFinalOutput)); + if (error) { + addErrorToCurrentSpan({ + message: 'Invalid output type', + data: { + error: String(error), + }, + }); + throw new ModelBehaviorError('Invalid output type'); + } + + return new SingleStepResult( + originalInput, + newResponse, + preStepItems, + newItems, + { type: 'next_step_final_output', output: potentialFinalOutput }, + ); + } } return new SingleStepResult( diff --git a/packages/agents-core/test/runImplementation.test.ts b/packages/agents-core/test/runImplementation.test.ts index cb243c2d..4842c729 100644 --- a/packages/agents-core/test/runImplementation.test.ts +++ b/packages/agents-core/test/runImplementation.test.ts @@ -27,7 +27,13 @@ import { executeHandoffCalls, executeToolsAndSideEffects, } from '../src/runImplementation'; -import { FunctionTool, FunctionToolResult, tool } from '../src/tool'; +import { + FunctionTool, + FunctionToolResult, + tool, + computerTool, + hostedMcpTool, +} from '../src/tool'; import { handoff } from '../src/handoff'; import { ModelBehaviorError, UserError } from '../src/errors'; import { Computer } from '../src/computer'; @@ -41,8 +47,8 @@ import { TEST_MODEL_RESPONSE_WITH_FUNCTION, TEST_TOOL, FakeModelProvider, + fakeModelMessage, } from './stubs'; -import { computerTool } from '../src/tool'; import * as protocol from '../src/types/protocol'; import { Runner } from '../src/run'; import { RunContext } from '../src/runContext'; @@ -877,7 +883,7 @@ describe('executeToolsAndSideEffects', () => { state = new RunState(new RunContext(), 'test input', TEST_AGENT, 1); }); - it('continues execution when text agent has tools pending', async () => { + it('does not finalize when tools are used in the same turn (text output); runs again', async () => { const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' }); const processedResponse = processModelResponse( TEST_MODEL_RESPONSE_WITH_FUNCTION, @@ -903,6 +909,53 @@ describe('executeToolsAndSideEffects', () => { expect(result.nextStep.type).toBe('next_step_run_again'); }); + it('does not finalize when tools are used in the same turn (structured output); runs again', async () => { + const structuredAgent = new Agent({ + name: 'StructuredAgent', + outputType: z.object({ + foo: z.string(), + }), + }); + + const structuredResponse: ModelResponse = { + output: [ + { ...TEST_MODEL_FUNCTION_CALL }, + fakeModelMessage('{"foo":"bar"}'), + ], + usage: new Usage(), + } as any; + + const processedResponse = processModelResponse( + structuredResponse, + structuredAgent, + [TEST_TOOL], + [], + ); + + expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(true); + + const structuredState = new RunState( + new RunContext(), + 'test input', + structuredAgent, + 1, + ); + + const result = await withTrace('test', () => + executeToolsAndSideEffects( + structuredAgent, + 'test input', + [], + structuredResponse, + processedResponse, + runner, + structuredState, + ), + ); + + expect(result.nextStep.type).toBe('next_step_run_again'); + }); + it('returns final output when text agent has no tools pending', async () => { const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' }); const response: ModelResponse = { @@ -973,4 +1026,190 @@ describe('executeToolsAndSideEffects', () => { expect(result.nextStep.output).toBe(''); } }); + + it('does not finalize after computer actions in the same turn; runs again', async () => { + const computerAgent = new Agent({ + name: 'ComputerAgent', + outputType: 'text', + }); + const fakeComputer = { + environment: 'mac', + dimensions: [1, 1] as [number, number], + screenshot: vi.fn().mockResolvedValue('img'), + click: vi.fn(), + doubleClick: vi.fn(), + drag: vi.fn(), + keypress: vi.fn(), + move: vi.fn(), + scroll: vi.fn(), + type: vi.fn(), + wait: vi.fn(), + }; + const computer = computerTool({ + computer: fakeComputer as unknown as Computer, + }); + const computerCall: protocol.ComputerUseCallItem = { + type: 'computer_call', + id: 'comp1', + callId: 'comp1', + status: 'completed', + action: { type: 'screenshot' }, + } as protocol.ComputerUseCallItem; + + const computerResponse: ModelResponse = { + output: [computerCall, { ...TEST_MODEL_MESSAGE }], + usage: new Usage(), + } as any; + + const processedResponse = processModelResponse( + computerResponse, + computerAgent, + [computer], + [], + ); + + const computerState = new RunState( + new RunContext(), + 'test input', + computerAgent, + 1, + ); + + const result = await withTrace('test', () => + executeToolsAndSideEffects( + computerAgent, + 'test input', + [], + computerResponse, + processedResponse, + runner, + computerState, + ), + ); + + expect(result.nextStep.type).toBe('next_step_run_again'); + }); + + it('does not finalize when hosted MCP approval happens in the same turn; runs again', async () => { + const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' }); + const mcpTool = hostedMcpTool({ + serverLabel: 'demo_server', + serverUrl: 'https://example.com', + requireApproval: { + always: { toolNames: ['demo_tool'] }, + }, + onApproval: async () => ({ approve: true, reason: 'approved in test' }), + }); + + const approvalCall: protocol.HostedToolCallItem = { + type: 'hosted_tool_call', + id: 'approval1', + name: 'mcp_approval_request', + status: 'completed', + providerData: { + type: 'mcp_approval_request', + server_label: 'demo_server', + name: 'demo_tool', + id: 'approval1', + arguments: '{}', + }, + } as protocol.HostedToolCallItem; + + const approvalResponse: ModelResponse = { + output: [approvalCall, { ...TEST_MODEL_MESSAGE }], + usage: new Usage(), + } as any; + + const processedResponse = processModelResponse( + approvalResponse, + approvalAgent, + [mcpTool], + [], + ); + + const approvalState = new RunState( + new RunContext(), + 'test input', + approvalAgent, + 1, + ); + + const result = await withTrace('test', () => + executeToolsAndSideEffects( + approvalAgent, + 'test input', + [], + approvalResponse, + processedResponse, + runner, + approvalState, + ), + ); + + expect(result.nextStep.type).toBe('next_step_run_again'); + }); + + it('returns interruption when hosted MCP approval requires user input', async () => { + const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' }); + const mcpTool = hostedMcpTool({ + serverLabel: 'demo_server', + serverUrl: 'https://example.com', + requireApproval: { + always: { toolNames: ['demo_tool'] }, + }, + }); + + const approvalCall: protocol.HostedToolCallItem = { + type: 'hosted_tool_call', + id: 'approval1', + name: 'mcp_approval_request', + status: 'completed', + providerData: { + type: 'mcp_approval_request', + server_label: 'demo_server', + name: 'demo_tool', + id: 'approval1', + arguments: '{}', + }, + } as protocol.HostedToolCallItem; + + const approvalResponse: ModelResponse = { + output: [approvalCall, { ...TEST_MODEL_MESSAGE }], + usage: new Usage(), + } as any; + + const processedResponse = processModelResponse( + approvalResponse, + approvalAgent, + [mcpTool], + [], + ); + + const approvalState = new RunState( + new RunContext(), + 'test input', + approvalAgent, + 1, + ); + + const result = await withTrace('test', () => + executeToolsAndSideEffects( + approvalAgent, + 'test input', + [], + approvalResponse, + processedResponse, + runner, + approvalState, + ), + ); + + expect(result.nextStep.type).toBe('next_step_interruption'); + if (result.nextStep.type === 'next_step_interruption') { + expect(result.nextStep.data.interruptions).toHaveLength(1); + expect(result.nextStep.data.interruptions[0].rawItem).toMatchObject({ + providerData: { id: 'approval1', type: 'mcp_approval_request' }, + }); + } + }); });