openai · seratch · Sep 12, 2025 · Sep 9, 2025 · Sep 11, 2025
diff --git a/.changeset/heavy-cobras-explode.md b/.changeset/heavy-cobras-explode.md
@@ -0,0 +1,5 @@
+---
+"@openai/agents-core": patch
+---
+
+fix: delay final output until tools complete
diff --git a/packages/agents-core/src/runImplementation.ts b/packages/agents-core/src/runImplementation.ts
@@ -559,7 +559,24 @@ export async function executeToolsAndSideEffects<TContext>(
     );
   }
 
-  // check if the agent produced any messages
+  // If the model issued any tool calls or handoffs in this turn,
+  // we must NOT treat any assistant message in the same turn as the final output.
+  // We should run the loop again so the model can see the tool results and respond.
+  const hadToolCallsOrActions =
+    (processedResponse.functions?.length ?? 0) > 0 ||
+    (processedResponse.computerActions?.length ?? 0) > 0 ||
+    (processedResponse.mcpApprovalRequests?.length ?? 0) > 0 ||
+    (processedResponse.handoffs?.length ?? 0) > 0;
+  if (hadToolCallsOrActions) {
+    return new SingleStepResult(
+      originalInput,
+      newResponse,
+      preStepItems,
+      newItems,
+      { type: 'next_step_run_again' },
+    );
+  }
+  // No tool calls/actions in this turn; safe to consider a plain assistant message as final.
   const messageItems = newItems.filter(
     (item) => item instanceof RunMessageOutputItem,
   );
@@ -583,44 +600,49 @@ export async function executeToolsAndSideEffects<TContext>(
     );
   }
 
-  if (
-    agent.outputType === 'text' &&
-    !processedResponse.hasToolsOrApprovalsToRun()
-  ) {
-    return new SingleStepResult(
-      originalInput,
-      newResponse,
-      preStepItems,
-      newItems,
-      {
-        type: 'next_step_final_output',
-        output: potentialFinalOutput,
-      },
-    );
-  } else if (agent.outputType !== 'text' && potentialFinalOutput) {
-    // Structured output schema => always leads to a final output if we have text
-    const { parser } = getSchemaAndParserFromInputType(
-      agent.outputType,
-      'final_output',
-    );
-    const [error] = await safeExecute(() => parser(potentialFinalOutput));
-    if (error) {
-      addErrorToCurrentSpan({
-        message: 'Invalid output type',
-        data: {
-          error: String(error),
+  const hasPendingToolsOrApprovals = functionResults.some(
+    (result) => result.runItem instanceof RunToolApprovalItem,
+  );
+
+  if (!hasPendingToolsOrApprovals) {
+    if (agent.outputType === 'text') {
+      return new SingleStepResult(
+        originalInput,
+        newResponse,
+        preStepItems,
+        newItems,
+        {
+          type: 'next_step_final_output',
+          output: potentialFinalOutput,
         },
-      });
-      throw new ModelBehaviorError('Invalid output type');
+      );
     }
 
-    return new SingleStepResult(
-      originalInput,
-      newResponse,
-      preStepItems,
-      newItems,
-      { type: 'next_step_final_output', output: potentialFinalOutput },
-    );
+    if (agent.outputType !== 'text' && potentialFinalOutput) {
+      // Structured output schema => always leads to a final output if we have text.
+      const { parser } = getSchemaAndParserFromInputType(
+        agent.outputType,
+        'final_output',
+      );
+      const [error] = await safeExecute(() => parser(potentialFinalOutput));
+      if (error) {
+        addErrorToCurrentSpan({
+          message: 'Invalid output type',
+          data: {
+            error: String(error),
+          },
+        });
+        throw new ModelBehaviorError('Invalid output type');
+      }
+
+      return new SingleStepResult(
+        originalInput,
+        newResponse,
+        preStepItems,
+        newItems,
+        { type: 'next_step_final_output', output: potentialFinalOutput },
+      );
+    }
   }
 
   return new SingleStepResult(

diff --git a/packages/agents-core/test/runImplementation.test.ts b/packages/agents-core/test/runImplementation.test.ts
@@ -27,7 +27,13 @@ import {
   executeHandoffCalls,
   executeToolsAndSideEffects,
 } from '../src/runImplementation';
-import { FunctionTool, FunctionToolResult, tool } from '../src/tool';
+import {
+  FunctionTool,
+  FunctionToolResult,
+  tool,
+  computerTool,
+  hostedMcpTool,
+} from '../src/tool';
 import { handoff } from '../src/handoff';
 import { ModelBehaviorError, UserError } from '../src/errors';
 import { Computer } from '../src/computer';
@@ -41,8 +47,8 @@ import {
   TEST_MODEL_RESPONSE_WITH_FUNCTION,
   TEST_TOOL,
   FakeModelProvider,
+  fakeModelMessage,
 } from './stubs';
-import { computerTool } from '../src/tool';
 import * as protocol from '../src/types/protocol';
 import { Runner } from '../src/run';
 import { RunContext } from '../src/runContext';
@@ -877,7 +883,7 @@ describe('executeToolsAndSideEffects', () => {
     state = new RunState(new RunContext(), 'test input', TEST_AGENT, 1);
   });
 
-  it('continues execution when text agent has tools pending', async () => {
+  it('does not finalize when tools are used in the same turn (text output); runs again', async () => {
     const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' });
     const processedResponse = processModelResponse(
       TEST_MODEL_RESPONSE_WITH_FUNCTION,
@@ -903,6 +909,53 @@ describe('executeToolsAndSideEffects', () => {
     expect(result.nextStep.type).toBe('next_step_run_again');
   });
 
+  it('does not finalize when tools are used in the same turn (structured output); runs again', async () => {
+    const structuredAgent = new Agent({
+      name: 'StructuredAgent',
+      outputType: z.object({
+        foo: z.string(),
+      }),
+    });
+
+    const structuredResponse: ModelResponse = {
+      output: [
+        { ...TEST_MODEL_FUNCTION_CALL },
+        fakeModelMessage('{"foo":"bar"}'),
+      ],
+      usage: new Usage(),
+    } as any;
+
+    const processedResponse = processModelResponse(
+      structuredResponse,
+      structuredAgent,
+      [TEST_TOOL],
+      [],
+    );
+
+    expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(true);
+
+    const structuredState = new RunState(
+      new RunContext(),
+      'test input',
+      structuredAgent,
+      1,
+    );
+
+    const result = await withTrace('test', () =>
+      executeToolsAndSideEffects(
+        structuredAgent,
+        'test input',
+        [],
+        structuredResponse,
+        processedResponse,
+        runner,
+        structuredState,
+      ),
+    );
+
+    expect(result.nextStep.type).toBe('next_step_run_again');
+  });
+
   it('returns final output when text agent has no tools pending', async () => {
     const textAgent = new Agent({ name: 'TextAgent', outputType: 'text' });
     const response: ModelResponse = {
@@ -973,4 +1026,190 @@ describe('executeToolsAndSideEffects', () => {
       expect(result.nextStep.output).toBe('');
     }
   });
+
+  it('does not finalize after computer actions in the same turn; runs again', async () => {
+    const computerAgent = new Agent({
+      name: 'ComputerAgent',
+      outputType: 'text',
+    });
+    const fakeComputer = {
+      environment: 'mac',
+      dimensions: [1, 1] as [number, number],
+      screenshot: vi.fn().mockResolvedValue('img'),
+      click: vi.fn(),
+      doubleClick: vi.fn(),
+      drag: vi.fn(),
+      keypress: vi.fn(),
+      move: vi.fn(),
+      scroll: vi.fn(),
+      type: vi.fn(),
+      wait: vi.fn(),
+    };
+    const computer = computerTool({
+      computer: fakeComputer as unknown as Computer,
+    });
+    const computerCall: protocol.ComputerUseCallItem = {
+      type: 'computer_call',
+      id: 'comp1',
+      callId: 'comp1',
+      status: 'completed',
+      action: { type: 'screenshot' },
+    } as protocol.ComputerUseCallItem;
+
+    const computerResponse: ModelResponse = {
+      output: [computerCall, { ...TEST_MODEL_MESSAGE }],
+      usage: new Usage(),
+    } as any;
+
+    const processedResponse = processModelResponse(
+      computerResponse,
+      computerAgent,
+      [computer],
+      [],
+    );
+
+    const computerState = new RunState(
+      new RunContext(),
+      'test input',
+      computerAgent,
+      1,
+    );
+
+    const result = await withTrace('test', () =>
+      executeToolsAndSideEffects(
+        computerAgent,
+        'test input',
+        [],
+        computerResponse,
+        processedResponse,
+        runner,
+        computerState,
+      ),
+    );
+
+    expect(result.nextStep.type).toBe('next_step_run_again');
+  });
+
+  it('does not finalize when hosted MCP approval happens in the same turn; runs again', async () => {
+    const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' });
+    const mcpTool = hostedMcpTool({
+      serverLabel: 'demo_server',
+      serverUrl: 'https://example.com',
+      requireApproval: {
+        always: { toolNames: ['demo_tool'] },
+      },
+      onApproval: async () => ({ approve: true, reason: 'approved in test' }),
+    });
+
+    const approvalCall: protocol.HostedToolCallItem = {
+      type: 'hosted_tool_call',
+      id: 'approval1',
+      name: 'mcp_approval_request',
+      status: 'completed',
+      providerData: {
+        type: 'mcp_approval_request',
+        server_label: 'demo_server',
+        name: 'demo_tool',
+        id: 'approval1',
+        arguments: '{}',
+      },
+    } as protocol.HostedToolCallItem;
+
+    const approvalResponse: ModelResponse = {
+      output: [approvalCall, { ...TEST_MODEL_MESSAGE }],
+      usage: new Usage(),
+    } as any;
+
+    const processedResponse = processModelResponse(
+      approvalResponse,
+      approvalAgent,
+      [mcpTool],
+      [],
+    );
+
+    const approvalState = new RunState(
+      new RunContext(),
+      'test input',
+      approvalAgent,
+      1,
+    );
+
+    const result = await withTrace('test', () =>
+      executeToolsAndSideEffects(
+        approvalAgent,
+        'test input',
+        [],
+        approvalResponse,
+        processedResponse,
+        runner,
+        approvalState,
+      ),
+    );
+
+    expect(result.nextStep.type).toBe('next_step_run_again');
+  });
+
+  it('returns interruption when hosted MCP approval requires user input', async () => {
+    const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' });
+    const mcpTool = hostedMcpTool({
+      serverLabel: 'demo_server',
+      serverUrl: 'https://example.com',
+      requireApproval: {
+        always: { toolNames: ['demo_tool'] },
+      },
+    });
+
+    const approvalCall: protocol.HostedToolCallItem = {
+      type: 'hosted_tool_call',
+      id: 'approval1',
+      name: 'mcp_approval_request',
+      status: 'completed',
+      providerData: {
+        type: 'mcp_approval_request',
+        server_label: 'demo_server',
+        name: 'demo_tool',
+        id: 'approval1',
+        arguments: '{}',
+      },
+    } as protocol.HostedToolCallItem;
+
+    const approvalResponse: ModelResponse = {
+      output: [approvalCall, { ...TEST_MODEL_MESSAGE }],
+      usage: new Usage(),
+    } as any;
+
+    const processedResponse = processModelResponse(
+      approvalResponse,
+      approvalAgent,
+      [mcpTool],
+      [],
+    );
+
+    const approvalState = new RunState(
+      new RunContext(),
+      'test input',
+      approvalAgent,
+      1,
+    );
+
+    const result = await withTrace('test', () =>
+      executeToolsAndSideEffects(
+        approvalAgent,
+        'test input',
+        [],
+        approvalResponse,
+        processedResponse,
+        runner,
+        approvalState,
+      ),
+    );
+
+    expect(result.nextStep.type).toBe('next_step_interruption');
+    if (result.nextStep.type === 'next_step_interruption') {
+      expect(result.nextStep.data.interruptions).toHaveLength(1);
+      expect(result.nextStep.data.interruptions[0].rawItem).toMatchObject({
+        providerData: { id: 'approval1', type: 'mcp_approval_request' },
+      });
+    }
+  });
 });