diff --git a/.changeset/khaki-rivers-tease.md b/.changeset/khaki-rivers-tease.md new file mode 100644 index 000000000..ad1ee0f17 --- /dev/null +++ b/.changeset/khaki-rivers-tease.md @@ -0,0 +1,7 @@ +--- +'@openai/agents': patch +'@openai/agents-core': patch +'@openai/agents-openai': patch +--- + +feat: #1097 support overrideArguments for approved tool calls diff --git a/examples/agent-patterns/README.md b/examples/agent-patterns/README.md index 9309673aa..68a11db7e 100644 --- a/examples/agent-patterns/README.md +++ b/examples/agent-patterns/README.md @@ -1,7 +1,6 @@ # Agent Pattern Examples -This directory contains small scripts that demonstrate different agent patterns. -Run them with `pnpm` using the commands shown below. +This directory contains small scripts that demonstrate different agent patterns. Run them with `pnpm` using the commands shown below. - `agents-as-tools.ts` – Orchestrate translator agents using them as tools. ```bash @@ -23,11 +22,11 @@ Run them with `pnpm` using the commands shown below. ```bash pnpm -F agent-patterns start:forcing-tool-use ``` -- `human-in-the-loop.ts` – Manually approve certain tool calls. +- `human-in-the-loop.ts` – Manually approve certain tool calls and override approved arguments. ```bash pnpm examples:human-in-the-loop ``` -- `human-in-the-loop-stream.ts` – Streaming version of human approval. +- `human-in-the-loop-stream.ts` – Streaming version of human approval with approved-argument overrides. ```bash pnpm examples:streamed:human-in-the-loop ``` diff --git a/examples/agent-patterns/human-in-the-loop-stream.ts b/examples/agent-patterns/human-in-the-loop-stream.ts index 80119b7db..a0ac75761 100644 --- a/examples/agent-patterns/human-in-the-loop-stream.ts +++ b/examples/agent-patterns/human-in-the-loop-stream.ts @@ -20,6 +20,12 @@ async function confirm(question: string): Promise { } async function main() { + const APPROVER_NAME = 'Kaz'; + const temperatureParams = z.object({ + city: z.string(), + approver: z.string().nullable().optional(), + }); + // Define a tool that requires approval for certain inputs const getWeatherTool = tool({ name: 'get_weather', @@ -40,12 +46,11 @@ async function main() { const getTemperatureTool = tool({ name: 'get_temperature', description: 'Get the temperature for a given city', - parameters: z.object({ - city: z.string(), - }), + parameters: temperatureParams, needsApproval: async (_ctx, { city }) => city.includes('Oakland'), - execute: async ({ city }) => { - return `The temperature in ${city} is 20° Celsius`; + execute: async ({ city, approver }) => { + const approvedBy = approver ? ` Approved by ${approver}.` : ''; + return `The temperature in ${city} is 20° Celsius.${approvedBy}`; }, }); @@ -68,7 +73,7 @@ async function main() { let stream = await run( mainAgent, - 'What is the weather and temperature in San Francisco and Oakland? Use available tools as needed.', + 'Please check both San Francisco and Oakland, and do not consider the task complete until you have provided the weather and temperature for both cities.', { stream: true }, ); stream.toTextStream({ compatibleWithNodeStreams: true }).pipe(process.stdout); @@ -89,7 +94,18 @@ async function main() { `Agent ${interruption.agent.name} would like to use the tool ${interruption.rawItem.name} with "${interruption.rawItem.arguments}". Do you approve?`, ); if (ok) { - state.approve(interruption); + if (interruption.name === 'get_temperature') { + const parsedArgs = temperatureParams.parse( + JSON.parse(interruption.rawItem.arguments), + ); + const overrideArguments = { ...parsedArgs, approver: APPROVER_NAME }; + console.log( + `Injecting approver="${APPROVER_NAME}" into the approved tool call.`, + ); + state.approve(interruption, { overrideArguments }); + } else { + state.approve(interruption); + } } else { state.reject(interruption); } diff --git a/examples/agent-patterns/human-in-the-loop.ts b/examples/agent-patterns/human-in-the-loop.ts index 2d2f11319..cf8940e5a 100644 --- a/examples/agent-patterns/human-in-the-loop.ts +++ b/examples/agent-patterns/human-in-the-loop.ts @@ -22,15 +22,21 @@ const weatherAgent = new Agent({ tools: [getWeatherTool], }); +const temperatureParams = z.object({ + city: z.string(), + approver: z.string().nullable().optional(), +}); + +const APPROVER_NAME = 'Kaz'; + const getTemperatureTool = tool({ name: 'get_temperature', description: 'Get the temperature for a given city', - parameters: z.object({ - city: z.string(), - }), + parameters: temperatureParams, needsApproval: async (_ctx, { city }) => city.includes('Oakland'), - execute: async ({ city }) => { - return `The temperature in ${city} is 20° Celsius`; + execute: async ({ city, approver }) => { + const approvedBy = approver ? ` Approved by ${approver}.` : ''; + return `The temperature in ${city} is 20° Celsius.${approvedBy}`; }, }); @@ -73,7 +79,7 @@ async function confirm(question: string) { async function main() { let result: RunResult> = await run( agent, - 'What is the weather and temperature in San Francisco and Oakland? Use available tools as needed.', + 'Please check both San Francisco and Oakland, and do not consider the task complete until you have provided the weather and temperature for both cities.', ); let hasInterruptions = result.interruptions?.length > 0; while (hasInterruptions) { @@ -96,7 +102,21 @@ async function main() { ); if (confirmed) { - state.approve(interruption); + if ( + interruption.rawItem.type === 'function_call' && + interruption.name === 'get_temperature' + ) { + const parsedArgs = temperatureParams.parse( + JSON.parse(interruption.rawItem.arguments), + ); + const overrideArguments = { ...parsedArgs, approver: APPROVER_NAME }; + console.log( + `Injecting approver="${APPROVER_NAME}" into the approved tool call.`, + ); + state.approve(interruption, { overrideArguments }); + } else { + state.approve(interruption); + } } else { state.reject(interruption); } diff --git a/examples/memory/hitl-session-scenario.ts b/examples/memory/hitl-session-scenario.ts index 133ae1096..11b8e1683 100644 --- a/examples/memory/hitl-session-scenario.ts +++ b/examples/memory/hitl-session-scenario.ts @@ -5,6 +5,7 @@ import { Agent, type AgentInputItem, type Model, + OpenAIResponsesHistoryRewriteSession, type Session, OpenAIConversationsSession, run, @@ -22,6 +23,8 @@ const USER_MESSAGES = [ 'Update note for customer 104.', 'Delete note for customer 104.', ]; +const OVERRIDE_QUERY = + 'Update note for customer 104. Mark the callback as manually rescheduled for Tuesday at 10 AM.'; const TOOL_OUTPUTS: Record string> = { [TOOL_ECHO]: (message) => `approved:${message}`, @@ -57,6 +60,11 @@ type ScenarioStep = { toolName: string; approval: ApprovalAction; expectedOutput: string; + overrideArguments?: { query: string }; + saveOverrideArguments?: boolean; + expectedFunctionCallCountForApprovedCall?: number; + expectedCorrectedFunctionCallCountForApprovedCall?: number; + expectedError?: string; }; async function runScenario( @@ -76,88 +84,139 @@ async function runScenario( toolUseBehavior: 'stop_on_first_tool', }); - let result = await run(agent, step.message, { session }); - if (result.interruptions.length === 0) { - throw new Error(`[${label}] expected at least one tool approval.`); - } + try { + let result = await run(agent, step.message, { session }); + if (result.interruptions.length === 0) { + throw new Error(`[${label}] expected at least one tool approval.`); + } - while (result.interruptions.length > 0) { - for (const interruption of result.interruptions) { - if (step.approval === 'reject') { - result.state.reject(interruption); - } else { - result.state.approve(interruption); + let approvedCallId: string | undefined; + while (result.interruptions.length > 0) { + for (const interruption of result.interruptions) { + if (step.approval === 'reject') { + result.state.reject(interruption); + } else { + approvedCallId = + interruption.rawItem.type === 'function_call' + ? interruption.rawItem.callId + : approvedCallId; + if ( + step.overrideArguments && + interruption.rawItem.type === 'function_call' + ) { + result.state.approve(interruption, { + overrideArguments: step.overrideArguments, + ...(typeof step.saveOverrideArguments === 'boolean' + ? { saveOverrideArguments: step.saveOverrideArguments } + : {}), + }); + } else { + result.state.approve(interruption); + } + } } + result = await run(agent, result.state, { session }); } - result = await run(agent, result.state, { session }); - } - if (!result.finalOutput) { - throw new Error(`[${label}] expected a final output after approval.`); - } - if (result.finalOutput !== step.expectedOutput) { - throw new Error( - `[${label}] expected final output "${step.expectedOutput}" but got "${result.finalOutput}".`, - ); - } + if (step.expectedError) { + throw new Error( + `[${label}] expected an error containing "${step.expectedError}" but the run completed successfully.`, + ); + } - const items = await session.getItems(); - const toolResults = items.filter( - (item) => item.type === 'function_call_result', - ); - const userMessages = items.filter( - (item) => getUserText(item) === step.message, - ); - const lastToolCall = findLastItem(items, isFunctionCall); - const lastToolResult = findLastItem(items, isFunctionCallResult); + if (!result.finalOutput) { + throw new Error(`[${label}] expected a final output after approval.`); + } + if (result.finalOutput !== step.expectedOutput) { + throw new Error( + `[${label}] expected final output "${step.expectedOutput}" but got "${result.finalOutput}".`, + ); + } - if (toolResults.length === 0) { - throw new Error(`[${label}] expected tool outputs in session history.`); - } - if (userMessages.length === 0) { - throw new Error(`[${label}] expected user input in session history.`); - } - if (!lastToolCall) { - throw new Error(`[${label}] expected a tool call in session history.`); - } - if (lastToolCall.name !== step.toolName) { - throw new Error( - `[${label}] expected tool call "${step.toolName}" but got "${lastToolCall.name}".`, + const items = await session.getItems(); + const toolResults = items.filter( + (item) => item.type === 'function_call_result', ); - } - if (!lastToolResult) { - throw new Error(`[${label}] expected a tool result in session history.`); - } - const allowedResultNames = new Set([step.toolName, lastToolCall.callId]); - if (!allowedResultNames.has(lastToolResult.name)) { - throw new Error( - `[${label}] expected tool result "${step.toolName}" but got "${lastToolResult.name}".`, + const userMessages = items.filter( + (item) => getUserText(item) === step.message, ); - } - if (lastToolResult.callId !== lastToolCall.callId) { - throw new Error( - `[${label}] expected tool result callId "${lastToolCall.callId}" but got "${lastToolResult.callId}".`, + const lastToolCall = findLastItem(items, isFunctionCall); + const lastToolResult = findLastItem(items, isFunctionCallResult); + + if (toolResults.length === 0) { + throw new Error(`[${label}] expected tool outputs in session history.`); + } + if (userMessages.length === 0) { + throw new Error(`[${label}] expected user input in session history.`); + } + if (!lastToolCall) { + throw new Error(`[${label}] expected a tool call in session history.`); + } + if (lastToolCall.name !== step.toolName) { + throw new Error( + `[${label}] expected tool call "${step.toolName}" but got "${lastToolCall.name}".`, + ); + } + if (!lastToolResult) { + throw new Error(`[${label}] expected a tool result in session history.`); + } + const allowedResultNames = new Set([step.toolName, lastToolCall.callId]); + if (!allowedResultNames.has(lastToolResult.name)) { + throw new Error( + `[${label}] expected tool result "${step.toolName}" but got "${lastToolResult.name}".`, + ); + } + if (lastToolResult.callId !== lastToolCall.callId) { + throw new Error( + `[${label}] expected tool result callId "${lastToolCall.callId}" but got "${lastToolResult.callId}".`, + ); + } + if (step.overrideArguments) { + if (!approvedCallId) { + throw new Error(`[${label}] expected an approved function call id.`); + } + validateOverridePersistence(items, label, { + callId: approvedCallId, + overrideArguments: step.overrideArguments, + expectedCorrectedFunctionCallCount: + step.expectedCorrectedFunctionCallCountForApprovedCall ?? 1, + expectedFunctionCallCount: + step.expectedFunctionCallCountForApprovedCall ?? 1, + expectedOutput: step.expectedOutput, + }); + } + + logSessionSummary(items, label); + console.log( + `[${label}] final output: ${result.finalOutput} (items: ${items.length})`, ); + } catch (error) { + if (!step.expectedError) { + throw error; + } + const message = String(error); + if (!message.includes(step.expectedError)) { + throw new Error( + `[${label}] expected an error containing "${step.expectedError}" but got "${message}".`, + ); + } + console.log(`[${label}] expected error: ${step.expectedError}`); } - - logSessionSummary(items, label); - console.log( - `[${label}] final output: ${result.finalOutput} (items: ${items.length})`, - ); } async function runFileSessionScenario(model?: string | Model): Promise { const tmpRoot = path.resolve(process.cwd(), 'tmp'); await mkdir(tmpRoot, { recursive: true }); const tempDir = await mkdtemp(path.join(tmpRoot, 'hitl-scenario-')); - const session = new FileSession({ dir: tempDir }); + const labelPrefix = 'FileSession+HistoryRewrite'; + const session = createRewriteCapableFileSession(tempDir); const sessionId = await session.getSessionId(); const sessionFile = path.join(tempDir, `${sessionId}.json`); - let rehydratedSession: FileSession | undefined; + let rehydratedSession: Session | undefined; - console.log(`[FileSession] session id: ${sessionId}`); - console.log(`[FileSession] file: ${sessionFile}`); - console.log('[FileSession] cleanup: always'); + console.log(`[${labelPrefix}] session id: ${sessionId}`); + console.log(`[${labelPrefix}] file: ${sessionFile}`); + console.log(`[${labelPrefix}] cleanup: always`); const steps: ScenarioStep[] = [ { @@ -168,11 +227,14 @@ async function runFileSessionScenario(model?: string | Model): Promise { expectedOutput: TOOL_OUTPUTS[TOOL_ECHO](USER_MESSAGES[0]), }, { - name: 'turn 2 (rehydrated)', + name: 'turn 2 (rehydrated override)', message: USER_MESSAGES[1], toolName: TOOL_NOTE, approval: 'approve', - expectedOutput: TOOL_OUTPUTS[TOOL_NOTE](USER_MESSAGES[1]), + expectedOutput: TOOL_OUTPUTS[TOOL_NOTE](OVERRIDE_QUERY), + overrideArguments: { query: OVERRIDE_QUERY }, + expectedFunctionCallCountForApprovedCall: 1, + expectedCorrectedFunctionCallCountForApprovedCall: 1, }, { name: 'turn 3 (rejected)', @@ -184,20 +246,20 @@ async function runFileSessionScenario(model?: string | Model): Promise { ]; try { - await runScenario(session, `FileSession ${steps[0].name}`, steps[0], { + await runScenario(session, `${labelPrefix} ${steps[0].name}`, steps[0], { model, }); - rehydratedSession = new FileSession({ dir: tempDir, sessionId }); - console.log(`[FileSession] rehydrated session id: ${sessionId}`); + rehydratedSession = createRewriteCapableFileSession(tempDir, sessionId); + console.log(`[${labelPrefix}] rehydrated session id: ${sessionId}`); await runScenario( rehydratedSession, - `FileSession ${steps[1].name}`, + `${labelPrefix} ${steps[1].name}`, steps[1], { model }, ); await runScenario( rehydratedSession, - `FileSession ${steps[2].name}`, + `${labelPrefix} ${steps[2].name}`, steps[2], { model }, ); @@ -235,18 +297,15 @@ async function runOpenAISessionScenario(model?: string | Model): Promise { expectedOutput: TOOL_OUTPUTS[TOOL_ECHO](USER_MESSAGES[0]), }, { - name: 'turn 2 (rehydrated)', + name: 'turn 2 (rehydrated override)', message: USER_MESSAGES[1], toolName: TOOL_NOTE, approval: 'approve', - expectedOutput: TOOL_OUTPUTS[TOOL_NOTE](USER_MESSAGES[1]), - }, - { - name: 'turn 3 (rejected)', - message: USER_MESSAGES[2], - toolName: TOOL_ECHO, - approval: 'reject', - expectedOutput: REJECTION_OUTPUT, + expectedOutput: TOOL_OUTPUTS[TOOL_NOTE](OVERRIDE_QUERY), + overrideArguments: { query: OVERRIDE_QUERY }, + saveOverrideArguments: false, + expectedFunctionCallCountForApprovedCall: 1, + expectedCorrectedFunctionCallCountForApprovedCall: 0, }, ]; @@ -269,12 +328,6 @@ async function runOpenAISessionScenario(model?: string | Model): Promise { steps[1], { model }, ); - await runScenario( - rehydratedSession, - `OpenAIConversationsSession ${steps[2].name}`, - steps[2], - { model }, - ); if (shouldKeep) { console.log(`[OpenAIConversationsSession] kept session id: ${sessionId}`); return; @@ -286,6 +339,15 @@ async function runOpenAISessionScenario(model?: string | Model): Promise { } } +function createRewriteCapableFileSession( + dir: string, + sessionId?: string, +): OpenAIResponsesHistoryRewriteSession { + return new OpenAIResponsesHistoryRewriteSession({ + underlyingSession: new FileSession({ dir, sessionId }), + }); +} + function getUserText(item: AgentInputItem): string | undefined { if (item.type !== 'message' || item.role !== 'user') { return undefined; @@ -358,6 +420,91 @@ function logSessionSummary(items: AgentInputItem[], label: string): void { } } +function validateOverridePersistence( + items: AgentInputItem[], + label: string, + args: { + callId: string; + overrideArguments: { query: string }; + expectedFunctionCallCount: number; + expectedCorrectedFunctionCallCount: number; + expectedOutput: string; + }, +): void { + const persistedCalls = items.filter( + (item): item is AgentInputItem & { type: 'function_call' } => + item.type === 'function_call' && item.callId === args.callId, + ); + const persistedResults = items.filter( + (item): item is AgentInputItem & { type: 'function_call_result' } => + item.type === 'function_call_result' && item.callId === args.callId, + ); + const expectedArguments = JSON.stringify(args.overrideArguments); + const correctedCalls = persistedCalls.filter( + (item) => item.arguments === expectedArguments, + ); + const observedArguments = persistedCalls + .map((item) => item.arguments ?? '') + .join(' | '); + + console.log( + `[${label}] override history: callId=${args.callId} function_calls=${persistedCalls.length} expected=${args.expectedFunctionCallCount}`, + ); + console.log( + `[${label}] override corrected calls: ${correctedCalls.length} expected=${args.expectedCorrectedFunctionCallCount}`, + ); + console.log( + `[${label}] override persisted args: ${truncateText( + observedArguments, + 240, + )}`, + ); + + if (persistedCalls.length !== args.expectedFunctionCallCount) { + throw new Error( + `[${label}] expected ${args.expectedFunctionCallCount} persisted function_call items for callId "${args.callId}" but found ${persistedCalls.length}.`, + ); + } + if (correctedCalls.length !== args.expectedCorrectedFunctionCallCount) { + throw new Error( + `[${label}] expected ${args.expectedCorrectedFunctionCallCount} corrected function_call items for callId "${args.callId}" but found ${correctedCalls.length}.`, + ); + } + if (persistedResults.length === 0) { + throw new Error( + `[${label}] expected at least one persisted function_call_result for callId "${args.callId}".`, + ); + } + + const lastResult = persistedResults[persistedResults.length - 1]; + const resultOutput = extractOutputText(lastResult.output); + console.log( + `[${label}] override persisted output: ${truncateText(resultOutput, 240)}`, + ); + if (resultOutput !== args.expectedOutput) { + throw new Error( + `[${label}] expected persisted tool result "${args.expectedOutput}" but found "${resultOutput}".`, + ); + } +} + +function extractOutputText(output: unknown): string { + if (typeof output === 'string') { + return output; + } + + if ( + output && + typeof output === 'object' && + 'text' in output && + typeof output.text === 'string' + ) { + return output.text; + } + + return formatOutput(output); +} + function isFunctionCall( item: AgentInputItem, ): item is AgentInputItem & { type: 'function_call' } { diff --git a/packages/agents-core/src/index.ts b/packages/agents-core/src/index.ts index f8b5abbb5..85f75b9e8 100644 --- a/packages/agents-core/src/index.ts +++ b/packages/agents-core/src/index.ts @@ -289,12 +289,22 @@ export type { export { RequestUsage, Usage } from './usage'; export type { Session, + ServerManagedConversationSession, + SessionFunctionCallItem, + SessionHistoryMutation, + SessionHistoryRewriteArgs, + SessionHistoryRewriteAwareSession, SessionInputCallback, OpenAIResponsesCompactionArgs, OpenAIResponsesCompactionAwareSession, OpenAIResponsesCompactionResult, } from './memory/session'; -export { isOpenAIResponsesCompactionAwareSession } from './memory/session'; +export { + isOpenAIResponsesCompactionAwareSession, + isServerManagedConversationSession, + isSessionHistoryRewriteAwareSession, + SERVER_MANAGED_CONVERSATION_SESSION, +} from './memory/session'; export { MemorySession } from './memory/memorySession'; /** diff --git a/packages/agents-core/src/memory/historyMutations.ts b/packages/agents-core/src/memory/historyMutations.ts new file mode 100644 index 000000000..257ef5f37 --- /dev/null +++ b/packages/agents-core/src/memory/historyMutations.ts @@ -0,0 +1,43 @@ +import type { AgentInputItem } from '../types'; +import type { SessionHistoryMutation } from './session'; + +/** + * Applies persisted-history mutations and returns a new canonical item list. + */ +export function applySessionHistoryMutations( + items: AgentInputItem[], + mutations: SessionHistoryMutation[], +): AgentInputItem[] { + let nextItems = items.map((item) => structuredClone(item)); + + for (const mutation of mutations) { + if (mutation.type === 'replace_function_call') { + nextItems = applyReplaceFunctionCallMutation(nextItems, mutation); + } + } + + return nextItems; +} + +function applyReplaceFunctionCallMutation( + items: AgentInputItem[], + mutation: Extract, +): AgentInputItem[] { + const replacement = structuredClone(mutation.replacement); + const nextItems: AgentInputItem[] = []; + let keptReplacement = false; + + for (const item of items) { + if (item.type === 'function_call' && item.callId === mutation.callId) { + if (!keptReplacement) { + nextItems.push(replacement); + keptReplacement = true; + } + continue; + } + + nextItems.push(item); + } + + return nextItems; +} diff --git a/packages/agents-core/src/memory/memorySession.ts b/packages/agents-core/src/memory/memorySession.ts index 1ae244f3b..b31d41c18 100644 --- a/packages/agents-core/src/memory/memorySession.ts +++ b/packages/agents-core/src/memory/memorySession.ts @@ -1,8 +1,13 @@ import { randomUUID } from '@openai/agents-core/_shims'; import type { AgentInputItem } from '../types'; -import type { Session } from './session'; +import type { + Session, + SessionHistoryRewriteArgs, + SessionHistoryRewriteAwareSession, +} from './session'; import { logger, Logger } from '../logger'; +import { applySessionHistoryMutations } from './historyMutations'; export type MemorySessionOptions = { sessionId?: string; @@ -13,7 +18,9 @@ export type MemorySessionOptions = { /** * Simple in-memory session store intended for demos or tests. Not recommended for production use. */ -export class MemorySession implements Session { +export class MemorySession + implements Session, SessionHistoryRewriteAwareSession +{ private readonly sessionId: string; private readonly logger: Logger; @@ -78,6 +85,17 @@ export class MemorySession implements Session { this.logger.debug(`Clearing memory session (${this.sessionId})`); this.items = []; } + + async applyHistoryMutations(args: SessionHistoryRewriteArgs): Promise { + if (args.mutations.length === 0) { + return; + } + + this.logger.debug( + `Applying history mutations to memory session (${this.sessionId}): ${JSON.stringify(args.mutations)}`, + ); + this.items = applySessionHistoryMutations(this.items, args.mutations); + } } function cloneAgentItem(item: T): T { diff --git a/packages/agents-core/src/memory/session.ts b/packages/agents-core/src/memory/session.ts index afc0e572d..161fa121f 100644 --- a/packages/agents-core/src/memory/session.ts +++ b/packages/agents-core/src/memory/session.ts @@ -45,6 +45,83 @@ export interface Session { clearSession(): Promise; } +/** + * Marker for session implementations whose conversation history is owned by a server-side system + * and therefore cannot be rewritten in place by the SDK. + */ +export const SERVER_MANAGED_CONVERSATION_SESSION: unique symbol = Symbol( + 'SERVER_MANAGED_CONVERSATION_SESSION', +); + +/** + * Session subtype whose persisted history is managed by a remote conversation service. + */ +export interface ServerManagedConversationSession extends Session { + readonly [SERVER_MANAGED_CONVERSATION_SESSION]: true; +} + +export function isServerManagedConversationSession( + session: Session | undefined, +): session is ServerManagedConversationSession { + return ( + !!session && + typeof session === 'object' && + (session as ServerManagedConversationSession)[ + SERVER_MANAGED_CONVERSATION_SESSION + ] === true + ); +} + +export type SessionFunctionCallItem = Extract< + AgentInputItem, + { type: 'function_call' } +>; + +export type ReplaceFunctionCallSessionHistoryMutation = { + /** + * Replace the canonical persisted function call for this call id and drop any later duplicate + * function-call items with the same call id. + */ + type: 'replace_function_call'; + /** + * Stable tool call identifier shared by the function call and its output. + */ + callId: string; + /** + * Canonical function-call item to keep in persisted history. + */ + replacement: SessionFunctionCallItem; +}; + +export type SessionHistoryMutation = ReplaceFunctionCallSessionHistoryMutation; + +export type SessionHistoryRewriteArgs = { + /** + * Ordered history mutations to apply to the persisted session items. + */ + mutations: SessionHistoryMutation[]; +}; + +/** + * Session subtype that can rewrite previously persisted history items after a turn finishes. + */ +export interface SessionHistoryRewriteAwareSession extends Session { + /** + * Apply the provided history mutations to the persisted session items. + */ + applyHistoryMutations(args: SessionHistoryRewriteArgs): Promise | void; +} + +export function isSessionHistoryRewriteAwareSession( + session: Session | undefined, +): session is SessionHistoryRewriteAwareSession { + return ( + !!session && + typeof (session as SessionHistoryRewriteAwareSession) + .applyHistoryMutations === 'function' + ); +} + /** * Session subtype that can run compaction logic after a completed turn is persisted. */ diff --git a/packages/agents-core/src/run.ts b/packages/agents-core/src/run.ts index ef96e09e8..805ba8082 100644 --- a/packages/agents-core/src/run.ts +++ b/packages/agents-core/src/run.ts @@ -35,11 +35,13 @@ import { Usage } from './usage'; import { convertAgentOutputTypeToSerializable } from './utils/tools'; import { DEFAULT_MAX_TURNS } from './runner/constants'; import { StreamEventResponseCompleted } from './types/protocol'; -import type { Session, SessionInputCallback } from './memory/session'; +import { type Session, type SessionInputCallback } from './memory/session'; import type { AgentInputItem } from './types'; import { ServerConversationTracker, applyCallModelInputFilter, + createServerConversationReplayTracker, + resolveServerConversationContext, } from './runner/conversation'; import { createGuardrailTracker, @@ -62,6 +64,7 @@ import { isAbortError, } from './runner/streaming'; import { + assertOverrideHistoryPersistenceSupport, createSessionPersistenceTracker, prepareInputItemsWithSession, saveStreamInputToSession, @@ -75,7 +78,11 @@ import { handleInterruptedOutcome, resumeInterruptedTurn, } from './runner/runLoop'; -import { applyTraceOverrides, getTracing } from './runner/tracing'; +import { + applyTraceOverrides, + applyTraceRedactionPolicyToState, + getTracing, +} from './runner/tracing'; import type { ReasoningItemIdPolicy } from './runner/items'; import type { AgentArtifacts, @@ -478,12 +485,29 @@ export class Runner extends RunHooks> { const resumedPreviousResponseId = resumingFromState ? (input as RunState)._previousResponseId : undefined; + const session = effectiveOptions.session; + const serverConversation = resolveServerConversationContext({ + explicitConversationId: effectiveOptions.conversationId, + resumedConversationId, + explicitPreviousResponseId: effectiveOptions.previousResponseId, + resumedPreviousResponseId, + session, + }); + const runOptions = { + ...effectiveOptions, + conversationId: serverConversation.conversationId, + previousResponseId: serverConversation.previousResponseId, + }; const serverManagesConversation = - Boolean(effectiveOptions.conversationId ?? resumedConversationId) || - Boolean(effectiveOptions.previousResponseId ?? resumedPreviousResponseId); + serverConversation.serverConversationChainAvailable; + const historyIsServerManaged = serverConversation.historyIsServerManaged; // When the server tracks conversation history we defer to it for previous turns so local session // persistence can focus solely on the new delta being generated in this process. - const session = effectiveOptions.session; + assertOverrideHistoryPersistenceSupport({ + input, + session, + historyIsServerManaged, + }); const sessionPersistence = createSessionPersistenceTracker({ session, hasCallModelInputFilter, @@ -498,15 +522,14 @@ export class Runner extends RunHooks> { session, sessionInputCallback, { - // When the server tracks conversation state we only send the new turn inputs; - // previous messages are recovered via conversationId/previousResponseId. + // Only omit prepended history once we have a concrete server-side conversation chain. includeHistoryInPreparedInput: !serverManagesConversation, preserveDroppedNewItems: serverManagesConversation, }, ); if (serverManagesConversation && session) { - // When the server manages memory we only persist the new turn inputs locally so the - // conversation service stays the single source of truth for prior exchanges. + // Keep the model payload scoped to the new turn delta even when Session persists the + // transcript for a remote conversation service. const sessionItems = prepared.sessionItems; if (sessionItems && sessionItems.length > 0) { preparedInput = sessionItems; @@ -524,11 +547,11 @@ export class Runner extends RunHooks> { sessionPersistence?.buildPersistInputOnce(serverManagesConversation); const executeRun = async () => { - if (effectiveOptions.stream) { + if (runOptions.stream) { const streamResult = await this.#runIndividualStream( agent, preparedInput, - effectiveOptions, + runOptions, ensureStreamInputPersisted, sessionPersistence?.recordTurnItems, preserveTurnPersistenceOnResume, @@ -538,7 +561,7 @@ export class Runner extends RunHooks> { const runResult = await this.#runIndividualNonStream( agent, preparedInput, - effectiveOptions, + runOptions, sessionPersistence?.recordTurnItems, preserveTurnPersistenceOnResume, ); @@ -656,6 +679,11 @@ export class Runner extends RunHooks> { (isResumedState ? state._reasoningItemIdPolicy : undefined) ?? this.config.reasoningItemIdPolicy; state.setReasoningItemIdPolicy(resolvedReasoningItemIdPolicy); + applyTraceRedactionPolicyToState( + state, + this.config.traceIncludeSensitiveData, + isResumedState, + ); const resolvedConversationId = options.conversationId ?? @@ -671,14 +699,12 @@ export class Runner extends RunHooks> { ); } - const serverConversationTracker = - resolvedConversationId || resolvedPreviousResponseId - ? new ServerConversationTracker({ - conversationId: resolvedConversationId, - previousResponseId: resolvedPreviousResponseId, - reasoningItemIdPolicy: resolvedReasoningItemIdPolicy, - }) - : undefined; + const serverConversationTracker = createServerConversationReplayTracker({ + conversationId: resolvedConversationId, + previousResponseId: resolvedPreviousResponseId, + session: options.session, + reasoningItemIdPolicy: resolvedReasoningItemIdPolicy, + }); if (serverConversationTracker && isResumedState) { serverConversationTracker.primeFromState({ @@ -805,7 +831,7 @@ export class Runner extends RunHooks> { handoffs: preparedCall.serializedHandoffs, tracing: getTracing( this.config.tracingDisabled, - this.config.traceIncludeSensitiveData, + state._traceIncludeSensitiveData, ), signal: options.signal, }, @@ -987,13 +1013,12 @@ export class Runner extends RunHooks> { options.previousResponseId ?? result.state._previousResponseId; const serverManagesConversation = Boolean(resolvedConversationId) || Boolean(resolvedPreviousResponseId); - const serverConversationTracker = serverManagesConversation - ? new ServerConversationTracker({ - conversationId: resolvedConversationId, - previousResponseId: resolvedPreviousResponseId, - reasoningItemIdPolicy: resolvedReasoningItemIdPolicy, - }) - : undefined; + const serverConversationTracker = createServerConversationReplayTracker({ + conversationId: resolvedConversationId, + previousResponseId: resolvedPreviousResponseId, + session: options.session, + reasoningItemIdPolicy: resolvedReasoningItemIdPolicy, + }); if (serverConversationTracker) { result.state.setConversationContext( serverConversationTracker.conversationId, @@ -1196,7 +1221,7 @@ export class Runner extends RunHooks> { ), tracing: getTracing( this.config.tracingDisabled, - this.config.traceIncludeSensitiveData, + result.state._traceIncludeSensitiveData, ), signal: options.signal, }, @@ -1466,6 +1491,11 @@ export class Runner extends RunHooks> { if (isResumedState) { state._agentToolInvocation = undefined; } + applyTraceRedactionPolicyToState( + state, + this.config.traceIncludeSensitiveData, + isResumedState, + ); const resolvedConversationId = options.conversationId ?? (isResumedState ? state._conversationId : undefined); diff --git a/packages/agents-core/src/runState.ts b/packages/agents-core/src/runState.ts index d76c44df9..84451e91a 100644 --- a/packages/agents-core/src/runState.ts +++ b/packages/agents-core/src/runState.ts @@ -44,6 +44,10 @@ import { Tool, } from './tool'; import type { AgentToolInvocation } from './agentToolInvocation'; +import type { + SessionFunctionCallItem, + SessionHistoryMutation, +} from './memory/session'; import { getFunctionToolQualifiedName, toolQualifiedName, @@ -77,8 +81,11 @@ import { * - 1.7: Adds optional approval rejection messages. * - 1.8: Adds tool search item variants, batched computer actions, and GA computer tool * aliasing to serialized run state payloads. + * - 1.9: Adds pending session history mutations for canonical local session rewrites. + * - 1.10: Adds pending execution-only approval override metadata for resume-time validation, + * plus traceIncludeSensitiveData persistence for approval override tracing. */ -export const CURRENT_SCHEMA_VERSION = '1.8' as const; +export const CURRENT_SCHEMA_VERSION = '1.10' as const; const SUPPORTED_SCHEMA_VERSIONS = [ '1.0', '1.1', @@ -88,11 +95,21 @@ const SUPPORTED_SCHEMA_VERSIONS = [ '1.5', '1.6', '1.7', + '1.8', + '1.9', CURRENT_SCHEMA_VERSION, ] as const; type SupportedSchemaVersion = (typeof SUPPORTED_SCHEMA_VERSIONS)[number]; const $schemaVersion = z.enum(SUPPORTED_SCHEMA_VERSIONS); +const sessionHistoryMutationSchema = z.discriminatedUnion('type', [ + z.object({ + type: z.literal('replace_function_call'), + callId: z.string(), + replacement: protocol.FunctionCallItem, + }), +]); + type ContextOverrideStrategy = 'merge' | 'replace'; type RunStateContextOverrideOptions = { @@ -387,6 +404,15 @@ export const SerializedRunState = z.object({ conversationId: z.string().optional(), previousResponseId: z.string().optional(), reasoningItemIdPolicy: z.enum(['preserve', 'omit']).optional(), + executionOnlyApprovalOverrideCallIds: z + .array(z.string()) + .optional() + .default([]), + sessionHistoryMutations: z + .array(sessionHistoryMutationSchema) + .optional() + .default([]), + traceIncludeSensitiveData: z.boolean().optional(), trace: serializedTraceSchema.nullable(), }); @@ -403,6 +429,51 @@ type ToolSearchRuntimeToolState = { nextOrder: number; }; +type ApproveRunToolOptions = { + alwaysApprove?: boolean; + overrideArguments?: Record; + saveOverrideArguments?: boolean; +}; + +function isFunctionCallItem( + item: RunItem['rawItem'] | AgentInputItem | undefined, +): item is protocol.FunctionCallItem { + return Boolean( + item && typeof item === 'object' && item.type === 'function_call', + ); +} + +function createFunctionCallOverride( + toolCall: protocol.FunctionCallItem, + serializedArguments: string, + options: { omitId?: boolean } = {}, +): protocol.FunctionCallItem { + const nextItem: protocol.FunctionCallItem = { + ...toolCall, + arguments: serializedArguments, + }; + + if (options.omitId) { + delete nextItem.id; + } + + return nextItem; +} + +function deserializeFunctionCallArgumentsForTrace( + serializedArguments: string | undefined, +): unknown { + if (typeof serializedArguments !== 'string') { + return serializedArguments ?? null; + } + + try { + return JSON.parse(serializedArguments); + } catch { + return serializedArguments; + } +} + /** * Serializable snapshot of an agent's run, including context, usage and trace. * While this class has publicly writable properties (prefixed with `_`), they are not meant to be @@ -541,6 +612,15 @@ export class RunState> { * Trace associated with this run if tracing is enabled. */ public _trace: Trace | null = null; + /** + * Whether approval override tracing may include sensitive payload data. + */ + public _traceIncludeSensitiveData = true; + /** + * Whether this state was restored from a snapshot that did not persist trace redaction policy. + * This is runtime-only metadata and is not serialized. + */ + public _traceIncludeSensitiveDataNeedsConfigFallback = false; /** * Runtime-only tool_search-loaded tools, scoped by agent name and preserved across turns for * the lifetime of this in-memory run. @@ -549,6 +629,14 @@ export class RunState> { string, ToolSearchRuntimeToolState >(); + /** + * Pending persisted-history rewrites to apply after the current turn is written to a local session. + */ + public _sessionHistoryMutations: SessionHistoryMutation[]; + /** + * Call IDs whose approval overrides apply only to the immediate resumed execution. + */ + public _executionOnlyApprovalOverrideCallIds: string[]; constructor( context: RunContext, @@ -573,6 +661,8 @@ export class RunState> { this._outputGuardrailResults = []; this._toolInputGuardrailResults = []; this._toolOutputGuardrailResults = []; + this._executionOnlyApprovalOverrideCallIds = []; + this._sessionHistoryMutations = []; this._trace = getCurrentTrace(); } @@ -601,6 +691,14 @@ export class RunState> { this._currentAgentSpan = span; } + /** + * Updates whether tracing may include potentially sensitive payload data. + */ + public setTraceIncludeSensitiveData(includeSensitiveData: boolean): void { + this._traceIncludeSensitiveData = includeSensitiveData; + this._traceIncludeSensitiveDataNeedsConfigFallback = false; + } + private getOrCreateToolSearchRuntimeToolState( agentName: string, ): ToolSearchRuntimeToolState { @@ -767,14 +865,319 @@ export class RunState> { * @param approvalItem - The tool call approval item to approve. * @param options - Options for the approval. * @param options.alwaysApprove - Approve this tool for all future calls in this run. + * @param options.overrideArguments - Replace the approved function-call arguments for this tool call. + * @param options.saveOverrideArguments - Whether the corrected function-call arguments must also be + * saved into replay/session history. Defaults to `true`; set to `false` for execution-only overrides. */ approve( approvalItem: RunToolApprovalItem, - options: { alwaysApprove?: boolean } = { + options: ApproveRunToolOptions = { alwaysApprove: false, }, ) { - this._context.approveTool(approvalItem, options); + const { + overrideArguments, + saveOverrideArguments, + alwaysApprove = false, + } = options; + + if ( + typeof saveOverrideArguments !== 'undefined' && + typeof overrideArguments === 'undefined' + ) { + throw new UserError( + 'saveOverrideArguments can only be used together with overrideArguments.', + this, + ); + } + + if (typeof overrideArguments !== 'undefined') { + this.applyApprovalArgumentOverride(approvalItem, overrideArguments, { + alwaysApprove, + saveOverrideArguments, + }); + } + + this._context.approveTool(approvalItem, { alwaysApprove }); + } + + private applyApprovalArgumentOverride( + approvalItem: RunToolApprovalItem, + overrideArguments: Record, + options: { + alwaysApprove?: boolean; + saveOverrideArguments?: boolean; + }, + ): void { + if (options.alwaysApprove) { + throw new UserError( + 'overrideArguments cannot be used together with alwaysApprove.', + this, + ); + } + + if (approvalItem.rawItem.type !== 'function_call') { + throw new UserError( + 'overrideArguments is only supported for function_call approvals.', + this, + ); + } + + if ( + !overrideArguments || + typeof overrideArguments !== 'object' || + Array.isArray(overrideArguments) + ) { + throw new UserError( + 'overrideArguments must be a plain JSON object.', + this, + ); + } + + let serializedArguments: string; + try { + serializedArguments = JSON.stringify(overrideArguments); + } catch (error) { + throw new UserError( + `overrideArguments must be JSON serializable. ${String(error)}`, + this, + ); + } + + if (typeof serializedArguments !== 'string') { + throw new UserError( + 'overrideArguments could not be serialized to JSON.', + this, + ); + } + + const originalArguments = approvalItem.rawItem.arguments; + const callId = approvalItem.rawItem.callId; + const shouldSaveOverrideArguments = options.saveOverrideArguments !== false; + const hasServerManagedConversation = Boolean( + this._conversationId || this._previousResponseId, + ); + if (shouldSaveOverrideArguments && hasServerManagedConversation) { + throw new UserError( + 'saveOverrideArguments requires local canonical history. Server-managed conversations cannot persist corrected function_call arguments. Pass saveOverrideArguments: false to apply the override only to the current execution.', + this, + ); + } + + const updatedToolCall = createFunctionCallOverride( + approvalItem.rawItem, + serializedArguments, + ); + + approvalItem.rawItem = updatedToolCall; + this.replaceFunctionCallInInterruptions(callId, updatedToolCall); + if (shouldSaveOverrideArguments) { + this.clearExecutionOnlyApprovalOverrideCallId(callId); + } else { + this.recordExecutionOnlyApprovalOverride(callId); + } + + if (this._lastProcessedResponse) { + for (const functionRun of this._lastProcessedResponse.functions) { + if (functionRun.toolCall.callId !== callId) { + continue; + } + functionRun.toolCall = updatedToolCall; + } + + if (shouldSaveOverrideArguments) { + this.replaceFunctionCallInRunItems( + this._lastProcessedResponse.newItems, + callId, + updatedToolCall, + updatedToolCall, + ); + } + } + + if (shouldSaveOverrideArguments) { + this.replaceFunctionCallInRunItems( + this._generatedItems, + callId, + updatedToolCall, + updatedToolCall, + ); + + this.replaceFunctionCallInModelResponses(callId, updatedToolCall); + this.recordSessionHistoryMutation({ + type: 'replace_function_call', + callId, + replacement: updatedToolCall as SessionFunctionCallItem, + }); + } + + this.recordApprovalArgumentOverrideTrace( + approvalItem.name ?? approvalItem.rawItem.name, + callId, + originalArguments, + serializedArguments, + ); + } + + private recordApprovalArgumentOverrideTrace( + toolName: string, + callId: string, + originalArguments: string | undefined, + serializedArguments: string, + ): void { + if (!this._traceIncludeSensitiveData) { + return; + } + + const parent = this._currentAgentSpan ?? this._trace; + if (!parent) { + return; + } + + try { + const span = getGlobalTraceProvider().createSpan( + { + data: { + type: 'custom', + name: `approval override: ${toolName}`, + data: { + tool_name: toolName, + call_id: callId, + original_arguments: + deserializeFunctionCallArgumentsForTrace(originalArguments), + override_arguments: + deserializeFunctionCallArgumentsForTrace(serializedArguments), + }, + }, + }, + parent, + ); + span.start(); + span.end(); + } catch (error) { + logger.warn( + `Failed to record approval override trace for ${toolName}: ${String(error)}`, + ); + } + } + + private replaceFunctionCallInInterruptions( + callId: string, + toolCall: protocol.FunctionCallItem, + ): void { + if (this._currentStep?.type !== 'next_step_interruption') { + return; + } + + for (const interruption of this.getInterruptions()) { + if ( + interruption.rawItem.type === 'function_call' && + interruption.rawItem.callId === callId + ) { + interruption.rawItem = toolCall; + } + } + } + + private replaceFunctionCallInRunItems( + items: RunItem[], + callId: string, + storedToolCall: protocol.FunctionCallItem, + replayToolCall: protocol.FunctionCallItem, + ): void { + for (const item of items) { + if (!isFunctionCallItem(item.rawItem) || item.rawItem.callId !== callId) { + continue; + } + + if (item instanceof RunToolCallItem) { + item.rawItem = replayToolCall; + continue; + } + + if (item instanceof RunToolApprovalItem) { + item.rawItem = storedToolCall; + } + } + } + + private replaceFunctionCallInModelResponses( + callId: string, + toolCall: protocol.FunctionCallItem, + ): void { + for (const response of this._modelResponses) { + for (let index = 0; index < response.output.length; index += 1) { + const item = response.output[index]; + if (!isFunctionCallItem(item) || item.callId !== callId) { + continue; + } + response.output[index] = toolCall; + } + } + + if (!this._lastTurnResponse) { + return; + } + + for ( + let index = 0; + index < this._lastTurnResponse.output.length; + index += 1 + ) { + const item = this._lastTurnResponse.output[index]; + if (!isFunctionCallItem(item) || item.callId !== callId) { + continue; + } + this._lastTurnResponse.output[index] = toolCall; + } + } + + public getSessionHistoryMutations(): SessionHistoryMutation[] { + return this._sessionHistoryMutations.map((mutation) => + structuredClone(mutation), + ); + } + + public hasPendingExecutionOnlyApprovalOverrides(): boolean { + return this._executionOnlyApprovalOverrideCallIds.length > 0; + } + + public clearExecutionOnlyApprovalOverrides(): void { + this._executionOnlyApprovalOverrideCallIds = []; + } + + public clearSessionHistoryMutations(): void { + this._sessionHistoryMutations = []; + } + + private recordSessionHistoryMutation(mutation: SessionHistoryMutation): void { + const replacementIndex = this._sessionHistoryMutations.findIndex( + (existing) => + existing.type === mutation.type && existing.callId === mutation.callId, + ); + + if (replacementIndex >= 0) { + this._sessionHistoryMutations[replacementIndex] = + structuredClone(mutation); + return; + } + + this._sessionHistoryMutations.push(structuredClone(mutation)); + } + + private recordExecutionOnlyApprovalOverride(callId: string): void { + if (this._executionOnlyApprovalOverrideCallIds.includes(callId)) { + return; + } + + this._executionOnlyApprovalOverrideCallIds.push(callId); + } + + private clearExecutionOnlyApprovalOverrideCallId(callId: string): void { + this._executionOnlyApprovalOverrideCallIds = + this._executionOnlyApprovalOverrideCallIds.filter( + (existingCallId) => existingCallId !== callId, + ); } /** @@ -887,6 +1290,10 @@ export class RunState> { conversationId: this._conversationId, previousResponseId: this._previousResponseId, reasoningItemIdPolicy: this._reasoningItemIdPolicy, + executionOnlyApprovalOverrideCallIds: + this._executionOnlyApprovalOverrideCallIds, + sessionHistoryMutations: this._sessionHistoryMutations, + traceIncludeSensitiveData: this._traceIncludeSensitiveData, trace: this._trace ? (this._trace.toJSON({ includeTracingApiKey }) as any) : null, @@ -981,7 +1388,11 @@ function assertSchemaVersionSupportsToolSearch( schemaVersion: SupportedSchemaVersion, stateJson: z.infer, ): void { - if (schemaVersion === '1.8') { + if ( + schemaVersion === '1.8' || + schemaVersion === '1.9' || + schemaVersion === '1.10' + ) { return; } @@ -1368,6 +1779,18 @@ async function buildRunStateFromJson>( state._conversationId = stateJson.conversationId ?? undefined; state._previousResponseId = stateJson.previousResponseId ?? undefined; state._reasoningItemIdPolicy = stateJson.reasoningItemIdPolicy ?? undefined; + state._executionOnlyApprovalOverrideCallIds = + stateJson.executionOnlyApprovalOverrideCallIds ?? []; + state._sessionHistoryMutations = (stateJson.sessionHistoryMutations ?? + []) as SessionHistoryMutation[]; + if (typeof stateJson.traceIncludeSensitiveData === 'boolean') { + state.setTraceIncludeSensitiveData(stateJson.traceIncludeSensitiveData); + } else { + // Legacy snapshots pre-1.10 did not persist this policy. Start redacted until a + // Runner instance re-applies its configured tracing behavior for resumed execution. + state._traceIncludeSensitiveData = false; + state._traceIncludeSensitiveDataNeedsConfigFallback = true; + } // rebuild tool use tracker state._toolUseTracker = new AgentToolUseTracker(); diff --git a/packages/agents-core/src/runner/conversation.ts b/packages/agents-core/src/runner/conversation.ts index 95a4ef2fe..135f839cc 100644 --- a/packages/agents-core/src/runner/conversation.ts +++ b/packages/agents-core/src/runner/conversation.ts @@ -1,6 +1,10 @@ import { Agent, AgentOutputType } from '../agent'; import { UserError } from '../errors'; import { RunItem } from '../items'; +import { + isServerManagedConversationSession, + type Session, +} from '../memory/session'; import { ModelResponse } from '../model'; import { RunContext } from '../runContext'; import { AgentInputItem } from '../types'; @@ -49,6 +53,13 @@ export type FilterApplicationResult = { filterApplied: boolean; }; +export type ResolvedServerConversationContext = { + conversationId?: string; + previousResponseId?: string; + historyIsServerManaged: boolean; + serverConversationChainAvailable: boolean; +}; + /** * Applies the optional callModelInputFilter and returns the filtered input alongside the original * items so downstream tracking and session persistence stay in sync with what the model saw. @@ -177,6 +188,70 @@ export async function applyCallModelInputFilter( } } +export function resolveServerConversationContext(options: { + explicitConversationId?: string; + resumedConversationId?: string; + explicitPreviousResponseId?: string; + resumedPreviousResponseId?: string; + session?: Session; +}): ResolvedServerConversationContext { + const { + explicitConversationId, + resumedConversationId, + explicitPreviousResponseId, + resumedPreviousResponseId, + session, + } = options; + const isTaggedServerManagedSession = + isServerManagedConversationSession(session); + const conversationId = explicitConversationId ?? resumedConversationId; + const previousResponseId = + explicitPreviousResponseId ?? resumedPreviousResponseId; + + const historyIsServerManaged = + Boolean(conversationId) || + Boolean(previousResponseId) || + isTaggedServerManagedSession; + const serverConversationChainAvailable = + Boolean(conversationId) || + Boolean(explicitPreviousResponseId) || + (Boolean(resumedPreviousResponseId) && !isTaggedServerManagedSession); + + return { + conversationId, + previousResponseId, + historyIsServerManaged, + serverConversationChainAvailable, + }; +} + +export function createServerConversationReplayTracker(options: { + conversationId?: string; + previousResponseId?: string; + session?: Session; + reasoningItemIdPolicy?: ReasoningItemIdPolicy; +}): ServerConversationTracker | undefined { + const { conversationId, previousResponseId, session, reasoningItemIdPolicy } = + options; + const hasServerConversationContext = + Boolean(conversationId) || Boolean(previousResponseId); + if ( + !hasServerConversationContext && + !isServerManagedConversationSession(session) + ) { + return undefined; + } + + return new ServerConversationTracker({ + conversationId, + previousResponseId, + reasoningItemIdPolicy, + // Conversation ids already pin the server-side transcript. Session-only and + // previous_response_id flows still need to capture the latest response id after each turn. + captureResponseIds: !conversationId, + }); +} + /** * Tracks which items have already been sent to or received from the Responses API when the caller * supplies `conversationId`/`previousResponseId`. This ensures we only send the delta each turn. @@ -185,6 +260,7 @@ export class ServerConversationTracker { public conversationId?: string; public previousResponseId?: string; private readonly reasoningItemIdPolicy?: ReasoningItemIdPolicy; + private readonly captureResponseIds: boolean; // Using this flag because WeakSet does not provide a way to check its size. private sentInitialInput = false; @@ -201,14 +277,17 @@ export class ServerConversationTracker { conversationId, previousResponseId, reasoningItemIdPolicy, + captureResponseIds = true, }: { conversationId?: string; previousResponseId?: string; reasoningItemIdPolicy?: ReasoningItemIdPolicy; + captureResponseIds?: boolean; }) { this.conversationId = conversationId ?? undefined; this.previousResponseId = previousResponseId ?? undefined; this.reasoningItemIdPolicy = reasoningItemIdPolicy; + this.captureResponseIds = captureResponseIds; } /** @@ -252,7 +331,11 @@ export class ServerConversationTracker { } const latestResponse = modelResponses[modelResponses.length - 1]; - if (!this.conversationId && latestResponse?.responseId) { + if ( + this.captureResponseIds && + !this.conversationId && + latestResponse?.responseId + ) { this.previousResponseId = latestResponse.responseId; } @@ -283,7 +366,11 @@ export class ServerConversationTracker { this.serverItems.add(item); } } - if (!this.conversationId && modelResponse.responseId) { + if ( + this.captureResponseIds && + !this.conversationId && + modelResponse.responseId + ) { this.previousResponseId = modelResponse.responseId; } } diff --git a/packages/agents-core/src/runner/runLoop.ts b/packages/agents-core/src/runner/runLoop.ts index 0e3453a06..50647814b 100644 --- a/packages/agents-core/src/runner/runLoop.ts +++ b/packages/agents-core/src/runner/runLoop.ts @@ -76,6 +76,7 @@ export async function resumeInterruptedTurn< state, toolErrorFormatter, ); + state.clearExecutionOnlyApprovalOverrides(); applyTurnResult({ state, diff --git a/packages/agents-core/src/runner/sessionPersistence.ts b/packages/agents-core/src/runner/sessionPersistence.ts index 1adab9406..c7b9ca4fb 100644 --- a/packages/agents-core/src/runner/sessionPersistence.ts +++ b/packages/agents-core/src/runner/sessionPersistence.ts @@ -1,7 +1,10 @@ import { UserError } from '../errors'; import { isOpenAIResponsesCompactionAwareSession, + isSessionHistoryRewriteAwareSession, type Session, + type SessionFunctionCallItem, + type SessionHistoryMutation, type SessionInputCallback, } from '../memory/session'; import { RunResult, StreamedRunResult } from '../result'; @@ -38,6 +41,45 @@ export type SessionPersistenceTracker = { ) => (() => Promise) | undefined; }; +export function assertOverrideHistoryPersistenceSupport(options: { + input: string | AgentInputItem[] | RunState; + session?: Session; + historyIsServerManaged: boolean; +}): void { + const { input, session, historyIsServerManaged } = options; + if (!(input instanceof RunState)) { + return; + } + + if (hasPendingExecutionOnlyOverride(input) && !historyIsServerManaged) { + throw new UserError( + 'saveOverrideArguments: false is only supported when using conversationId, previousResponseId, or a server-managed session.', + input, + ); + } + + const mutations = input.getSessionHistoryMutations(); + if (mutations.length === 0) { + return; + } + + if (historyIsServerManaged) { + throw new UserError( + 'saveOverrideArguments requires local canonical history. Server-managed conversations cannot persist corrected function_call arguments. Pass saveOverrideArguments: false to apply the override only to the current execution.', + input, + ); + } + + if (!session || isSessionHistoryRewriteAwareSession(session)) { + return; + } + + throw new UserError( + 'saveOverrideArguments requires a session that supports persisted-history rewrites. Use MemorySession, OpenAIResponsesHistoryRewriteSession, or another SessionHistoryRewriteAwareSession, or pass saveOverrideArguments: false to apply the override only to the current execution.', + input, + ); +} + export function createSessionPersistenceTracker(options: { session?: Session; hasCallModelInputFilter: boolean; @@ -143,6 +185,10 @@ export function createSessionPersistenceTracker(options: { return new SessionPersistenceTrackerImpl(); } +function hasPendingExecutionOnlyOverride(state: RunState): boolean { + return state.hasPendingExecutionOnlyApprovalOverrides(); +} + function cloneItems(items: AgentInputItem[]): AgentInputItem[] { return items.map((item) => structuredClone(item)); } @@ -438,6 +484,17 @@ function normalizeItemsForSessionPersistence( ); } +function normalizeHistoryMutationsForSessionPersistence( + mutations: SessionHistoryMutation[], +): SessionHistoryMutation[] { + return mutations.map((mutation) => ({ + ...mutation, + replacement: normalizeItemsForSessionPersistence([ + mutation.replacement, + ])[0] as SessionFunctionCallItem, + })); +} + type SessionBinaryContext = { mediaType?: string; }; @@ -587,19 +644,42 @@ async function persistRunItemsToSession(options: { ]; if (itemsToSave.length === 0) { + await applySessionHistoryMutationsOnSession(session, state); + await runCompactionOnSession(session, lastResponseId, state); state._currentTurnPersistedItemCount = alreadyPersistedCount + newRunItems.length; - await runCompactionOnSession(session, lastResponseId, state); return; } const sanitizedItems = normalizeItemsForSessionPersistence(itemsToSave); await session.addItems(sanitizedItems); + await applySessionHistoryMutationsOnSession(session, state); await runCompactionOnSession(session, lastResponseId, state); state._currentTurnPersistedItemCount = alreadyPersistedCount + newRunItems.length; } +async function applySessionHistoryMutationsOnSession( + session: Session, + state: RunState, +): Promise { + const mutations = state.getSessionHistoryMutations(); + if (mutations.length === 0) { + return; + } + + const normalizedMutations = + normalizeHistoryMutationsForSessionPersistence(mutations); + + if (isSessionHistoryRewriteAwareSession(session)) { + await session.applyHistoryMutations({ mutations: normalizedMutations }); + state.clearSessionHistoryMutations(); + return; + } + + state.clearSessionHistoryMutations(); +} + async function runCompactionOnSession( session: Session | undefined, responseId: string | undefined, diff --git a/packages/agents-core/src/runner/tracing.ts b/packages/agents-core/src/runner/tracing.ts index a88fbfaf1..779f5951f 100644 --- a/packages/agents-core/src/runner/tracing.ts +++ b/packages/agents-core/src/runner/tracing.ts @@ -1,6 +1,7 @@ import { Agent } from '../agent'; import { Handoff } from '../handoff'; import { ModelTracing } from '../model'; +import type { RunState } from '../runState'; import { Tool } from '../tool'; import { setCurrentSpan } from '../tracing/context'; import { createAgentSpan } from '../tracing'; @@ -105,6 +106,16 @@ export function applyTraceOverrides( return { trace, currentSpan }; } +export function applyTraceRedactionPolicyToState( + state: RunState, + traceIncludeSensitiveData: boolean, + isResumedState: boolean, +): void { + if (!isResumedState || state._traceIncludeSensitiveDataNeedsConfigFallback) { + state.setTraceIncludeSensitiveData(traceIncludeSensitiveData); + } +} + /** * Ensures an agent span exists and updates tool metadata if already present. * Returns the span so callers can pass it through run state. diff --git a/packages/agents-core/test/hitlMemorySessionScenario.test.ts b/packages/agents-core/test/hitlMemorySessionScenario.test.ts index 90b49edfa..efe71c06e 100644 --- a/packages/agents-core/test/hitlMemorySessionScenario.test.ts +++ b/packages/agents-core/test/hitlMemorySessionScenario.test.ts @@ -151,6 +151,50 @@ describe('MemorySession HITL scenario', () => { await (rehydrated ?? session).clearSession(); } }); + + it('persists overridden function-call arguments in session history', async () => { + executeCounts.clear(); + const session = new MemorySession(); + const model = new ScenarioModel(); + const agent = new Agent({ + name: 'MemorySession Override', + instructions: `Always call ${TOOL_ECHO} before responding.`, + model, + tools: [approvalEchoTool, approvalNoteTool], + modelSettings: { toolChoice: TOOL_ECHO }, + toolUseBehavior: 'stop_on_first_tool', + }); + + try { + const firstRun = await run(agent, USER_MESSAGES[0], { session }); + expect(firstRun.interruptions).toHaveLength(1); + + firstRun.state.approve(firstRun.interruptions[0], { + overrideArguments: { query: 'Overridden query' }, + }); + + const resumed = await run(agent, firstRun.state, { session }); + expect(resumed.finalOutput).toBe('approved:Overridden query'); + + const items = await session.getItems(); + const functionCalls = items.filter( + (item): item is protocol.FunctionCallItem => + item.type === 'function_call', + ); + const lastCall = findLastFunctionCall(items); + const lastResult = findLastFunctionCallResult(items); + + expect(functionCalls).toHaveLength(1); + expect(lastCall?.arguments).toBe( + JSON.stringify({ query: 'Overridden query' }), + ); + expect(extractToolOutputText(lastResult)).toBe( + 'approved:Overridden query', + ); + } finally { + await session.clearSession(); + } + }); }); async function runScenarioStep( diff --git a/packages/agents-core/test/memory/historyMutations.test.ts b/packages/agents-core/test/memory/historyMutations.test.ts new file mode 100644 index 000000000..8c57c9d68 --- /dev/null +++ b/packages/agents-core/test/memory/historyMutations.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from 'vitest'; + +import type { AgentInputItem } from '../../src'; +import { applySessionHistoryMutations } from '../../src/memory/historyMutations'; + +describe('applySessionHistoryMutations', () => { + it('replaces the first matching function call and drops later duplicates', () => { + const items: AgentInputItem[] = [ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: 'stale-duplicate' }), + }, + { + type: 'function_call_result', + name: 'lookup_customer_profile', + callId: 'call_override', + status: 'completed', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]; + + const rewritten = applySessionHistoryMutations(items, [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ]); + + expect(rewritten).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_result', + name: 'lookup_customer_profile', + callId: 'call_override', + status: 'completed', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]); + }); + + it('leaves history unchanged when compaction already removed the target function call', () => { + const items: AgentInputItem[] = [ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call_result', + name: 'lookup_customer_profile', + callId: 'call_override', + status: 'completed', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]; + + const rewritten = applySessionHistoryMutations(items, [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ]); + + expect(rewritten).toEqual(items); + }); +}); diff --git a/packages/agents-core/test/run.stream.test.ts b/packages/agents-core/test/run.stream.test.ts index 6856d2a9d..bbd722588 100644 --- a/packages/agents-core/test/run.stream.test.ts +++ b/packages/agents-core/test/run.stream.test.ts @@ -23,6 +23,7 @@ import { tool, user, Session, + SERVER_MANAGED_CONVERSATION_SESSION, InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered, RunState, @@ -1412,6 +1413,36 @@ describe('Runner.run (streaming)', () => { execute: async ({ test }) => `result:${test}`, }); + class ServerManagedRecordingSession implements Session { + readonly [SERVER_MANAGED_CONVERSATION_SESSION] = true as const; + + public added: AgentInputItem[][] = []; + public history: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'server-managed-stream-session'; + } + + async getItems(): Promise { + return this.history.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + const cloned = items.map((item) => structuredClone(item)); + this.added.push(cloned); + this.history.push(...cloned); + } + + async popItem(): Promise { + return this.history.pop(); + } + + async clearSession(): Promise { + this.added = []; + this.history = []; + } + } + async function drain>( result: StreamedRunResult, ) { @@ -1478,6 +1509,66 @@ describe('Runner.run (streaming)', () => { }); }); + it('keeps prior history on the first streaming request when no explicit server chain is available', async () => { + const model = new TrackingStreamingModel([ + buildTurn( + [ + fakeModelMessage('a_message'), + buildToolCall('call-stream-session-1', 'foo'), + ], + 'resp-stream-session-1', + ), + buildTurn([fakeModelMessage('done')], 'resp-stream-session-2'), + ]); + + const agent = new Agent({ + name: 'StreamServerManagedSessionOnly', + model, + tools: [serverTool], + }); + + const session = new ServerManagedRecordingSession(); + session.history = [user('Persisted remote history')]; + const runner = new Runner(); + + const result = await runner.run(agent, 'user_message', { + stream: true, + session, + }); + await drain(result); + expect(result.finalOutput).toBe('done'); + + expect(model.requests).toHaveLength(2); + expect(model.requests[0].conversationId).toBeUndefined(); + + const firstItems = model.requests[0].input as AgentInputItem[]; + expect(firstItems.length).toBeGreaterThan(1); + expect( + firstItems.some( + (item) => + item.type === 'message' && + getFirstTextContent(item) === 'Persisted remote history', + ), + ).toBe(true); + expect( + firstItems.some( + (item) => + item.type === 'message' && + getFirstTextContent(item) === 'user_message', + ), + ).toBe(true); + + const secondItems = model.requests[1].input as AgentInputItem[]; + expect(secondItems).toHaveLength(1); + expect(model.requests[1].previousResponseId).toBe( + 'resp-stream-session-1', + ); + expect(secondItems[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-stream-session-1', + }); + }); + it('keeps server tracker aligned with filtered inputs when streaming', async () => { const model = new TrackingStreamingModel([ buildTurn( @@ -2092,6 +2183,125 @@ describe('Runner.run (streaming)', () => { }); }); + it('preserves restored trace redaction when a streamed run interrupts again', async () => { + const approvalTool = tool({ + name: 'test', + description: 'approval tool', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingStreamingModel([ + buildTurn([buildToolCall('call-stream-trace-1', 'foo')], 'resp-1'), + buildTurn([buildToolCall('call-stream-trace-2', 'bar')], 'resp-2'), + ]); + + const agent = new Agent({ + name: 'StreamApprovalTraceResumeAgent', + model, + tools: [approvalTool], + }); + + const firstRunner = new Runner({ + traceIncludeSensitiveData: false, + }); + const firstResult = await firstRunner.run(agent, 'user_message', { + stream: true, + }); + + await drain(firstResult); + + expect(firstResult.interruptions).toHaveLength(1); + expect(firstResult.state._traceIncludeSensitiveData).toBe(false); + + const restored = await RunState.fromString( + agent, + firstResult.state.toString(), + ); + expect(restored._traceIncludeSensitiveData).toBe(false); + + restored.approve(restored.getInterruptions()[0]); + + const resumed = await new Runner().run(agent, restored, { + stream: true, + }); + + await drain(resumed); + + expect(resumed.interruptions).toHaveLength(1); + expect(resumed.state._traceIncludeSensitiveData).toBe(false); + expect(model.requests).toHaveLength(2); + expect(model.requests[1].tracing).toBe('enabled_without_data'); + }); + + it('reapplies runner trace redaction for legacy serialized streamed states', async () => { + const approvalTool = tool({ + name: 'test', + description: 'approval tool', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingStreamingModel([ + buildTurn( + [buildToolCall('call-stream-trace-legacy-1', 'foo')], + 'resp-stream-legacy-1', + ), + buildTurn( + [buildToolCall('call-stream-trace-legacy-2', 'bar')], + 'resp-stream-legacy-2', + ), + ]); + + const agent = new Agent({ + name: 'StreamLegacyTraceResumeAgent', + model, + tools: [approvalTool], + }); + + const firstResult = await new Runner({ + traceIncludeSensitiveData: false, + }).run(agent, 'user_message', { + stream: true, + }); + + await drain(firstResult); + + expect(firstResult.interruptions).toHaveLength(1); + expect(firstResult.state._traceIncludeSensitiveData).toBe(false); + + const legacyJson = firstResult.state.toJSON() as Record; + delete legacyJson.traceIncludeSensitiveData; + legacyJson.$schemaVersion = '1.9'; + + const restored = await RunState.fromString( + agent, + JSON.stringify(legacyJson), + ); + expect(restored._traceIncludeSensitiveData).toBe(false); + expect(restored._traceIncludeSensitiveDataNeedsConfigFallback).toBe(true); + + restored.approve(restored.getInterruptions()[0]); + + const resumed = await new Runner({ + traceIncludeSensitiveData: false, + }).run(agent, restored, { + stream: true, + }); + + await drain(resumed); + + expect(resumed.interruptions).toHaveLength(1); + expect(resumed.state._traceIncludeSensitiveData).toBe(false); + expect(resumed.state._traceIncludeSensitiveDataNeedsConfigFallback).toBe( + false, + ); + expect(model.requests).toHaveLength(2); + expect(model.requests[1].tracing).toBe('enabled_without_data'); + }); + it('uses runner-level toolErrorFormatter when resuming a rejected approval', async () => { const approvalTool = tool({ name: 'test', diff --git a/packages/agents-core/test/run.test.ts b/packages/agents-core/test/run.test.ts index b4d993816..bbc80b617 100644 --- a/packages/agents-core/test/run.test.ts +++ b/packages/agents-core/test/run.test.ts @@ -17,6 +17,7 @@ import { ModelResponse, OutputGuardrailTripwireTriggered, Session, + SERVER_MANAGED_CONVERSATION_SESSION, UserError, ModelInputData, type OutputGuardrailFunctionArgs, @@ -28,6 +29,7 @@ import { setTraceProcessors, setTracingDisabled, BatchTraceProcessor, + MemorySession as RewriteableMemorySession, user, assistant, } from '../src'; @@ -3079,6 +3081,110 @@ describe('Runner.run', () => { expect(functionCalls).toHaveLength(1); }); + it('does not duplicate already-persisted tool results when overriding a later approval in the same turn', async () => { + const getWeatherTool = tool({ + name: 'get_weather', + description: 'Get weather for a city', + parameters: z.object({ city: z.string() }), + needsApproval: async () => true, + execute: async ({ city }) => `Sunny, 72°F in ${city}`, + }); + + const model = new FakeModel([ + { + output: [ + { + type: 'function_call', + id: 'fc_weather_a', + callId: 'call_weather_a', + name: 'get_weather', + status: 'completed', + arguments: JSON.stringify({ city: 'Oakland' }), + providerData: {}, + } as protocol.FunctionCallItem, + { + type: 'function_call', + id: 'fc_weather_b', + callId: 'call_weather_b', + name: 'get_weather', + status: 'completed', + arguments: JSON.stringify({ city: 'San Francisco' }), + providerData: {}, + } as protocol.FunctionCallItem, + ], + usage: new Usage(), + }, + { + output: [fakeModelMessage('All approvals complete.')], + usage: new Usage(), + }, + ]); + + const agent = new Agent({ + name: 'Assistant', + instructions: 'Use get_weather for both cities.', + model, + tools: [getWeatherTool], + toolUseBehavior: 'run_llm_again', + }); + + const session = new RewriteableMemorySession(); + + const first = await run(agent, 'Need weather updates for two cities.', { + session, + }); + + expect(first.interruptions).toHaveLength(2); + const firstApproval = first.interruptions.find( + (item) => + item.rawItem.type === 'function_call' && + item.rawItem.callId === 'call_weather_a', + ); + expect(firstApproval).toBeDefined(); + + first.state.approve(firstApproval!); + + const second = await run(agent, first.state, { session }); + + expect(second.interruptions).toHaveLength(1); + const pendingOverrideApproval = second.interruptions.find( + (item) => + item.rawItem.type === 'function_call' && + item.rawItem.callId === 'call_weather_b', + ); + expect(pendingOverrideApproval).toBeDefined(); + + second.state.approve(pendingOverrideApproval!, { + overrideArguments: { city: 'San Jose' }, + }); + + const final = await run(agent, second.state, { session }); + expect(final.finalOutput).toBe('All approvals complete.'); + + const allItems = await session.getItems(); + const firstToolResults = allItems.filter( + (item): item is protocol.FunctionCallResultItem => + item.type === 'function_call_result' && + item.callId === 'call_weather_a', + ); + const secondToolCalls = allItems.filter( + (item): item is protocol.FunctionCallItem => + item.type === 'function_call' && item.callId === 'call_weather_b', + ); + const secondToolResults = allItems.filter( + (item): item is protocol.FunctionCallResultItem => + item.type === 'function_call_result' && + item.callId === 'call_weather_b', + ); + + expect(firstToolResults).toHaveLength(1); + expect(secondToolCalls).toHaveLength(1); + expect(secondToolCalls[0]?.arguments).toBe( + JSON.stringify({ city: 'San Jose' }), + ); + expect(secondToolResults).toHaveLength(1); + }); + it('does not duplicate already persisted items when the resumed run continues into additional turns', async () => { // Regression test for session persistence across resumed runs that execute additional turns. // @@ -5430,7 +5536,7 @@ describe('Runner.run', () => { markSpy.mockRestore(); }); - it('does not resend prior items when resuming with conversationId', async () => { + it('replays corrected function calls when overriding arguments without server-managed conversation', async () => { const approvalTool = tool({ name: 'test', description: 'tool that requires approval', @@ -5440,53 +5546,55 @@ describe('Runner.run', () => { }); const model = new TrackingModel([ - buildResponse([buildToolCall('call-approved', 'foo')], 'resp-1'), - buildResponse([fakeModelMessage('done')], 'resp-2'), + buildResponse( + [buildToolCall('call-local-override', 'foo')], + 'resp-local-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-local-2'), ]); const agent = new Agent({ - name: 'ApprovalAgent', + name: 'ApprovalLocalOverrideAgent', model, tools: [approvalTool], }); const runner = new Runner(); - const firstResult = await runner.run(agent, 'user_message', { - conversationId: 'conv-approval', - }); + const firstResult = await runner.run(agent, 'user_message'); expect(firstResult.interruptions).toHaveLength(1); - const approvalItem = firstResult.interruptions[0]; - firstResult.state.approve(approvalItem); - - const secondResult = await runner.run(agent, firstResult.state, { - conversationId: 'conv-approval', + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, }); + const secondResult = await runner.run(agent, firstResult.state); + expect(secondResult.finalOutput).toBe('done'); expect(model.requests).toHaveLength(2); - const firstInput = model.requests[0].input; - expect(Array.isArray(firstInput)).toBe(true); - const firstItems = firstInput as AgentInputItem[]; - expect(firstItems).toHaveLength(1); - expect(firstItems[0]).toMatchObject({ + const secondItems = model.requests[1].input as AgentInputItem[]; + expect(secondItems).toHaveLength(3); + expect(secondItems[0]).toMatchObject({ role: 'user', content: 'user_message', }); - - const secondRequest = model.requests[1]; - expect(secondRequest.conversationId).toBe('conv-approval'); - expect(Array.isArray(secondRequest.input)).toBe(true); - const secondItems = secondRequest.input as AgentInputItem[]; - expect(secondItems).toHaveLength(1); - expect(secondItems[0]).toMatchObject({ + expect(secondItems[1]).toMatchObject({ + type: 'function_call', + callId: 'call-local-override', + name: 'test', + arguments: JSON.stringify({ test: 'bar' }), + }); + expect(secondItems[2]).toMatchObject({ type: 'function_call_result', - callId: 'call-approved', + callId: 'call-local-override', + output: { + type: 'text', + text: 'result:bar', + }, }); }); - it('does not resend prior items when resuming with previousResponseId', async () => { + it('rejects execution-only overrides when resuming without conversationId or previousResponseId', async () => { const approvalTool = tool({ name: 'test', description: 'tool that requires approval', @@ -5496,43 +5604,709 @@ describe('Runner.run', () => { }); const model = new TrackingModel([ - buildResponse([buildToolCall('call-prev', 'foo')], 'resp-prev-1'), - buildResponse([fakeModelMessage('done')], 'resp-prev-2'), + buildResponse( + [buildToolCall('call-local-exec-only', 'foo')], + 'resp-local-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-local-2'), ]); const agent = new Agent({ - name: 'ApprovalPrevAgent', + name: 'ApprovalLocalExecutionOnlyAgent', model, tools: [approvalTool], }); const runner = new Runner(); - const firstResult = await runner.run(agent, 'user_message', { - previousResponseId: 'initial-response', + const firstResult = await runner.run(agent, 'user_message'); + + expect(firstResult.interruptions).toHaveLength(1); + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); + + await expect(runner.run(agent, firstResult.state)).rejects.toThrow( + 'saveOverrideArguments: false is only supported when using conversationId, previousResponseId, or a server-managed session', + ); + expect(model.requests).toHaveLength(1); + }); + + class ServerManagedPlainSession implements Session { + readonly [SERVER_MANAGED_CONVERSATION_SESSION] = true as const; + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'server-managed-session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items.map((item) => structuredClone(item))); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + it('supports execution-only overrides when the session history is server-managed', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-server-managed-session', 'foo')], + 'resp-server-managed-1', + ), + ]); + + const agent = new Agent({ + name: 'ApprovalServerManagedSessionAgent', + model, + tools: [approvalTool], + toolUseBehavior: 'stop_on_first_tool', }); + const runner = new Runner(); + const session = new ServerManagedPlainSession(); + const firstResult = await runner.run(agent, 'user_message', { session }); + expect(firstResult.interruptions).toHaveLength(1); - const approvalItem = firstResult.interruptions[0]; - firstResult.state.approve(approvalItem); + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); const secondResult = await runner.run(agent, firstResult.state, { - previousResponseId: 'initial-response', + session, + }); + + expect(secondResult.finalOutput).toBe('result:bar'); + expect(model.requests).toHaveLength(1); + expect(await session.getItems()).toMatchObject([ + { + role: 'user', + content: 'user_message', + }, + { + type: 'function_call', + callId: 'call-server-managed-session', + arguments: JSON.stringify({ test: 'foo' }), + }, + { + type: 'function_call_result', + callId: 'call-server-managed-session', + output: { + type: 'text', + text: 'result:bar', + }, + }, + ]); + }); + + it('replays only corrected deltas when resuming with a server-managed session override', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-server-managed-replay', 'foo')], + 'resp-server-managed-replay-1', + ), + buildResponse( + [fakeModelMessage('done')], + 'resp-server-managed-replay-2', + ), + ]); + + const agent = new Agent({ + name: 'ApprovalServerManagedReplayAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const session = new ServerManagedPlainSession(); + const firstResult = await runner.run(agent, 'user_message', { session }); + + expect(firstResult.interruptions).toHaveLength(1); + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); + + const secondResult = await runner.run(agent, firstResult.state, { + session, }); expect(secondResult.finalOutput).toBe('done'); expect(model.requests).toHaveLength(2); + expect(model.requests[1].previousResponseId).toBe( + 'resp-server-managed-replay-1', + ); - expect(model.requests[0].previousResponseId).toBe('initial-response'); - - const secondRequest = model.requests[1]; - expect(secondRequest.previousResponseId).toBe('resp-prev-1'); - expect(Array.isArray(secondRequest.input)).toBe(true); - const secondItems = secondRequest.input as AgentInputItem[]; + const secondItems = model.requests[1].input as AgentInputItem[]; expect(secondItems).toHaveLength(1); expect(secondItems[0]).toMatchObject({ type: 'function_call_result', - callId: 'call-prev', + callId: 'call-server-managed-replay', + output: { + type: 'text', + text: 'result:bar', + }, + }); + expect( + secondItems.some( + (item) => + item.type === 'message' || + (item.type === 'function_call' && + item.callId === 'call-server-managed-replay'), + ), + ).toBe(false); + }); + + it('keeps prior history on later fresh runs when no explicit server chain is available', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-server-managed-fresh-turn', 'foo')], + 'resp-server-managed-fresh-turn-1', + ), + buildResponse( + [fakeModelMessage('done')], + 'resp-server-managed-fresh-turn-2', + ), + buildResponse( + [fakeModelMessage('follow-up done')], + 'resp-server-managed-fresh-turn-3', + ), + ]); + + const agent = new Agent({ + name: 'ApprovalServerManagedFreshTurnAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const session = new ServerManagedPlainSession(); + const firstResult = await runner.run(agent, 'user_message', { session }); + + expect(firstResult.interruptions).toHaveLength(1); + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); + + const resumedResult = await runner.run(agent, firstResult.state, { + session, + }); + expect(resumedResult.finalOutput).toBe('done'); + + const freshTurnResult = await runner.run(agent, 'fresh_message', { + session, }); + + expect(freshTurnResult.finalOutput).toBe('follow-up done'); + expect(model.requests).toHaveLength(3); + + const thirdItems = model.requests[2].input as AgentInputItem[]; + expect(thirdItems.length).toBeGreaterThan(1); + expect( + thirdItems.some( + (item) => + item.type === 'message' && + item.role === 'user' && + getFirstTextContent(item) === 'fresh_message', + ), + ).toBe(true); + expect( + thirdItems.some( + (item) => + item.type === 'function_call' && + item.callId === 'call-server-managed-fresh-turn', + ), + ).toBe(true); + }); + + it('fails before resuming when saveOverrideArguments is required for a non-rewrite-aware session', async () => { + class PlainSession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'plain-session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items.map((item) => structuredClone(item))); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-plain-session', 'foo')], + 'resp-plain-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-plain-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalPlainSessionAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const session = new PlainSession(); + const firstResult = await runner.run(agent, 'user_message', { session }); + + expect(firstResult.interruptions).toHaveLength(1); + firstResult.state.approve(firstResult.interruptions[0], { + overrideArguments: { test: 'bar' }, + }); + + await expect( + runner.run(agent, firstResult.state, { session }), + ).rejects.toThrow( + 'saveOverrideArguments requires a session that supports persisted-history rewrites', + ); + expect(model.requests).toHaveLength(1); + }); + + it('does not resend prior items when resuming with conversationId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse([buildToolCall('call-approved', 'foo')], 'resp-1'), + buildResponse([fakeModelMessage('done')], 'resp-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + conversationId: 'conv-approval', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + firstResult.state.approve(approvalItem); + + const secondResult = await runner.run(agent, firstResult.state, { + conversationId: 'conv-approval', + }); + + expect(secondResult.finalOutput).toBe('done'); + expect(model.requests).toHaveLength(2); + + const firstInput = model.requests[0].input; + expect(Array.isArray(firstInput)).toBe(true); + const firstItems = firstInput as AgentInputItem[]; + expect(firstItems).toHaveLength(1); + expect(firstItems[0]).toMatchObject({ + role: 'user', + content: 'user_message', + }); + + const secondRequest = model.requests[1]; + expect(secondRequest.conversationId).toBe('conv-approval'); + expect(Array.isArray(secondRequest.input)).toBe(true); + const secondItems = secondRequest.input as AgentInputItem[]; + expect(secondItems).toHaveLength(1); + expect(secondItems[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-approved', + }); + }); + + it('rejects saveOverrideArguments defaults when overriding arguments with conversationId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-approved-override', 'foo')], + 'resp-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalOverrideAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + conversationId: 'conv-approval-override', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + expect(() => + firstResult.state.approve(approvalItem, { + overrideArguments: { test: 'bar' }, + }), + ).toThrow('saveOverrideArguments requires local canonical history'); + }); + + it('supports execution-only overrides with conversationId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-approved-override', 'foo')], + 'resp-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalOverrideAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + conversationId: 'conv-approval-override', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + firstResult.state.approve(approvalItem, { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); + + const secondResult = await runner.run(agent, firstResult.state, { + conversationId: 'conv-approval-override', + }); + + expect(secondResult.finalOutput).toBe('done'); + expect(model.requests).toHaveLength(2); + + const secondItems = model.requests[1].input as AgentInputItem[]; + expect(secondItems).toHaveLength(1); + expect(secondItems[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-approved-override', + output: { + type: 'text', + text: 'result:bar', + }, + }); + }); + + it('does not resend prior items when resuming with previousResponseId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse([buildToolCall('call-prev', 'foo')], 'resp-prev-1'), + buildResponse([fakeModelMessage('done')], 'resp-prev-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalPrevAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + previousResponseId: 'initial-response', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + firstResult.state.approve(approvalItem); + + const secondResult = await runner.run(agent, firstResult.state, { + previousResponseId: 'initial-response', + }); + + expect(secondResult.finalOutput).toBe('done'); + expect(model.requests).toHaveLength(2); + + expect(model.requests[0].previousResponseId).toBe('initial-response'); + + const secondRequest = model.requests[1]; + expect(secondRequest.previousResponseId).toBe('resp-prev-1'); + expect(Array.isArray(secondRequest.input)).toBe(true); + const secondItems = secondRequest.input as AgentInputItem[]; + expect(secondItems).toHaveLength(1); + expect(secondItems[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-prev', + }); + }); + + it('rejects saveOverrideArguments defaults when overriding arguments with previousResponseId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-prev-override', 'foo')], + 'resp-prev-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-prev-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalPrevOverrideAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + previousResponseId: 'initial-response', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + expect(() => + firstResult.state.approve(approvalItem, { + overrideArguments: { test: 'bar' }, + }), + ).toThrow('saveOverrideArguments requires local canonical history'); + }); + + it('supports execution-only overrides with previousResponseId', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-prev-override', 'foo')], + 'resp-prev-1', + ), + buildResponse([fakeModelMessage('done')], 'resp-prev-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalPrevOverrideAgent', + model, + tools: [approvalTool], + }); + + const runner = new Runner(); + const firstResult = await runner.run(agent, 'user_message', { + previousResponseId: 'initial-response', + }); + + expect(firstResult.interruptions).toHaveLength(1); + const approvalItem = firstResult.interruptions[0]; + firstResult.state.approve(approvalItem, { + overrideArguments: { test: 'bar' }, + saveOverrideArguments: false, + }); + + const secondResult = await runner.run(agent, firstResult.state, { + previousResponseId: 'initial-response', + }); + + expect(secondResult.finalOutput).toBe('done'); + expect(model.requests).toHaveLength(2); + expect(model.requests[1].previousResponseId).toBe('resp-prev-1'); + + const secondItems = model.requests[1].input as AgentInputItem[]; + expect(secondItems).toHaveLength(1); + expect(secondItems[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-prev-override', + output: { + type: 'text', + text: 'result:bar', + }, + }); + }); + + it('preserves restored trace redaction when a resumed run interrupts again', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse([buildToolCall('call-trace-1', 'foo')], 'resp-trace-1'), + buildResponse([buildToolCall('call-trace-2', 'bar')], 'resp-trace-2'), + ]); + + const agent = new Agent({ + name: 'ApprovalTraceResumeAgent', + model, + tools: [approvalTool], + }); + + const firstRunner = new Runner({ + traceIncludeSensitiveData: false, + }); + const firstResult = await firstRunner.run(agent, 'user_message'); + + expect(firstResult.interruptions).toHaveLength(1); + expect(firstResult.state._traceIncludeSensitiveData).toBe(false); + + const restored = await RunState.fromString( + agent, + firstResult.state.toString(), + ); + expect(restored._traceIncludeSensitiveData).toBe(false); + + restored.approve(restored.getInterruptions()[0]); + + const resumed = await new Runner().run(agent, restored); + + expect(resumed.interruptions).toHaveLength(1); + expect(resumed.state._traceIncludeSensitiveData).toBe(false); + expect(model.requests).toHaveLength(2); + expect(model.requests[1].tracing).toBe('enabled_without_data'); + }); + + it('reapplies runner trace redaction for legacy serialized states', async () => { + const approvalTool = tool({ + name: 'test', + description: 'tool that requires approval', + parameters: z.object({ test: z.string() }), + needsApproval: async () => true, + execute: async ({ test }) => `result:${test}`, + }); + + const model = new TrackingModel([ + buildResponse( + [buildToolCall('call-trace-legacy-1', 'foo')], + 'resp-trace-legacy-1', + ), + buildResponse( + [buildToolCall('call-trace-legacy-2', 'bar')], + 'resp-trace-legacy-2', + ), + ]); + + const agent = new Agent({ + name: 'ApprovalLegacyTraceResumeAgent', + model, + tools: [approvalTool], + }); + + const firstResult = await new Runner({ + traceIncludeSensitiveData: false, + }).run(agent, 'user_message'); + + expect(firstResult.interruptions).toHaveLength(1); + expect(firstResult.state._traceIncludeSensitiveData).toBe(false); + + const legacyJson = firstResult.state.toJSON() as Record; + delete legacyJson.traceIncludeSensitiveData; + legacyJson.$schemaVersion = '1.9'; + + const restored = await RunState.fromString( + agent, + JSON.stringify(legacyJson), + ); + expect(restored._traceIncludeSensitiveData).toBe(false); + expect(restored._traceIncludeSensitiveDataNeedsConfigFallback).toBe(true); + + restored.approve(restored.getInterruptions()[0]); + + const resumed = await new Runner({ + traceIncludeSensitiveData: false, + }).run(agent, restored); + + expect(resumed.interruptions).toHaveLength(1); + expect(resumed.state._traceIncludeSensitiveData).toBe(false); + expect(resumed.state._traceIncludeSensitiveDataNeedsConfigFallback).toBe( + false, + ); + expect(model.requests).toHaveLength(2); + expect(model.requests[1].tracing).toBe('enabled_without_data'); }); it('does not resend items when resuming multiple times without new approvals', async () => { diff --git a/packages/agents-core/test/runState.test.ts b/packages/agents-core/test/runState.test.ts index 5b86c45c7..212d27a0c 100644 --- a/packages/agents-core/test/runState.test.ts +++ b/packages/agents-core/test/runState.test.ts @@ -33,12 +33,124 @@ import { FakeEditor, } from './stubs'; import { RunResult } from '../src/result'; -import { createAgentSpan } from '../src/tracing'; +import { createAgentSpan, type Span, type Trace } from '../src/tracing'; +import { + defaultProcessor, + type TracingProcessor, +} from '../src/tracing/processor'; import { getGlobalTraceProvider } from '../src/tracing/provider'; import type { MCPServer, MCPTool } from '../src/mcp'; import { z } from 'zod'; +class TestTracingProcessor implements TracingProcessor { + public spansEnded: Span[] = []; + + async onTraceStart(_trace: Trace): Promise { + /* noop */ + } + + async onTraceEnd(_trace: Trace): Promise { + /* noop */ + } + + async onSpanStart(_span: Span): Promise { + /* noop */ + } + + async onSpanEnd(span: Span): Promise { + this.spansEnded.push(span); + } + + async shutdown(): Promise { + /* noop */ + } + + async forceFlush(): Promise { + /* noop */ + } +} + describe('RunState', () => { + function buildOverrideableApprovalState(options?: { + conversationId?: string; + previousResponseId?: string; + }) { + const context = new RunContext(); + const sendEmailTool = tool({ + name: 'send_email', + description: 'Send an email.', + parameters: z.object({ recipient: z.string() }), + execute: async ({ recipient }) => recipient, + needsApproval: async () => true, + }); + const agent = new Agent({ + name: 'OverrideAgent', + tools: [sendEmailTool], + }); + const state = new RunState(context, 'input', agent, 2); + if (options?.conversationId || options?.previousResponseId) { + state.setConversationContext( + options.conversationId, + options.previousResponseId, + ); + } + + const rawItem: protocol.FunctionCallItem = { + id: 'fc_override', + type: 'function_call', + name: 'send_email', + callId: 'call-override', + status: 'completed', + arguments: JSON.stringify({ recipient: 'alice@example.com' }), + }; + const approvalItem = new ToolApprovalItem(rawItem, agent); + state._currentStep = { + type: 'next_step_interruption', + data: { + interruptions: [approvalItem], + }, + }; + state._generatedItems.push( + new RunToolCallItem(rawItem, agent), + approvalItem, + ); + state._modelResponses = [ + { + usage: new Usage(), + output: [structuredClone(rawItem)], + responseId: 'resp-override', + }, + ]; + state._lastTurnResponse = { + usage: new Usage(), + output: [structuredClone(rawItem)], + responseId: 'resp-override', + }; + state._lastProcessedResponse = { + newItems: [ + new RunToolCallItem(structuredClone(rawItem), agent), + new ToolApprovalItem(structuredClone(rawItem), agent), + ], + toolsUsed: ['send_email'], + handoffs: [], + functions: [ + { + toolCall: structuredClone(rawItem), + tool: sendEmailTool as any, + }, + ], + computerActions: [], + shellActions: [], + applyPatchActions: [], + mcpApprovalRequests: [], + hasToolsOrApprovalsToRun() { + return this.functions.length > 0; + }, + }; + + return { agent, state, approvalItem, rawItem }; + } + it('initializes with default values', () => { const context = new RunContext({ foo: 'bar' }); const agent = new Agent({ name: 'TestAgent' }); @@ -830,6 +942,368 @@ describe('RunState', () => { ).toBe(true); }); + it('approve with overrideArguments updates function-call execution state', () => { + const { state, approvalItem } = buildOverrideableApprovalState(); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }); + + expect(approvalItem.arguments).toBe( + JSON.stringify({ recipient: 'bob@example.com' }), + ); + expect( + state._context.isToolApproved({ + toolName: 'send_email', + callId: 'call-override', + }), + ).toBe(true); + expect((state._generatedItems[0] as RunToolCallItem).rawItem).toMatchObject( + { + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }, + ); + expect(state._lastProcessedResponse?.functions[0]?.toolCall).toMatchObject({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }); + expect(state._modelResponses[0].output[0]).toMatchObject({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }); + expect(state._lastTurnResponse?.output[0]).toMatchObject({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }); + expect(state.getSessionHistoryMutations()).toEqual([ + { + type: 'replace_function_call', + callId: 'call-override', + replacement: expect.objectContaining({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }), + }, + ]); + }); + + it('approve with overrideArguments and saveOverrideArguments=false keeps replay history unchanged', () => { + const { state, approvalItem, rawItem } = buildOverrideableApprovalState(); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + saveOverrideArguments: false, + }); + + expect(approvalItem.arguments).toBe( + JSON.stringify({ recipient: 'bob@example.com' }), + ); + expect((state._generatedItems[0] as RunToolCallItem).rawItem).toMatchObject( + { + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'alice@example.com' }), + }, + ); + expect(state._lastProcessedResponse?.functions[0]?.toolCall).toMatchObject({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }); + expect(state._modelResponses[0].output[0]).toEqual(rawItem); + expect(state._lastTurnResponse?.output[0]).toEqual(rawItem); + expect(state.getSessionHistoryMutations()).toEqual([]); + expect(state.hasPendingExecutionOnlyApprovalOverrides()).toBe(true); + }); + + it('approve with overrideArguments records a custom trace span', async () => { + const traceProvider = getGlobalTraceProvider(); + const processor = new TestTracingProcessor(); + traceProvider.setProcessors([processor]); + traceProvider.setDisabled(false); + + try { + const { agent, state, approvalItem } = buildOverrideableApprovalState(); + const trace = traceProvider.createTrace({ + name: 'approval-override-trace', + }); + await trace.start(); + state._trace = trace; + state._currentAgentSpan = createAgentSpan( + { data: { name: agent.name } }, + trace, + ); + state._currentAgentSpan.start(); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }); + + state._currentAgentSpan.end(); + await trace.end(); + + const overrideSpan = processor.spansEnded.find( + (span) => + span.spanData.type === 'custom' && + 'name' in span.spanData && + span.spanData.name === 'approval override: send_email', + ); + expect(overrideSpan).toBeDefined(); + expect(overrideSpan?.parentId).toBe(state._currentAgentSpan.spanId); + expect((overrideSpan?.spanData as any).data).toEqual({ + tool_name: 'send_email', + call_id: 'call-override', + original_arguments: { recipient: 'alice@example.com' }, + override_arguments: { recipient: 'bob@example.com' }, + }); + } finally { + traceProvider.setProcessors([defaultProcessor()]); + traceProvider.setDisabled(true); + } + }); + + it('approve with overrideArguments does not record a custom trace span when sensitive data is disabled after restore', async () => { + const traceProvider = getGlobalTraceProvider(); + const processor = new TestTracingProcessor(); + traceProvider.setProcessors([processor]); + traceProvider.setDisabled(false); + + try { + const { agent, state } = buildOverrideableApprovalState(); + state.setTraceIncludeSensitiveData(false); + + const restored = await RunState.fromString(agent, state.toString()); + expect(restored._traceIncludeSensitiveData).toBe(false); + + const [approvalItem] = restored.getInterruptions(); + expect(approvalItem).toBeDefined(); + + const trace = traceProvider.createTrace({ + name: 'approval-override-redacted-trace', + }); + await trace.start(); + restored._trace = trace; + restored._currentAgentSpan = createAgentSpan( + { data: { name: agent.name } }, + trace, + ); + restored._currentAgentSpan.start(); + + restored.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }); + + restored._currentAgentSpan.end(); + await trace.end(); + + const overrideSpan = processor.spansEnded.find( + (span) => + span.spanData.type === 'custom' && + 'name' in span.spanData && + span.spanData.name === 'approval override: send_email', + ); + expect(overrideSpan).toBeUndefined(); + } finally { + traceProvider.setProcessors([defaultProcessor()]); + traceProvider.setDisabled(true); + } + }); + + it('legacy snapshots without trace redaction metadata redact approval override traces by default', async () => { + const traceProvider = getGlobalTraceProvider(); + const processor = new TestTracingProcessor(); + traceProvider.setProcessors([processor]); + traceProvider.setDisabled(false); + + try { + const { agent, state } = buildOverrideableApprovalState(); + const legacyJson = state.toJSON() as Record; + delete legacyJson.traceIncludeSensitiveData; + legacyJson.$schemaVersion = '1.9'; + + const restored = await RunState.fromString( + agent, + JSON.stringify(legacyJson), + ); + expect(restored._traceIncludeSensitiveData).toBe(false); + expect(restored._traceIncludeSensitiveDataNeedsConfigFallback).toBe(true); + + const [approvalItem] = restored.getInterruptions(); + expect(approvalItem).toBeDefined(); + + const trace = traceProvider.createTrace({ + name: 'approval-override-legacy-redacted-trace', + }); + await trace.start(); + restored._trace = trace; + restored._currentAgentSpan = createAgentSpan( + { data: { name: agent.name } }, + trace, + ); + restored._currentAgentSpan.start(); + + restored.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }); + + restored._currentAgentSpan.end(); + await trace.end(); + + const overrideSpan = processor.spansEnded.find( + (span) => + span.spanData.type === 'custom' && + 'name' in span.spanData && + span.spanData.name === 'approval override: send_email', + ); + expect(overrideSpan).toBeUndefined(); + } finally { + traceProvider.setProcessors([defaultProcessor()]); + traceProvider.setDisabled(true); + } + }); + + it('approve rejects saveOverrideArguments defaults for server-managed conversations', () => { + const { state, approvalItem } = buildOverrideableApprovalState({ + previousResponseId: 'resp-existing', + }); + + expect(() => + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }), + ).toThrow('saveOverrideArguments requires local canonical history'); + }); + + it('approve with overrideArguments and saveOverrideArguments=false keeps server-managed replay history unchanged', () => { + const { state, approvalItem, rawItem } = buildOverrideableApprovalState({ + previousResponseId: 'resp-existing', + }); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + saveOverrideArguments: false, + }); + + expect(approvalItem.arguments).toBe( + JSON.stringify({ recipient: 'bob@example.com' }), + ); + expect((state._generatedItems[0] as RunToolCallItem).rawItem).toMatchObject( + { + type: 'function_call', + id: 'fc_override', + callId: 'call-override', + arguments: JSON.stringify({ recipient: 'alice@example.com' }), + }, + ); + expect(state._lastProcessedResponse?.functions[0]?.toolCall).toMatchObject({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }); + expect(state._modelResponses[0].output[0]).toEqual(rawItem); + expect(state._lastTurnResponse?.output[0]).toEqual(rawItem); + expect(state.getSessionHistoryMutations()).toEqual([]); + expect(state.hasPendingExecutionOnlyApprovalOverrides()).toBe(true); + }); + + it('approve rejects saveOverrideArguments without overrideArguments', () => { + const { state, approvalItem } = buildOverrideableApprovalState(); + + expect(() => + state.approve(approvalItem, { + saveOverrideArguments: false, + }), + ).toThrow( + 'saveOverrideArguments can only be used together with overrideArguments', + ); + }); + + it('approve rejects overrideArguments when alwaysApprove is also set', () => { + const { state, approvalItem } = buildOverrideableApprovalState(); + + expect(() => + state.approve(approvalItem, { + alwaysApprove: true, + overrideArguments: { recipient: 'bob@example.com' }, + }), + ).toThrow('overrideArguments cannot be used together with alwaysApprove'); + }); + + it('approve rejects overrideArguments for non-function approvals', () => { + const context = new RunContext(); + const agent = new Agent({ name: 'NonFunctionOverrideAgent' }); + const state = new RunState(context, '', agent, 1); + const approvalItem = new ToolApprovalItem( + { + type: 'hosted_tool_call', + id: 'approval-1', + name: 'search', + status: 'completed', + arguments: '{}', + }, + agent, + ); + + expect(() => + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }), + ).toThrow( + 'overrideArguments is only supported for function_call approvals', + ); + }); + + it('serialization round-trip preserves overrideArguments execution state', async () => { + const { agent, state, approvalItem } = buildOverrideableApprovalState(); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + }); + + const restored = await RunState.fromString(agent, state.toString()); + expect( + ( + (restored._generatedItems[0] as RunToolCallItem) + .rawItem as protocol.FunctionCallItem + ).arguments, + ).toBe(JSON.stringify({ recipient: 'bob@example.com' })); + expect( + restored._lastProcessedResponse?.functions[0]?.toolCall.arguments, + ).toBe(JSON.stringify({ recipient: 'bob@example.com' })); + expect(restored.getSessionHistoryMutations()).toEqual([ + { + type: 'replace_function_call', + callId: 'call-override', + replacement: expect.objectContaining({ + id: 'fc_override', + arguments: JSON.stringify({ recipient: 'bob@example.com' }), + }), + }, + ]); + }); + + it('serialization round-trip preserves server-managed execution-only override state', async () => { + const { agent, state, approvalItem } = buildOverrideableApprovalState({ + previousResponseId: 'resp-existing', + }); + + state.approve(approvalItem, { + overrideArguments: { recipient: 'bob@example.com' }, + saveOverrideArguments: false, + }); + + const restored = await RunState.fromString(agent, state.toString()); + expect( + ( + (restored._generatedItems[0] as RunToolCallItem) + .rawItem as protocol.FunctionCallItem + ).arguments, + ).toBe(JSON.stringify({ recipient: 'alice@example.com' })); + expect( + restored._lastProcessedResponse?.functions[0]?.toolCall.arguments, + ).toBe(JSON.stringify({ recipient: 'bob@example.com' })); + expect(restored.getSessionHistoryMutations()).toEqual([]); + expect(restored.hasPendingExecutionOnlyApprovalOverrides()).toBe(true); + }); + it('returns undefined when approval status is unknown', () => { const context = new RunContext(); expect( diff --git a/packages/agents-core/test/runner/sessionPersistence.test.ts b/packages/agents-core/test/runner/sessionPersistence.test.ts index 1f89e3f54..2726d03d5 100644 --- a/packages/agents-core/test/runner/sessionPersistence.test.ts +++ b/packages/agents-core/test/runner/sessionPersistence.test.ts @@ -1425,6 +1425,299 @@ describe('saveToSession', () => { ); }); + it('applies session history mutations before running compaction', async () => { + class TrackingSession implements Session { + items: AgentInputItem[] = [ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + ]; + events: string[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.events.push(`addItems:${items.length}`); + this.items.push(...items); + } + + async popItem(): Promise { + return undefined; + } + + async clearSession(): Promise { + this.items = []; + } + + async applyHistoryMutations(args: { + mutations: Array<{ + type: 'replace_function_call'; + callId: string; + replacement: Extract; + }>; + }): Promise { + this.events.push(`applyHistoryMutations:${args.mutations.length}`); + for (const mutation of args.mutations) { + const nextItems: AgentInputItem[] = []; + let keptReplacement = false; + for (const item of this.items) { + if ( + item.type === 'function_call' && + item.callId === mutation.callId + ) { + if (!keptReplacement) { + nextItems.push(structuredClone(mutation.replacement)); + keptReplacement = true; + } + continue; + } + + nextItems.push(item); + } + this.items = nextItems; + } + } + + async runCompaction( + args?: OpenAIResponsesCompactionArgs, + ): Promise { + this.events.push(`runCompaction:${args?.responseId}`); + return null; + } + } + + const textAgent = new Agent({ + name: 'RewriteRecorder', + outputType: 'text', + instructions: 'capture', + }); + const agent = textAgent as unknown as Agent< + UnknownContext, + AgentOutputType + >; + const session = new TrackingSession(); + const context = new RunContext(undefined as UnknownContext); + const state = new RunState< + UnknownContext, + Agent + >(context, 'hello', agent, 10); + + const correctedCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_override', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + providerData: {}, + }; + const toolOutputText = 'Customer 2 details.'; + const toolOutputItem = getToolCallOutputItem(correctedCall, toolOutputText); + + state._modelResponses.push({ + output: [], + usage: new Usage(), + responseId: 'resp_override', + }); + state._generatedItems = [ + new ToolCallItem(correctedCall, textAgent), + new ToolCallOutputItem(toolOutputItem, textAgent, toolOutputText), + ]; + state._sessionHistoryMutations = [ + { + type: 'replace_function_call', + callId: correctedCall.callId, + replacement: correctedCall, + }, + ]; + + await saveToSession(session, [], new RunResult(state)); + + expect(session.events).toEqual([ + 'addItems:2', + 'applyHistoryMutations:1', + 'runCompaction:resp_override', + ]); + expect(state.getSessionHistoryMutations()).toEqual([]); + + expect(session.items).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + providerData: {}, + }, + { + type: 'function_call_result', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + output: { + type: 'text', + text: toolOutputText, + }, + }, + ]); + }); + + it('applies session history mutations even when a resumed save only carries approval placeholders', async () => { + class TrackingSession implements Session { + items: AgentInputItem[] = [ + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + ]; + events: string[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.events.push(`addItems:${items.length}`); + this.items.push(...items); + } + + async popItem(): Promise { + return undefined; + } + + async clearSession(): Promise { + this.items = []; + } + + async applyHistoryMutations(args: { + mutations: Array<{ + type: 'replace_function_call'; + callId: string; + replacement: Extract; + }>; + }): Promise { + this.events.push(`applyHistoryMutations:${args.mutations.length}`); + for (const mutation of args.mutations) { + this.items = this.items.map((item) => { + if ( + item.type === 'function_call' && + item.callId === mutation.callId + ) { + return structuredClone(mutation.replacement); + } + return item; + }); + } + } + + async runCompaction( + args?: OpenAIResponsesCompactionArgs, + ): Promise { + this.events.push(`runCompaction:${args?.responseId}`); + return null; + } + } + + const textAgent = new Agent({ + name: 'ApprovalOnlyRewriteRecorder', + outputType: 'text', + instructions: 'capture', + }); + const agent = textAgent as unknown as Agent< + UnknownContext, + AgentOutputType + >; + const session = new TrackingSession(); + const context = new RunContext(undefined as UnknownContext); + const state = new RunState< + UnknownContext, + Agent + >(context, 'hello', agent, 10); + + const correctedCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_override', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + providerData: {}, + }; + const pendingApprovalCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_pending', + callId: 'call_pending', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '3' }), + providerData: {}, + }; + + state._modelResponses.push({ + output: [], + usage: new Usage(), + responseId: 'resp_override', + }); + state._generatedItems = [ + new ToolCallItem(correctedCall, textAgent), + new ToolApprovalItem(pendingApprovalCall, textAgent), + ]; + state._currentTurnPersistedItemCount = 1; + state._sessionHistoryMutations = [ + { + type: 'replace_function_call', + callId: correctedCall.callId, + replacement: correctedCall, + }, + ]; + + await saveToSession(session, [], new RunResult(state)); + + expect(session.events).toEqual([ + 'applyHistoryMutations:1', + 'runCompaction:resp_override', + ]); + expect(state.getSessionHistoryMutations()).toEqual([]); + expect(state._currentTurnPersistedItemCount).toBe(2); + expect(session.items).toEqual([ + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + providerData: {}, + }, + ]); + }); + it('propagates lastResponseId to sessions after persisting items', async () => { class TrackingSession implements Session { items: AgentInputItem[] = []; diff --git a/packages/agents-openai/src/index.ts b/packages/agents-openai/src/index.ts index 48ffbc8c9..d3cedbc68 100644 --- a/packages/agents-openai/src/index.ts +++ b/packages/agents-openai/src/index.ts @@ -51,3 +51,7 @@ export { type OpenAIResponsesCompactionSessionOptions, type OpenAIResponsesCompactionDecisionContext, } from './memory/openaiResponsesCompactionSession'; +export { + OpenAIResponsesHistoryRewriteSession, + type OpenAIResponsesHistoryRewriteSessionOptions, +} from './memory/openaiResponsesHistoryRewriteSession'; diff --git a/packages/agents-openai/src/memory/openaiConversationsSession.ts b/packages/agents-openai/src/memory/openaiConversationsSession.ts index 1df7eab89..4f82f31d4 100644 --- a/packages/agents-openai/src/memory/openaiConversationsSession.ts +++ b/packages/agents-openai/src/memory/openaiConversationsSession.ts @@ -1,8 +1,15 @@ import OpenAI from 'openai'; -import type { AgentInputItem, Session } from '@openai/agents-core'; +import { + protocol, + SERVER_MANAGED_CONVERSATION_SESSION, +} from '@openai/agents-core'; +import type { + AgentInputItem, + ServerManagedConversationSession, + Session, +} from '@openai/agents-core'; import { getDefaultOpenAIClient, getDefaultOpenAIKey } from '../defaults'; import { convertToOutputItem, getInputItems } from '../openaiResponsesModel'; -import { protocol } from '@openai/agents-core'; import type { ConversationItem as APIConversationItem } from 'openai/resources/conversations/items'; import type { Message as APIConversationMessage } from 'openai/resources/conversations/conversations'; import { @@ -28,10 +35,14 @@ export async function startOpenAIConversationsSession( } export class OpenAIConversationsSession - implements Session, OpenAISessionApiTagged<'conversations'> + implements + Session, + ServerManagedConversationSession, + OpenAISessionApiTagged<'conversations'> { // Marks this session as backed by the Conversations API so Responses-only integrations can reject it. readonly [OPENAI_SESSION_API] = 'conversations' as const; + readonly [SERVER_MANAGED_CONVERSATION_SESSION] = true as const; #client: OpenAI; #conversationId?: string; diff --git a/packages/agents-openai/src/memory/openaiResponsesCompactionSession.ts b/packages/agents-openai/src/memory/openaiResponsesCompactionSession.ts index d9c5de928..53db5fd90 100644 --- a/packages/agents-openai/src/memory/openaiResponsesCompactionSession.ts +++ b/packages/agents-openai/src/memory/openaiResponsesCompactionSession.ts @@ -10,6 +10,9 @@ import type { OpenAIResponsesCompactionArgs, OpenAIResponsesCompactionAwareSession as OpenAIResponsesCompactionSessionLike, Session, + SessionHistoryMutation, + SessionHistoryRewriteArgs, + SessionHistoryRewriteAwareSession, } from '@openai/agents-core'; import type { OpenAIResponsesCompactionResult } from '@openai/agents-core'; import { DEFAULT_OPENAI_MODEL, getDefaultOpenAIClient } from '../defaults'; @@ -82,6 +85,11 @@ export type OpenAIResponsesCompactionSessionOptions = { * - `auto` (default): Uses `input` when the last response was not stored or no response id is available. * - `previous_response_id`: Uses the server-managed response chain. * - `input`: Sends the locally stored session items as input and does not require a response id. + * + * Local history rewrites (for example, approval override argument corrections) and local-only + * persisted tool outputs without a newer response id temporarily force compaction through + * `input` until a newer response id is observed, because the stored `previous_response_id` + * chain no longer matches the canonical local transcript. */ compactionMode?: OpenAIResponsesCompactionMode; /** @@ -110,6 +118,7 @@ export type OpenAIResponsesCompactionSessionOptions = { export class OpenAIResponsesCompactionSession implements OpenAIResponsesCompactionSessionLike, + SessionHistoryRewriteAwareSession, OpenAISessionApiTagged<'responses'> { readonly [OPENAI_SESSION_API] = 'responses' as const; @@ -125,6 +134,9 @@ export class OpenAIResponsesCompactionSession ) => boolean | Promise; private compactionCandidateItems: AgentInputItem[] | undefined; private sessionItems: AgentInputItem[] | undefined; + private hasPendingLocalHistoryRewrite: boolean; + private localHistoryRewriteResponseId?: string; + private hasUnacknowledgedLocalSessionAdds: boolean; constructor(options: OpenAIResponsesCompactionSessionOptions) { this.client = resolveClient(options); @@ -145,20 +157,35 @@ export class OpenAIResponsesCompactionSession this.compactionCandidateItems = undefined; this.sessionItems = undefined; this.lastStore = undefined; + this.hasPendingLocalHistoryRewrite = false; + this.localHistoryRewriteResponseId = undefined; + this.hasUnacknowledgedLocalSessionAdds = false; } async runCompaction( args: OpenAIResponsesCompactionArgs = {}, ): Promise { + const previousResponseId = this.responseId; this.responseId = args.responseId ?? this.responseId ?? undefined; if (args.store !== undefined) { this.lastStore = args.store; } + const turnHasLocalAddsWithoutNewResponseId = + this.hasUnacknowledgedLocalSessionAdds && + (typeof args.responseId === 'undefined' || + args.responseId === previousResponseId); + if ( + typeof args.responseId !== 'undefined' && + args.responseId !== previousResponseId + ) { + this.hasUnacknowledgedLocalSessionAdds = false; + } const requestedMode = args.compactionMode ?? this.compactionMode; - const resolvedMode = resolveCompactionMode({ + const resolvedMode = this.resolveCompactionMode({ requestedMode, responseId: this.responseId, store: args.store ?? this.lastStore, + turnHasLocalAddsWithoutResponseId: turnHasLocalAddsWithoutNewResponseId, }); if (resolvedMode === 'previous_response_id' && !this.responseId) { @@ -186,6 +213,17 @@ export class OpenAIResponsesCompactionSession return null; } + const unresolvedFunctionCalls = + findUnresolvedFunctionCallsWithoutResults(sessionItems); + if (unresolvedFunctionCalls.length > 0) { + logger.debug('compact: blocked unresolved function calls %o', { + responseId: this.responseId, + compactionMode: resolvedMode, + unresolvedCallIds: unresolvedFunctionCalls.map((item) => item.callId), + }); + return null; + } + logger.debug('compact: start %o', { responseId: this.responseId, model: this.model, @@ -231,12 +269,39 @@ export class OpenAIResponsesCompactionSession return this.underlyingSession.getItems(limit); } + async applyHistoryMutations(args: SessionHistoryRewriteArgs): Promise { + if (args.mutations.length === 0) { + return; + } + + if (isSessionHistoryRewriteDelegate(this.underlyingSession)) { + await this.underlyingSession.applyHistoryMutations(args); + await this.refreshCachesFromUnderlyingSession(); + this.markLocalHistoryRewrite(); + return; + } + + const rewrittenItems = applySessionHistoryMutations( + await this.underlyingSession.getItems(), + args.mutations, + ); + await this.underlyingSession.clearSession(); + if (rewrittenItems.length > 0) { + await this.underlyingSession.addItems(rewrittenItems); + } + this.sessionItems = rewrittenItems; + this.compactionCandidateItems = + selectCompactionCandidateItems(rewrittenItems); + this.markLocalHistoryRewrite(); + } + async addItems(items: AgentInputItem[]) { if (items.length === 0) { return; } await this.underlyingSession.addItems(items); + this.hasUnacknowledgedLocalSessionAdds = true; if (this.compactionCandidateItems) { const candidates = selectCompactionCandidateItems(items); if (candidates.length > 0) { @@ -285,6 +350,15 @@ export class OpenAIResponsesCompactionSession await this.underlyingSession.clearSession(); this.compactionCandidateItems = []; this.sessionItems = []; + this.hasPendingLocalHistoryRewrite = false; + this.localHistoryRewriteResponseId = undefined; + this.hasUnacknowledgedLocalSessionAdds = false; + } + + private async refreshCachesFromUnderlyingSession(): Promise { + const history = await this.underlyingSession.getItems(); + this.sessionItems = history; + this.compactionCandidateItems = selectCompactionCandidateItems(history); } private async ensureCompactionCandidates(): Promise<{ @@ -313,6 +387,72 @@ export class OpenAIResponsesCompactionSession sessionItems: [...history], }; } + + private markLocalHistoryRewrite(): void { + this.hasPendingLocalHistoryRewrite = true; + this.localHistoryRewriteResponseId = this.responseId; + } + + private resolveCompactionMode(options: { + requestedMode: OpenAIResponsesCompactionMode; + responseId: string | undefined; + store: boolean | undefined; + turnHasLocalAddsWithoutResponseId: boolean; + }): ResolvedCompactionMode { + const resolvedMode = resolveCompactionMode(options); + + if ( + options.turnHasLocalAddsWithoutResponseId && + resolvedMode === 'previous_response_id' + ) { + this.hasUnacknowledgedLocalSessionAdds = false; + this.markLocalHistoryRewrite(); + logger.debug( + 'compact: forcing input mode after local session delta without new response id %o', + { + responseId: this.responseId, + requestedMode: options.requestedMode, + }, + ); + return 'input'; + } + + if (!this.hasPendingLocalHistoryRewrite) { + return resolvedMode; + } + + if ( + typeof this.localHistoryRewriteResponseId !== 'undefined' && + typeof options.responseId !== 'undefined' && + options.responseId !== this.localHistoryRewriteResponseId + ) { + this.hasPendingLocalHistoryRewrite = false; + this.localHistoryRewriteResponseId = undefined; + return resolvedMode; + } + + if ( + this.hasPendingLocalHistoryRewrite && + resolvedMode === 'previous_response_id' + ) { + if ( + typeof this.localHistoryRewriteResponseId === 'undefined' && + typeof options.responseId !== 'undefined' + ) { + this.localHistoryRewriteResponseId = options.responseId; + } + logger.debug( + 'compact: forcing input mode after local history rewrite %o', + { + responseId: options.responseId, + requestedMode: options.requestedMode, + }, + ); + return 'input'; + } + + return resolvedMode; + } } type ResolvedCompactionMode = Exclude; @@ -367,6 +507,31 @@ function selectCompactionCandidateItems( }); } +function findUnresolvedFunctionCallsWithoutResults( + items: AgentInputItem[], +): Extract[] { + const functionCalls = new Map< + string, + Extract + >(); + const resolvedCallIds = new Set(); + + for (const item of items) { + if (item.type === 'function_call') { + functionCalls.set(item.callId, item); + continue; + } + + if (item.type === 'function_call_result') { + resolvedCallIds.add(item.callId); + } + } + + return [...functionCalls.values()].filter( + (item) => !resolvedCallIds.has(item.callId), + ); +} + function assertSupportedOpenAIResponsesCompactionModel(model: string): void { if (!isOpenAIModelName(model)) { throw new Error( @@ -425,3 +590,51 @@ function isOpenAIConversationsSessionDelegate( ] === 'conversations' ); } + +function isSessionHistoryRewriteDelegate( + session: Session | undefined, +): session is SessionHistoryRewriteAwareSession { + return ( + !!session && + typeof (session as SessionHistoryRewriteAwareSession) + .applyHistoryMutations === 'function' + ); +} + +function applySessionHistoryMutations( + items: AgentInputItem[], + mutations: SessionHistoryMutation[], +): AgentInputItem[] { + let nextItems = items.map((item) => structuredClone(item)); + + for (const mutation of mutations) { + if (mutation.type === 'replace_function_call') { + nextItems = applyReplaceFunctionCallMutation(nextItems, mutation); + } + } + + return nextItems; +} + +function applyReplaceFunctionCallMutation( + items: AgentInputItem[], + mutation: Extract, +): AgentInputItem[] { + const replacement = structuredClone(mutation.replacement); + const nextItems: AgentInputItem[] = []; + let keptReplacement = false; + + for (const item of items) { + if (item.type === 'function_call' && item.callId === mutation.callId) { + if (!keptReplacement) { + nextItems.push(replacement); + keptReplacement = true; + } + continue; + } + + nextItems.push(item); + } + + return nextItems; +} diff --git a/packages/agents-openai/src/memory/openaiResponsesHistoryRewriteSession.ts b/packages/agents-openai/src/memory/openaiResponsesHistoryRewriteSession.ts new file mode 100644 index 000000000..14130f201 --- /dev/null +++ b/packages/agents-openai/src/memory/openaiResponsesHistoryRewriteSession.ts @@ -0,0 +1,176 @@ +import { + getLogger, + isOpenAIResponsesCompactionAwareSession, + MemorySession, + UserError, +} from '@openai/agents-core'; +import type { + AgentInputItem, + OpenAIResponsesCompactionArgs, + OpenAIResponsesCompactionAwareSession, + OpenAIResponsesCompactionResult, + Session, + SessionHistoryMutation, + SessionHistoryRewriteArgs, + SessionHistoryRewriteAwareSession, +} from '@openai/agents-core'; +import { + OPENAI_SESSION_API, + type OpenAISessionApiTagged, +} from './openaiSessionApi'; + +const logger = getLogger('openai-agents:openai:history-rewrite'); + +export type OpenAIResponsesHistoryRewriteSessionOptions = { + /** + * Session store that receives rewritten local history. + * + * Defaults to an in-memory session for demos and tests. + */ + underlyingSession?: Session & { [OPENAI_SESSION_API]?: 'responses' }; +}; + +/** + * Session decorator that keeps local Responses-style history canonical after targeted rewrites. + * + * This decorator never calls the OpenAI API. It rewrites the underlying local session by applying + * structured history mutations after the runner persists a turn. Do not use it with + * `OpenAIConversationsSession`, which owns server-managed history. + */ +export class OpenAIResponsesHistoryRewriteSession + implements + SessionHistoryRewriteAwareSession, + OpenAIResponsesCompactionAwareSession, + OpenAISessionApiTagged<'responses'> +{ + readonly [OPENAI_SESSION_API] = 'responses' as const; + + private readonly underlyingSession: Session; + + constructor(options: OpenAIResponsesHistoryRewriteSessionOptions = {}) { + if (isOpenAIConversationsSessionDelegate(options.underlyingSession)) { + throw new UserError( + 'OpenAIResponsesHistoryRewriteSession does not support OpenAIConversationsSession as an underlying session.', + ); + } + + this.underlyingSession = options.underlyingSession ?? new MemorySession(); + } + + async getSessionId(): Promise { + return this.underlyingSession.getSessionId(); + } + + async getItems(limit?: number): Promise { + return this.underlyingSession.getItems(limit); + } + + async addItems(items: AgentInputItem[]): Promise { + await this.underlyingSession.addItems(items); + } + + async popItem(): Promise { + return this.underlyingSession.popItem(); + } + + async clearSession(): Promise { + await this.underlyingSession.clearSession(); + } + + async applyHistoryMutations(args: SessionHistoryRewriteArgs): Promise { + if (args.mutations.length === 0) { + return; + } + + if (isSessionHistoryRewriteDelegate(this.underlyingSession)) { + await this.underlyingSession.applyHistoryMutations(args); + return; + } + + const rewrittenItems = applySessionHistoryMutations( + await this.underlyingSession.getItems(), + args.mutations, + ); + + logger.debug('rewrite: replacing session history %o', { + mutationCount: args.mutations.length, + outputItemCount: rewrittenItems.length, + }); + + await this.underlyingSession.clearSession(); + if (rewrittenItems.length > 0) { + await this.underlyingSession.addItems(rewrittenItems); + } + } + + async runCompaction( + args?: OpenAIResponsesCompactionArgs, + ): Promise { + if (!isOpenAIResponsesCompactionAwareSession(this.underlyingSession)) { + return null; + } + + return this.underlyingSession.runCompaction(args); + } +} + +function isOpenAIConversationsSessionDelegate( + underlyingSession: Session | undefined, +): underlyingSession is Session & OpenAISessionApiTagged<'conversations'> { + return ( + !!underlyingSession && + typeof underlyingSession === 'object' && + OPENAI_SESSION_API in underlyingSession && + (underlyingSession as OpenAISessionApiTagged<'conversations'>)[ + OPENAI_SESSION_API + ] === 'conversations' + ); +} + +function isSessionHistoryRewriteDelegate( + session: Session | undefined, +): session is SessionHistoryRewriteAwareSession { + return ( + !!session && + typeof (session as SessionHistoryRewriteAwareSession) + .applyHistoryMutations === 'function' + ); +} + +function applySessionHistoryMutations( + items: AgentInputItem[], + mutations: SessionHistoryMutation[], +): AgentInputItem[] { + let nextItems = items.map((item) => structuredClone(item)); + + for (const mutation of mutations) { + if (mutation.type === 'replace_function_call') { + nextItems = applyReplaceFunctionCallMutation(nextItems, mutation); + } + } + + return nextItems; +} + +function applyReplaceFunctionCallMutation( + items: AgentInputItem[], + mutation: Extract, +): AgentInputItem[] { + const replacement = structuredClone(mutation.replacement); + const nextItems: AgentInputItem[] = []; + let keptReplacement = false; + + for (const item of items) { + if (item.type === 'function_call' && item.callId === mutation.callId) { + if (!keptReplacement) { + nextItems.push(replacement); + keptReplacement = true; + } + continue; + } + + nextItems.push(item); + } + + return nextItems; +} diff --git a/packages/agents-openai/test/hitlOpenAIConversationsSession.test.ts b/packages/agents-openai/test/hitlOpenAIConversationsSession.test.ts index 24e01fd15..40711ad36 100644 --- a/packages/agents-openai/test/hitlOpenAIConversationsSession.test.ts +++ b/packages/agents-openai/test/hitlOpenAIConversationsSession.test.ts @@ -113,9 +113,10 @@ describe('OpenAIConversationsSession HITL scenario', () => { }, ); const listItems = vi.fn(() => ({ - // eslint-disable-next-line require-yield -- empty iterator is intentional. async *[Symbol.asyncIterator]() { - return; + for (const item of storedItems) { + yield item; + } }, })); const client = { @@ -182,6 +183,81 @@ describe('OpenAIConversationsSession HITL scenario', () => { expect(executeCounts.get(TOOL_ECHO)).toBe(1); expect(executeCounts.get(TOOL_NOTE)).toBe(1); }); + + it('supports execution-only overrides for conversations-backed sessions', async () => { + const storedItems: Array> = []; + const createItems = vi.fn( + async (_conversationId: string, payload: { items: any[] }) => { + storedItems.push(...payload.items); + return {}; + }, + ); + const listItems = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of storedItems) { + yield item; + } + }, + })); + const client = { + conversations: { + items: { + create: createItems, + list: listItems, + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any; + const session = new OpenAIConversationsSession({ + conversationId: 'conv_override', + client, + }); + const model = new ScenarioModel(); + const agent = new Agent({ + name: 'OpenAIConversationsSession override', + instructions: `Always call ${TOOL_ECHO} before responding.`, + model, + tools: [approvalEchoTool], + modelSettings: { toolChoice: TOOL_ECHO }, + toolUseBehavior: 'stop_on_first_tool', + }); + + const firstRun = await run(agent, USER_MESSAGES[0], { session }); + expect(firstRun.interruptions).toHaveLength(1); + + const approval = firstRun.interruptions[0]; + expect(approval.rawItem.type).toBe('function_call'); + const approvalCallId = + approval.rawItem.type === 'function_call' + ? approval.rawItem.callId + : undefined; + firstRun.state.approve(approval, { + overrideArguments: { query: 'Overridden query' }, + saveOverrideArguments: false, + }); + + const resumed = await run(agent, firstRun.state, { session }); + expect(resumed.interruptions).toHaveLength(0); + expect(resumed.finalOutput).toMatch(/^approved:/); + + const functionCalls = storedItems.filter( + (item) => item.type === 'function_call', + ); + const functionOutputs = storedItems.filter( + (item) => item.type === 'function_call_output', + ); + + expect(functionCalls).toHaveLength(1); + expect(functionOutputs).toHaveLength(1); + expect(functionCalls[0]?.call_id).toBe(approvalCallId); + expect(functionCalls[0]?.arguments).toBe( + JSON.stringify({ query: USER_MESSAGES[0] }), + ); + expect(functionOutputs[0]?.call_id).toBe(approvalCallId); + expect(extractOutputText(functionOutputs[0])).toBe(resumed.finalOutput); + }); }); async function runScenarioStep( diff --git a/packages/agents-openai/test/openaiResponsesCompactionSession.test.ts b/packages/agents-openai/test/openaiResponsesCompactionSession.test.ts index 176faf29b..cbd0726ae 100644 --- a/packages/agents-openai/test/openaiResponsesCompactionSession.test.ts +++ b/packages/agents-openai/test/openaiResponsesCompactionSession.test.ts @@ -1,7 +1,20 @@ import { describe, expect, it, vi } from 'vitest'; +import { z } from 'zod'; -import { MemorySession } from '@openai/agents-core'; +import { + Agent, + MemorySession, + type Model, + type ModelRequest, + type ModelResponse, + protocol, + Runner, + type ResponseStreamEvent, + tool, + Usage, +} from '@openai/agents-core'; import { UserError } from '@openai/agents-core'; +import type { AgentInputItem, Session } from '@openai/agents-core'; import { OpenAIResponsesCompactionSession } from '../src'; import { OPENAI_SESSION_API } from '../src/memory/openaiSessionApi'; @@ -114,7 +127,7 @@ describe('OpenAIResponsesCompactionSession', () => { }, ] as any); - await session.runCompaction({ force: true }); + await session.runCompaction({ responseId: 'resp_pending', force: true }); expect(compact).toHaveBeenCalledTimes(1); const [request] = compact.mock.calls[0] ?? []; @@ -251,6 +264,79 @@ describe('OpenAIResponsesCompactionSession', () => { expect(secondRequest.input).toHaveLength(1); }); + it('forces input compaction after local history rewrites even when a stored response id exists', async () => { + const compact = vi.fn().mockResolvedValue({ + output: [], + usage: { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }, + }); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + compactionMode: 'auto', + underlyingSession: new MemorySession(), + }); + + await session.addItems([ + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 1 details.', + }, + }, + ] as AgentInputItem[]); + + await session.applyHistoryMutations({ + mutations: [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ], + }); + + await session.runCompaction({ + responseId: 'resp_store', + store: true, + force: true, + }); + + expect(compact).toHaveBeenCalledTimes(1); + const [request] = compact.mock.calls[0] ?? []; + expect(request.previous_response_id).toBeUndefined(); + expect(request.input).toHaveLength(2); + expect(request.input[0]).toMatchObject({ + type: 'function_call', + call_id: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }); + expect(request.input[1]).toMatchObject({ + type: 'function_call_output', + call_id: 'call_override', + output: 'Customer 1 details.', + }); + }); + it('allows custom compaction decisions using the stored history', async () => { const compact = vi.fn().mockResolvedValue({ output: [ @@ -357,7 +443,7 @@ describe('OpenAIResponsesCompactionSession', () => { expect(compact).not.toHaveBeenCalled(); }); - it('replaces history after compaction and reuses the stored response id', async () => { + it('replaces history after compaction and falls back to input when later turns only add local items', async () => { const compact = vi .fn() .mockResolvedValueOnce({ @@ -435,10 +521,23 @@ describe('OpenAIResponsesCompactionSession', () => { await session.runCompaction({ force: true }); expect(compact).toHaveBeenCalledTimes(2); - expect(compact).toHaveBeenLastCalledWith({ - previous_response_id: 'resp_store', - model: 'gpt-4.1', - }); + const [secondRequest] = compact.mock.calls[1] ?? []; + expect(secondRequest.previous_response_id).toBeUndefined(); + expect(secondRequest.model).toBe('gpt-4.1'); + expect(secondRequest.input).toMatchObject([ + { + type: 'message', + role: 'assistant', + status: 'completed', + content: [{ type: 'output_text', text: 'compacted output' }], + }, + { + type: 'message', + role: 'assistant', + status: 'completed', + content: [{ type: 'output_text', text: 'follow up' }], + }, + ]); expect(await session.getItems()).toEqual([ { type: 'message', @@ -460,4 +559,482 @@ describe('OpenAIResponsesCompactionSession', () => { UserError, ); }); + + it('skips compaction when input mode sees an unresolved function_call', async () => { + const compact = vi.fn(); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + compactionMode: 'input', + shouldTriggerCompaction: () => true, + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'Needs approval.', + }, + { + type: 'function_call', + callId: 'call_pending', + name: 'approved_echo', + status: 'completed', + arguments: JSON.stringify({ query: 'Needs approval.' }), + }, + ] as AgentInputItem[]); + + await expect(session.runCompaction({ force: true })).resolves.toBeNull(); + expect(compact).not.toHaveBeenCalled(); + }); + + it('skips compaction when previous_response_id mode sees an unresolved function_call', async () => { + const compact = vi.fn(); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + compactionMode: 'previous_response_id', + shouldTriggerCompaction: () => true, + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'Needs approval.', + }, + { + type: 'function_call', + callId: 'call_pending', + name: 'approved_echo', + status: 'completed', + arguments: JSON.stringify({ query: 'Needs approval.' }), + }, + ] as AgentInputItem[]); + + await expect( + session.runCompaction({ responseId: 'resp_pending', force: true }), + ).resolves.toBeNull(); + expect(compact).not.toHaveBeenCalled(); + }); + + it('forces input compaction after local-only tool outputs without a newer response id', async () => { + const compact = vi.fn().mockResolvedValue({ + output: [], + usage: { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }, + }); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + compactionMode: 'previous_response_id', + shouldTriggerCompaction: () => true, + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'Needs approval.', + }, + { + type: 'function_call', + callId: 'call_pending', + name: 'approved_echo', + status: 'completed', + arguments: JSON.stringify({ query: 'Needs approval.' }), + }, + ] as AgentInputItem[]); + + await expect( + session.runCompaction({ responseId: 'resp_pending', force: true }), + ).resolves.toBeNull(); + expect(compact).not.toHaveBeenCalled(); + + await session.addItems([ + { + type: 'function_call_result', + callId: 'call_pending', + output: { + type: 'text', + text: 'approved:Needs approval.', + }, + }, + ] as AgentInputItem[]); + + await session.runCompaction({ force: true }); + + expect(compact).toHaveBeenCalledTimes(1); + expect(compact).toHaveBeenCalledWith({ + input: [ + { + role: 'user', + content: 'Needs approval.', + }, + { + type: 'function_call', + call_id: 'call_pending', + name: 'approved_echo', + status: 'completed', + arguments: JSON.stringify({ query: 'Needs approval.' }), + }, + { + type: 'function_call_output', + call_id: 'call_pending', + output: 'approved:Needs approval.', + }, + ], + model: 'gpt-4.1', + }); + }); + + it('skips compaction on interrupted HITL turns until the tool result exists', async () => { + const compact = vi.fn().mockResolvedValue({ + output: [], + usage: { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }, + }); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + compactionMode: 'input', + shouldTriggerCompaction: () => true, + }); + const approvalEchoTool = tool({ + name: 'approved_echo', + description: 'Echoes back the approved query.', + parameters: z.object({ query: z.string() }), + async execute({ query }: { query: string }) { + return `approved:${query}`; + }, + }); + approvalEchoTool.needsApproval = async () => true; + const model = new ApprovalScenarioModel(); + const agent = new Agent({ + name: 'Compaction interruption repro', + instructions: 'Always call approved_echo before responding.', + model: 'test-model', + tools: [approvalEchoTool], + modelSettings: { toolChoice: 'approved_echo' }, + toolUseBehavior: 'stop_on_first_tool', + }); + const runner = new Runner({ + modelProvider: { + getModel: vi.fn(async () => model), + }, + }); + + const firstResult = await runner.run(agent, 'Needs approval.', { + session, + }); + + expect(firstResult.interruptions).toHaveLength(1); + expect(compact).not.toHaveBeenCalled(); + await expect(session.getItems()).resolves.toMatchObject([ + { + type: 'message', + role: 'user', + content: 'Needs approval.', + }, + { + type: 'function_call', + name: 'approved_echo', + }, + ]); + + firstResult.state.approve(firstResult.interruptions[0]); + + const resumed = await runner.run(agent, firstResult.state, { session }); + expect(resumed.finalOutput).toBe('approved:Needs approval.'); + expect(compact).toHaveBeenCalledTimes(1); + }); + + it('rewrites history before compaction when the underlying session is not rewrite-aware', async () => { + class PlainSession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + const compact = vi.fn().mockResolvedValue({ + output: [], + usage: { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }, + }); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + underlyingSession: new PlainSession(), + compactionMode: 'input', + }); + + await session.addItems([ + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ] as AgentInputItem[]); + + await session.applyHistoryMutations({ + mutations: [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ], + }); + + expect(await session.getItems()).toEqual([ + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]); + + await session.runCompaction({ force: true }); + + expect(compact).toHaveBeenCalledWith({ + input: [ + { + type: 'function_call', + call_id: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_output', + call_id: 'call_override', + output: 'Customer 2 details.', + }, + ], + model: 'gpt-4.1', + }); + }); + + it('does not append a replacement when the underlying session already trimmed the original call', async () => { + class PlainSession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + const compact = vi.fn().mockResolvedValue({ + output: [], + usage: { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }, + }); + const session = new OpenAIResponsesCompactionSession({ + client: { responses: { compact } } as any, + underlyingSession: new PlainSession(), + compactionMode: 'input', + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ] as AgentInputItem[]); + + await session.applyHistoryMutations({ + mutations: [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ], + }); + + expect(await session.getItems()).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]); + + await session.runCompaction({ force: true }); + + expect(compact).toHaveBeenCalledTimes(1); + const [request] = compact.mock.calls[0] ?? []; + expect(request.model).toBe('gpt-4.1'); + expect(request.input).toMatchObject([ + { + role: 'user', + content: 'hello', + }, + { + type: 'function_call_output', + call_id: 'call_override', + output: 'Customer 2 details.', + }, + ]); + }); }); + +class ApprovalScenarioModel implements Model { + #counter = 0; + + async getResponse(request: ModelRequest): Promise { + const toolName = + typeof request.modelSettings.toolChoice === 'string' + ? request.modelSettings.toolChoice + : 'approved_echo'; + const callId = `call_${(this.#counter += 1)}`; + const toolCall: protocol.FunctionCallItem = { + id: `fc_${callId}`, + type: 'function_call', + name: toolName, + callId, + status: 'completed', + arguments: JSON.stringify({ + query: extractLastUserMessage(request.input), + }), + providerData: {}, + }; + + return { + usage: new Usage(), + output: [toolCall], + }; + } + + // eslint-disable-next-line require-yield -- this scenario does not stream. + async *getStreamedResponse( + _request: ModelRequest, + ): AsyncIterable { + throw new Error('Streaming is not supported in this scenario.'); + } +} + +function extractLastUserMessage(input: ModelRequest['input']): string { + if (typeof input === 'string') { + return input; + } + + for (let index = input.length - 1; index >= 0; index -= 1) { + const item = input[index]; + if (item.type !== 'message' || item.role !== 'user') { + continue; + } + + if (typeof item.content === 'string') { + return item.content; + } + + for (const contentItem of item.content) { + if ( + contentItem.type === 'input_text' && + typeof contentItem.text === 'string' + ) { + return contentItem.text; + } + } + } + + return ''; +} diff --git a/packages/agents-openai/test/openaiResponsesHistoryRewriteSession.test.ts b/packages/agents-openai/test/openaiResponsesHistoryRewriteSession.test.ts new file mode 100644 index 000000000..a6ff62450 --- /dev/null +++ b/packages/agents-openai/test/openaiResponsesHistoryRewriteSession.test.ts @@ -0,0 +1,252 @@ +import { describe, expect, it } from 'vitest'; + +import { MemorySession, RequestUsage, UserError } from '@openai/agents-core'; +import type { + AgentInputItem, + OpenAIResponsesCompactionArgs, + OpenAIResponsesCompactionResult, + Session, +} from '@openai/agents-core'; + +import { OpenAIResponsesHistoryRewriteSession } from '../src'; +import { OPENAI_SESSION_API } from '../src/memory/openaiSessionApi'; + +describe('OpenAIResponsesHistoryRewriteSession', () => { + it('rejects conversations-backed sessions', () => { + const underlyingSession = new MemorySession(); + Object.defineProperty(underlyingSession, OPENAI_SESSION_API, { + value: 'conversations', + }); + + expect(() => { + new OpenAIResponsesHistoryRewriteSession({ + underlyingSession, + }); + }).toThrow(UserError); + }); + + it('rewrites local history when the underlying session is not rewrite-aware', async () => { + class PlainSession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + const session = new OpenAIResponsesHistoryRewriteSession({ + underlyingSession: new PlainSession(), + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ] as AgentInputItem[]); + + await session.applyHistoryMutations({ + mutations: [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ], + }); + + expect(await session.getItems()).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]); + }); + + it('does not append a replacement when the underlying session already trimmed the original call', async () => { + class PlainSession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return this.items.map((item) => structuredClone(item)); + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + const session = new OpenAIResponsesHistoryRewriteSession({ + underlyingSession: new PlainSession(), + }); + + await session.addItems([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ] as AgentInputItem[]); + + await session.applyHistoryMutations({ + mutations: [ + { + type: 'replace_function_call', + callId: 'call_override', + replacement: { + type: 'function_call', + callId: 'call_override', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '2' }), + }, + }, + ], + }); + + expect(await session.getItems()).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + { + type: 'function_call_result', + callId: 'call_override', + output: { + type: 'text', + text: 'Customer 2 details.', + }, + }, + ]); + }); + + it('forwards compaction requests when the underlying session supports them', async () => { + class TrackingSession implements Session { + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return []; + } + + async addItems(_items: AgentInputItem[]): Promise {} + + async popItem(): Promise { + return undefined; + } + + async clearSession(): Promise {} + + async runCompaction( + args?: OpenAIResponsesCompactionArgs, + ): Promise { + return { + usage: new RequestUsage({ + inputTokens: args?.responseId === 'resp_1' ? 1 : 0, + outputTokens: 2, + totalTokens: 3, + }), + }; + } + } + + const session = new OpenAIResponsesHistoryRewriteSession({ + underlyingSession: new TrackingSession() as Session & { + [OPENAI_SESSION_API]?: 'responses'; + }, + }); + + await expect( + session.runCompaction({ responseId: 'resp_1' }), + ).resolves.toMatchObject({ + usage: { + inputTokens: 1, + outputTokens: 2, + totalTokens: 3, + }, + }); + }); +}); diff --git a/packages/agents/test/index.test.ts b/packages/agents/test/index.test.ts index dce67f61c..7ed8f76e8 100644 --- a/packages/agents/test/index.test.ts +++ b/packages/agents/test/index.test.ts @@ -1,4 +1,11 @@ -import { Agent, toolNamespace, toolSearchTool } from '../src/index'; +import { + Agent, + OpenAIResponsesHistoryRewriteSession, + SessionHistoryRewriteArgs, + isSessionHistoryRewriteAwareSession, + toolNamespace, + toolSearchTool, +} from '../src/index'; import { RealtimeAgent } from '../src/realtime'; import { isZodObject } from '../src/utils'; import { describe, test, expect } from 'vitest'; @@ -43,3 +50,29 @@ describe('Tool search exports', () => { }); }); }); + +describe('Session history rewrite exports', () => { + test('history rewrite helpers should be available from the umbrella package', () => { + const session = new OpenAIResponsesHistoryRewriteSession(); + expect(typeof session.applyHistoryMutations).toBe('function'); + expect(isSessionHistoryRewriteAwareSession(session)).toBe(true); + + const args: SessionHistoryRewriteArgs = { + mutations: [ + { + type: 'replace_function_call', + callId: 'call_test', + replacement: { + type: 'function_call', + callId: 'call_test', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + }, + }, + ], + }; + + expect(args.mutations).toHaveLength(1); + }); +});