diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 2414c340f4c..c6e27a14cf2 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -910,6 +910,227 @@ describe('useGeminiStream', () => { }); }); + it('should not resubmit tool responses that have already been submitted to Gemini', async () => { + // This tests the fix for the unrecoverable 400 error where already-submitted + // tools were being resubmitted due to stale closure in handleCompletedTools. + // Once triggered, ALL subsequent prompts fail and session must be /clear'd. + + const alreadySubmittedTool = { + request: { + callId: 'already-submitted-call', + name: 'tool1', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: true, // KEY: Already submitted + response: { + callId: 'already-submitted-call', + responseParts: [{ text: 'already sent' }], + errorType: undefined, + }, + tool: { displayName: 'Tool1' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + const newTool = { + request: { + callId: 'new-call', + name: 'tool2', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: false, // Not yet submitted + response: { + callId: 'new-call', + responseParts: [{ text: 'new response' }], + errorType: undefined, + }, + tool: { displayName: 'Tool2' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [alreadySubmittedTool, newTool], // Current tracked state with flags + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Simulate scheduler calling onComplete - scheduler core doesn't track + // responseSubmittedToGemini, so we strip it to simulate real behavior + const fromScheduler = [ + { ...alreadySubmittedTool, responseSubmittedToGemini: undefined }, + { ...newTool, responseSubmittedToGemini: undefined }, + ]; + + await act(async () => { + if (capturedOnComplete) { + await capturedOnComplete(fromScheduler as TrackedToolCall[]); + } + }); + + // Only the NEW tool should be marked as submitted + // The already-submitted tool should be filtered out + await waitFor(() => { + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1); + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['new-call']); + }); + }); + + it('should await submitQuery before marking tools as submitted (race condition fix)', async () => { + // This tests the fix for the race condition where markToolsAsSubmitted was + // called BEFORE submitQuery completed, allowing user prompts to race ahead + // of tool responses and causing unrecoverable 400 errors. + // See: https://github.com/google-gemini/gemini-cli/issues/16144 + + const toolCallResponseParts: PartListUnion = [{ text: 'tool response' }]; + + const completedTool = { + request: { + callId: 'race-test-call', + name: 'test_tool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race-test', + }, + status: 'success', + responseSubmittedToGemini: false, + response: { + callId: 'race-test-call', + responseParts: toolCallResponseParts, + errorType: undefined, + }, + tool: { displayName: 'TestTool' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + // Track the order of operations + const callOrder: string[] = []; + let resolveStreamPromise: () => void; + const streamPromise = new Promise((resolve) => { + resolveStreamPromise = resolve; + }); + + // Mock sendMessageStream to be a slow async operation + mockSendMessageStream.mockImplementation(() => { + callOrder.push('sendMessageStream:start'); + return (async function* () { + await streamPromise; // Wait until we explicitly resolve + callOrder.push('sendMessageStream:end'); + yield { type: ServerGeminiEventType.Finished, text: '' }; + })(); + }); + + // Track when markToolsAsSubmitted is called + mockMarkToolsAsSubmitted.mockImplementation((callIds: string[]) => { + callOrder.push(`markToolsAsSubmitted:${callIds.join(',')}`); + }); + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [completedTool], + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Start tool completion (this will call submitQuery -> sendMessageStream) + const completionPromise = act(async () => { + if (capturedOnComplete) { + await capturedOnComplete([completedTool] as TrackedToolCall[]); + } + }); + + // Give time for sendMessageStream to start but NOT complete + await new Promise((r) => setTimeout(r, 10)); + + // At this point, sendMessageStream should have started but not finished + // markToolsAsSubmitted should NOT have been called yet (the fix!) + expect(callOrder).toContain('sendMessageStream:start'); + expect(callOrder).not.toContain('markToolsAsSubmitted:race-test-call'); + + // Now resolve the stream + resolveStreamPromise!(); + await completionPromise; + + // After stream completes, markToolsAsSubmitted should be called + await waitFor(() => { + expect(callOrder).toContain('markToolsAsSubmitted:race-test-call'); + }); + + // Verify the correct order: stream must complete before marking + const streamEndIndex = callOrder.indexOf('sendMessageStream:end'); + const markIndex = callOrder.indexOf('markToolsAsSubmitted:race-test-call'); + expect(streamEndIndex).toBeLessThan(markIndex); + }); + it('should not flicker streaming state to Idle between tool completion and submission', async () => { const toolCallResponseParts: PartListUnion = [ { text: 'tool 1 final response' }, @@ -2797,6 +3018,136 @@ describe('useGeminiStream', () => { expect(result.current.loopDetectionConfirmationRequest).not.toBeNull(); }); }); + + it('should await retry submitQuery when user disables loop detection (race condition fix)', async () => { + // This tests the fix for the race condition where the retry submitQuery was + // called fire-and-forget after user clicked "disable" in the loop detection dialog. + // Since streamingState was Idle at that point, the guard at line 946 didn't block + // concurrent requests, allowing user prompts to race with the retry. + // See: https://github.com/google-gemini/gemini-cli/issues/16144 (related) + + const mockLoopDetectionService = { + disableForSession: vi.fn(), + }; + const mockClient = { + ...new MockedGeminiClientClass(mockConfig), + getLoopDetectionService: () => mockLoopDetectionService, + }; + mockConfig.getGeminiClient = vi.fn().mockReturnValue(mockClient); + + // Track request order and timing + const requestOrder: string[] = []; + let resolveRetryPromise: () => void; + const retryPromise = new Promise((resolve) => { + resolveRetryPromise = resolve; + }); + + // First call: returns LoopDetected + mockSendMessageStream.mockImplementationOnce(() => { + requestOrder.push('first-request:start'); + return (async function* () { + requestOrder.push('first-request:loopdetected'); + yield { type: ServerGeminiEventType.LoopDetected }; + })(); + }); + + // Second call: retry after user clicks "disable" - this is slow + mockSendMessageStream.mockImplementationOnce(() => { + requestOrder.push('retry-request:start'); + return (async function* () { + await retryPromise; // Wait until we explicitly resolve + requestOrder.push('retry-request:finished'); + yield { type: ServerGeminiEventType.Content, value: 'Retry success' }; + yield { type: ServerGeminiEventType.Finished, text: '' }; + })(); + }); + + // Third call: user's new prompt (should be blocked/queued) + mockSendMessageStream.mockImplementationOnce(() => { + requestOrder.push('user-prompt:start'); + return (async function* () { + requestOrder.push('user-prompt:finished'); + yield { type: ServerGeminiEventType.Content, value: 'New prompt' }; + yield { type: ServerGeminiEventType.Finished, text: '' }; + })(); + }); + + const { result } = renderTestHook(); + + // Send first query that triggers loop detection + await act(async () => { + await result.current.submitQuery('original query'); + }); + + // Wait for loop detection confirmation request + await waitFor(() => { + expect(result.current.loopDetectionConfirmationRequest).not.toBeNull(); + }); + + // At this point, streamingState should be Idle (which is the bug!) + // The fix should ensure streamingState stays Responding during retry + expect(result.current.streamingState).toBe(StreamingState.Idle); + + // User clicks "disable" - this triggers the retry + // Start the completion promise but don't await it yet + const completionPromise = act(async () => { + result.current.loopDetectionConfirmationRequest?.onComplete({ + userSelection: 'disable', + }); + }); + + // Give time for retry to start but NOT complete + await new Promise((r) => setTimeout(r, 10)); + + // Verify retry has started + expect(requestOrder).toContain('retry-request:start'); + // Verify retry has NOT finished + expect(requestOrder).not.toContain('retry-request:finished'); + + // Try to send a new user prompt while retry is in progress + // This simulates a fast user typing immediately after clicking "disable" + const userPromptPromise = act(async () => { + await result.current.submitQuery('new user prompt'); + }); + + // Give time for the user prompt to potentially start + await new Promise((r) => setTimeout(r, 10)); + + // THE FIX: After awaiting submitQuery in onComplete: + // - streamingState should be Responding during retry + // - The guard at line 946 should block the user prompt + // - User prompt should NOT start until retry finishes + + // BEFORE FIX: Both retry and user prompt would be racing + // AFTER FIX: User prompt waits for retry to complete + + // Now resolve the retry + resolveRetryPromise!(); + + // Wait for everything to complete + await completionPromise; + await userPromptPromise; + + // Verify the correct order: retry must complete before user prompt starts + await waitFor(() => { + expect(requestOrder).toContain('retry-request:finished'); + }); + + // Check that retry finished BEFORE user prompt started + // (This is the assertion that will fail on buggy code) + const retryFinishIndex = requestOrder.indexOf('retry-request:finished'); + const userPromptStartIndex = requestOrder.indexOf('user-prompt:start'); + + // If user-prompt was properly blocked, either: + // 1. It started after retry finished (retryFinishIndex < userPromptStartIndex) + // 2. Or it was dropped entirely (userPromptStartIndex === -1) + if (userPromptStartIndex !== -1) { + expect(retryFinishIndex).toBeLessThan(userPromptStartIndex); + } + + // Clean up mock implementations to not affect subsequent tests + mockSendMessageStream.mockReset(); + }); }); describe('Agent Execution Events', () => { diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 4522af13c70..22523da2465 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -117,6 +117,8 @@ export const useGeminiStream = ( const turnCancelledRef = useRef(false); const activeQueryIdRef = useRef(null); const [isResponding, setIsResponding] = useState(false); + // Ref for synchronous blocking of concurrent queries (React state updates are batched) + const isRespondingRef = useRef(false); const [thought, setThought] = useState(null); const [pendingHistoryItem, pendingHistoryItemRef, setPendingHistoryItem] = useStateAndRef(null); @@ -215,8 +217,10 @@ export const useGeminiStream = ( const onExec = useCallback(async (done: Promise) => { setIsResponding(true); + isRespondingRef.current = true; await done; setIsResponding(false); + isRespondingRef.current = false; }, []); const { handleShellCommand, activeShellPtyId, lastShellOutputTime } = useShellCommandProcessor( @@ -251,6 +255,7 @@ export const useGeminiStream = ( Date.now(), ); setIsResponding(false); + isRespondingRef.current = false; } prevActiveShellPtyIdRef.current = activeShellPtyId; }, [activeShellPtyId, addItem]); @@ -374,6 +379,7 @@ export const useGeminiStream = ( Date.now(), ); setIsResponding(false); + isRespondingRef.current = false; } } @@ -617,6 +623,7 @@ export const useGeminiStream = ( userMessageTimestamp, ); setIsResponding(false); + isRespondingRef.current = false; setThought(null); // Reset thought when user cancels }, [addItem, pendingHistoryItemRef, setPendingHistoryItem, setThought], @@ -807,6 +814,7 @@ export const useGeminiStream = ( userMessageTimestamp, ); setIsResponding(false); + isRespondingRef.current = false; }, [addItem, pendingHistoryItemRef, setPendingHistoryItem, setIsResponding], ); @@ -942,9 +950,12 @@ export const useGeminiStream = ( spanMetadata.input = query; const queryId = `${Date.now()}-${Math.random()}`; activeQueryIdRef.current = queryId; + // Block concurrent non-continuation queries using both state and ref + // (ref provides synchronous check since React state updates are batched) if ( (streamingState === StreamingState.Responding || - streamingState === StreamingState.WaitingForConfirmation) && + streamingState === StreamingState.WaitingForConfirmation || + isRespondingRef.current) && !options?.isContinuation ) return; @@ -995,6 +1006,7 @@ export const useGeminiStream = ( } setIsResponding(true); + isRespondingRef.current = true; setInitError(null); // Store query and prompt_id for potential retry on loop detection @@ -1025,7 +1037,7 @@ export const useGeminiStream = ( loopDetectedRef.current = false; // Show the confirmation dialog to choose whether to disable loop detection setLoopDetectionConfirmationRequest({ - onComplete: (result: { + onComplete: async (result: { userSelection: 'disable' | 'keep'; }) => { setLoopDetectionConfirmationRequest(null); @@ -1044,8 +1056,9 @@ export const useGeminiStream = ( ); if (lastQueryRef.current && lastPromptIdRef.current) { - // eslint-disable-next-line @typescript-eslint/no-floating-promises - submitQuery( + // Await submitQuery to ensure retry completes before user can send another prompt + // This prevents race conditions where user prompts could race with retry + await submitQuery( lastQueryRef.current, { isContinuation: true }, lastPromptIdRef.current, @@ -1085,6 +1098,7 @@ export const useGeminiStream = ( } finally { if (activeQueryIdRef.current === queryId) { setIsResponding(false); + isRespondingRef.current = false; } } }); @@ -1153,6 +1167,17 @@ export const useGeminiStream = ( ( tc: TrackedToolCall, ): tc is TrackedCompletedToolCall | TrackedCancelledToolCall => { + // Check if we've already submitted this tool call. + // We need to look up the tracked version because the incoming 'tc' + // comes directly from the scheduler core and lacks the + // 'responseSubmittedToGemini' flag. + const trackedToolCall = toolCalls.find( + (t) => t.request.callId === tc.request.callId, + ); + if (trackedToolCall?.responseSubmittedToGemini) { + return false; + } + const isTerminalState = tc.status === 'success' || tc.status === 'error' || @@ -1217,6 +1242,7 @@ export const useGeminiStream = ( Date.now(), ); setIsResponding(false); + isRespondingRef.current = false; const callIdsToMarkAsSubmitted = geminiTools.map( (toolCall) => toolCall.request.callId, @@ -1243,6 +1269,7 @@ export const useGeminiStream = ( ); } setIsResponding(false); + isRespondingRef.current = false; if (geminiClient) { // We need to manually add the function responses to the history @@ -1275,21 +1302,25 @@ export const useGeminiStream = ( (toolCall) => toolCall.request.prompt_id, ); - markToolsAsSubmitted(callIdsToMarkAsSubmitted); - // Don't continue if model was switched due to quota error if (modelSwitchedFromQuotaError) { + markToolsAsSubmitted(callIdsToMarkAsSubmitted); return; } - // eslint-disable-next-line @typescript-eslint/no-floating-promises - submitQuery( + // IMPORTANT: We must await submitQuery BEFORE marking tools as submitted. + // Otherwise, streamingState becomes Idle and user prompts can race ahead + // of the tool response, causing function call/response mismatch errors. + await submitQuery( responsesToSend, { isContinuation: true, }, prompt_ids[0], ); + + // Only mark as submitted after the API call is complete + markToolsAsSubmitted(callIdsToMarkAsSubmitted); }, [ submitQuery, @@ -1298,6 +1329,7 @@ export const useGeminiStream = ( performMemoryRefresh, modelSwitchedFromQuotaError, addItem, + toolCalls, ], ); diff --git a/packages/cli/src/ui/hooks/useSessionBrowser.ts b/packages/cli/src/ui/hooks/useSessionBrowser.ts index 1dbced887d8..ca636ebe44a 100644 --- a/packages/cli/src/ui/hooks/useSessionBrowser.ts +++ b/packages/cli/src/ui/hooks/useSessionBrowser.ts @@ -24,7 +24,7 @@ export const useSessionBrowser = ( uiHistory: HistoryItemWithoutId[], clientHistory: Array<{ role: 'user' | 'model'; parts: Part[] }>, resumedSessionData: ResumedSessionData, - ) => void, + ) => void | Promise, ) => { const [isSessionBrowserOpen, setIsSessionBrowserOpen] = useState(false); @@ -73,7 +73,8 @@ export const useSessionBrowser = ( const historyData = convertSessionToHistoryFormats( conversation.messages, ); - onLoadHistory( + // Await to ensure chat is fully initialized before user can send prompts + await onLoadHistory( historyData.uiHistory, historyData.clientHistory, resumedSessionData, diff --git a/packages/cli/src/ui/hooks/useSessionResume.test.ts b/packages/cli/src/ui/hooks/useSessionResume.test.ts index e135006471e..d248db849ac 100644 --- a/packages/cli/src/ui/hooks/useSessionResume.test.ts +++ b/packages/cli/src/ui/hooks/useSessionResume.test.ts @@ -62,7 +62,7 @@ describe('useSessionResume', () => { expect(result.current.loadHistoryForResume).toBeInstanceOf(Function); }); - it('should clear history and add items when loading history', () => { + it('should clear history and add items when loading history', async () => { const { result } = renderHook(() => useSessionResume(getDefaultProps())); const uiHistory: HistoryItemWithoutId[] = [ @@ -86,8 +86,8 @@ describe('useSessionResume', () => { filePath: '/path/to/session.json', }; - act(() => { - result.current.loadHistoryForResume( + await act(async () => { + await result.current.loadHistoryForResume( uiHistory, clientHistory, resumedData, @@ -116,7 +116,7 @@ describe('useSessionResume', () => { ); }); - it('should not load history if Gemini client is not initialized', () => { + it('should not load history if Gemini client is not initialized', async () => { const { result } = renderHook(() => useSessionResume({ ...getDefaultProps(), @@ -141,8 +141,8 @@ describe('useSessionResume', () => { filePath: '/path/to/session.json', }; - act(() => { - result.current.loadHistoryForResume( + await act(async () => { + await result.current.loadHistoryForResume( uiHistory, clientHistory, resumedData, @@ -154,7 +154,7 @@ describe('useSessionResume', () => { expect(mockGeminiClient.resumeChat).not.toHaveBeenCalled(); }); - it('should handle empty history arrays', () => { + it('should handle empty history arrays', async () => { const { result } = renderHook(() => useSessionResume(getDefaultProps())); const resumedData: ResumedSessionData = { @@ -168,8 +168,8 @@ describe('useSessionResume', () => { filePath: '/path/to/session.json', }; - act(() => { - result.current.loadHistoryForResume([], [], resumedData); + await act(async () => { + await result.current.loadHistoryForResume([], [], resumedData); }); expect(mockHistoryManager.clearItems).toHaveBeenCalled(); @@ -441,4 +441,77 @@ describe('useSessionResume', () => { expect(mockHistoryManager.addItem).toHaveBeenCalledTimes(2); }); }); + + describe('race condition prevention', () => { + /** + * Regression test for session resume race condition. + * + * Before the fix: loadHistoryForResume called resumeChat fire-and-forget. + * If user started typing before resumeChat completed, they'd hit uninitialized + * chat state because resumeChat initializes this.chat via startChat(). + * + * After the fix: loadHistoryForResume awaits resumeChat, ensuring chat is + * fully initialized before the function returns. + */ + it('should await resumeChat before loadHistoryForResume completes', async () => { + // Track when resumeChat completes + let resumeChatResolved = false; + let resolveResumeChat: () => void; + + // Mock resumeChat to be slow (simulates async chat initialization) + mockGeminiClient.resumeChat.mockImplementation(() => new Promise((resolve) => { + resolveResumeChat = () => { + resumeChatResolved = true; + resolve(); + }; + })); + + const { result } = renderHook(() => useSessionResume(getDefaultProps())); + + const uiHistory: HistoryItemWithoutId[] = [ + { type: 'user', text: 'Hello' }, + ]; + const clientHistory = [ + { role: 'user' as const, parts: [{ text: 'Hello' }] }, + ]; + const resumedData: ResumedSessionData = { + conversation: { + sessionId: 'race-test-123', + projectHash: 'project-123', + startTime: '2025-01-01T00:00:00Z', + lastUpdated: '2025-01-01T01:00:00Z', + messages: [] as MessageRecord[], + }, + filePath: '/path/to/session.json', + }; + + // Start loadHistoryForResume (should await resumeChat) + let loadHistoryResolved = false; + const loadPromise = result.current + .loadHistoryForResume(uiHistory, clientHistory, resumedData) + .then(() => { + loadHistoryResolved = true; + }); + + // Give time for any synchronous work to complete + await act(async () => { + await Promise.resolve(); + }); + + // CRITICAL ASSERTION: loadHistoryForResume should NOT have resolved yet + // because resumeChat hasn't completed + expect(resumeChatResolved).toBe(false); + expect(loadHistoryResolved).toBe(false); + + // Now resolve resumeChat + await act(async () => { + resolveResumeChat!(); + await loadPromise; + }); + + // Now both should be resolved + expect(resumeChatResolved).toBe(true); + expect(loadHistoryResolved).toBe(true); + }); + }); }); diff --git a/packages/cli/src/ui/hooks/useSessionResume.ts b/packages/cli/src/ui/hooks/useSessionResume.ts index 228ca6ac2cb..1810c056d46 100644 --- a/packages/cli/src/ui/hooks/useSessionResume.ts +++ b/packages/cli/src/ui/hooks/useSessionResume.ts @@ -5,6 +5,7 @@ */ import { useCallback, useEffect, useRef } from 'react'; +import { coreEvents } from '@google/gemini-cli-core'; import type { Config, ResumedSessionData } from '@google/gemini-cli-core'; import type { Part } from '@google/genai'; import type { HistoryItemWithoutId } from '../types.js'; @@ -45,7 +46,7 @@ export function useSessionResume({ }); const loadHistoryForResume = useCallback( - ( + async ( uiHistory: HistoryItemWithoutId[], clientHistory: Array<{ role: 'user' | 'model'; parts: Part[] }>, resumedData: ResumedSessionData, @@ -64,8 +65,8 @@ export function useSessionResume({ refreshStaticRef.current(); // Force Static component to re-render with the updated history. // Give the history to the Gemini client. - // eslint-disable-next-line @typescript-eslint/no-floating-promises - config.getGeminiClient()?.resumeChat(clientHistory, resumedData); + // Await to ensure chat is initialized before user can send prompts. + await config.getGeminiClient()?.resumeChat(clientHistory, resumedData); }, [config, isGeminiClientInitialized, setQuittingMessages], ); @@ -84,11 +85,19 @@ export function useSessionResume({ const historyData = convertSessionToHistoryFormats( resumedSessionData.conversation.messages, ); - loadHistoryForResume( - historyData.uiHistory, - historyData.clientHistory, - resumedSessionData, - ); + // Use async IIFE to properly await the async callback + // This ensures chat is fully initialized before user can send prompts + void (async () => { + try { + await loadHistoryForResume( + historyData.uiHistory, + historyData.clientHistory, + resumedSessionData, + ); + } catch (error) { + coreEvents.emitFeedback('error', 'Error resuming session:', error); + } + })(); } }, [ resumedSessionData, diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index fc866c97b51..8e121e7d301 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -738,10 +738,9 @@ export class LocalAgentExecutor { let submittedOutput: string | null = null; let taskCompleted = false; - // We'll collect promises for the tool executions - const toolExecutionPromises: Array> = []; - // And we'll need a place to store the synchronous results (like complete_task or blocked calls) - const syncResponseParts: Part[] = []; + // Track results by index to preserve the original function call order. + // Each entry will be either a resolved Part[] or a Promise. + const resultsByIndex: Array> = []; for (const [index, functionCall] of functionCalls.entries()) { const callId = functionCall.id ?? `${promptId}-${index}`; @@ -757,13 +756,15 @@ export class LocalAgentExecutor { // We already have a completion from this turn. Ignore subsequent ones. const error = 'Task already marked complete in this turn. Ignoring duplicate call.'; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -784,13 +785,15 @@ export class LocalAgentExecutor { if (!validationResult.success) { taskCompleted = false; // Validation failed, revoke completion const error = `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -808,13 +811,15 @@ export class LocalAgentExecutor { ? outputValue : JSON.stringify(outputValue, null, 2); } - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { result: 'Output submitted and task completed.' }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { result: 'Output submitted and task completed.' }, + id: callId, + }, }, - }); + ]; this.emitActivity('TOOL_CALL_END', { name: functionCall.name, output: 'Output submitted and task completed.', @@ -823,13 +828,15 @@ export class LocalAgentExecutor { // Failed to provide required output. taskCompleted = false; // Revoke completion status const error = `Missing required argument '${outputName}' for completion.`; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -848,13 +855,15 @@ export class LocalAgentExecutor { typeof resultArg === 'string' ? resultArg : JSON.stringify(resultArg, null, 2); - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { status: 'Result submitted and task completed.' }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { status: 'Result submitted and task completed.' }, + id: callId, + }, }, - }); + ]; this.emitActivity('TOOL_CALL_END', { name: functionCall.name, output: 'Result submitted and task completed.', @@ -864,13 +873,15 @@ export class LocalAgentExecutor { taskCompleted = false; // Revoke completion const error = 'Missing required "result" argument. You must provide your findings when calling complete_task.'; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -887,13 +898,15 @@ export class LocalAgentExecutor { debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`); - syncResponseParts.push({ - functionResponse: { - name: functionCall.name as string, - id: callId, - response: { error }, + resultsByIndex[index] = [ + { + functionResponse: { + name: functionCall.name as string, + id: callId, + response: { error }, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call_unauthorized', @@ -913,8 +926,8 @@ export class LocalAgentExecutor { prompt_id: promptId, }; - // Create a promise for the tool execution - const executionPromise = (async () => { + // Create a promise for the tool execution (stored at this index) + resultsByIndex[index] = (async () => { const agentContext = Object.create(this.runtimeContext); agentContext.getToolRegistry = () => this.toolRegistry; agentContext.getApprovalMode = () => ApprovalMode.YOLO; @@ -940,16 +953,16 @@ export class LocalAgentExecutor { return toolResponse.responseParts; })(); - - toolExecutionPromises.push(executionPromise); } - // Wait for all tool executions to complete - const asyncResults = await Promise.all(toolExecutionPromises); + // Resolve all results (promises and immediate values) in original order + const resolvedResults = await Promise.all( + resultsByIndex.map((entry) => Promise.resolve(entry)), + ); - // Combine all response parts - const toolResponseParts: Part[] = [...syncResponseParts]; - for (const result of asyncResults) { + // Combine all response parts in original function call order + const toolResponseParts: Part[] = []; + for (const result of resolvedResults) { if (result) { toolResponseParts.push(...result); } diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts index a448fd288b8..e19619dbd04 100644 --- a/packages/core/src/tools/mcp-client.test.ts +++ b/packages/core/src/tools/mcp-client.test.ts @@ -1915,9 +1915,16 @@ describe('connectToMcpServer - OAuth with transport fallback', () => { it('should handle HTTP 404 → SSE 401 → OAuth → SSE+OAuth succeeds', async () => { // Tests that OAuth flow works when SSE (not HTTP) requires auth + // Include www-authenticate header in the 401 error to avoid fetch timeout + const wwwAuthHeader = `Bearer realm="test", resource_metadata="http://test-server/.well-known/oauth-protected-resource"`; vi.mocked(mockedClient.connect) .mockRejectedValueOnce(new StreamableHTTPError(404, 'Not Found')) - .mockRejectedValueOnce(new StreamableHTTPError(401, 'Unauthorized')) + .mockRejectedValueOnce( + new StreamableHTTPError( + 401, + `Unauthorized\nwww-authenticate: ${wwwAuthHeader}`, + ), + ) .mockResolvedValueOnce(undefined); const client = await connectToMcpServer( diff --git a/packages/core/src/utils/generateContentResponseUtilities.test.ts b/packages/core/src/utils/generateContentResponseUtilities.test.ts index 0562f918889..c25a829b965 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.test.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.test.ts @@ -158,7 +158,7 @@ describe('generateContentResponseUtilities', () => { ]); }); - it('should handle llmContent with fileData for Gemini 3 model (should be siblings)', () => { + it('should handle llmContent with fileData for Gemini 3 model (should be nested)', () => { const llmContent: Part = { fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, }; @@ -174,9 +174,9 @@ describe('generateContentResponseUtilities', () => { name: toolName, id: callId, response: { output: 'Binary content provided (1 item(s)).' }, + parts: [llmContent], }, }, - llmContent, ]); }); @@ -202,7 +202,7 @@ describe('generateContentResponseUtilities', () => { ]); }); - it('should handle llmContent with fileData for non-Gemini 3 models', () => { + it('should handle llmContent with fileData for non-Gemini 3 models (should omit siblings)', () => { const llmContent: Part = { fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, }; @@ -212,15 +212,17 @@ describe('generateContentResponseUtilities', () => { llmContent, DEFAULT_GEMINI_MODEL, ); + // Binary content is omitted for non-multimodal models, so message should reflect that expect(result).toEqual([ { functionResponse: { name: toolName, id: callId, - response: { output: 'Binary content provided (1 item(s)).' }, + response: { + output: `Binary content was provided but omitted because model '${DEFAULT_GEMINI_MODEL}' does not support it in function responses.`, + }, }, }, - llmContent, ]); }); diff --git a/packages/core/src/utils/generateContentResponseUtilities.ts b/packages/core/src/utils/generateContentResponseUtilities.ts index 5151da9f6d4..d4fed071c6b 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.ts @@ -97,17 +97,18 @@ export function convertToFunctionResponse( }; const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model); - const siblingParts: Part[] = [...fileDataParts]; + const siblingParts: Part[] = []; - if (inlineDataParts.length > 0) { - if (isMultimodalFRSupported) { - // Nest inlineData if supported by the model + if (isMultimodalFRSupported) { + const binaryParts = [...fileDataParts, ...inlineDataParts]; + if (binaryParts.length > 0) { + // Nest all binary content if supported by the model (part.functionResponse as unknown as { parts: Part[] }).parts = - inlineDataParts; - } else { - // Otherwise treat as siblings - siblingParts.push(...inlineDataParts); + binaryParts; } + } else { + // Otherwise treat as siblings (which will be dropped below) + siblingParts.push(...fileDataParts, ...inlineDataParts); } // Add descriptive text if the response object is empty but we have binary content @@ -122,7 +123,18 @@ export function convertToFunctionResponse( } if (siblingParts.length > 0) { - return [part, ...siblingParts]; + debugLogger.warn( + `Model ${model} does not support multimodal function responses. Sibling parts will be omitted to prevent API errors in parallel function calling.`, + ); + // If binary parts were omitted, update the response text to avoid misleading the model + if ( + textParts.length === 0 && + (inlineDataParts.length > 0 || fileDataParts.length > 0) + ) { + part.functionResponse!.response = { + output: `Binary content was provided but omitted because model '${model}' does not support it in function responses.`, + }; + } } return [part];