diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 2414c340f4c..ccb46e60ef2 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -910,6 +910,227 @@ describe('useGeminiStream', () => { }); }); + it('should not resubmit tool responses that have already been submitted to Gemini', async () => { + // This tests the fix for the unrecoverable 400 error where already-submitted + // tools were being resubmitted due to stale closure in handleCompletedTools. + // Once triggered, ALL subsequent prompts fail and session must be /clear'd. + + const alreadySubmittedTool = { + request: { + callId: 'already-submitted-call', + name: 'tool1', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: true, // KEY: Already submitted + response: { + callId: 'already-submitted-call', + responseParts: [{ text: 'already sent' }], + errorType: undefined, + }, + tool: { displayName: 'Tool1' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + const newTool = { + request: { + callId: 'new-call', + name: 'tool2', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: false, // Not yet submitted + response: { + callId: 'new-call', + responseParts: [{ text: 'new response' }], + errorType: undefined, + }, + tool: { displayName: 'Tool2' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [alreadySubmittedTool, newTool], // Current tracked state with flags + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Simulate scheduler calling onComplete - scheduler core doesn't track + // responseSubmittedToGemini, so we strip it to simulate real behavior + const fromScheduler = [ + { ...alreadySubmittedTool, responseSubmittedToGemini: undefined }, + { ...newTool, responseSubmittedToGemini: undefined }, + ]; + + await act(async () => { + if (capturedOnComplete) { + await capturedOnComplete(fromScheduler as TrackedToolCall[]); + } + }); + + // Only the NEW tool should be marked as submitted + // The already-submitted tool should be filtered out + await waitFor(() => { + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1); + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['new-call']); + }); + }); + + it('should await submitQuery before marking tools as submitted (race condition fix)', async () => { + // This tests the fix for the race condition where markToolsAsSubmitted was + // called BEFORE submitQuery completed, allowing user prompts to race ahead + // of tool responses and causing unrecoverable 400 errors. + // See: https://github.com/google-gemini/gemini-cli/issues/16144 + + const toolCallResponseParts: PartListUnion = [{ text: 'tool response' }]; + + const completedTool = { + request: { + callId: 'race-test-call', + name: 'test_tool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race-test', + }, + status: 'success', + responseSubmittedToGemini: false, + response: { + callId: 'race-test-call', + responseParts: toolCallResponseParts, + errorType: undefined, + }, + tool: { displayName: 'TestTool' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + // Track the order of operations + const callOrder: string[] = []; + let resolveStreamPromise: () => void; + const streamPromise = new Promise((resolve) => { + resolveStreamPromise = resolve; + }); + + // Mock sendMessageStream to be a slow async operation + mockSendMessageStream.mockImplementation(() => { + callOrder.push('sendMessageStream:start'); + return (async function* () { + await streamPromise; // Wait until we explicitly resolve + callOrder.push('sendMessageStream:end'); + yield { type: ServerGeminiEventType.Finished, text: '' }; + })(); + }); + + // Track when markToolsAsSubmitted is called + mockMarkToolsAsSubmitted.mockImplementation((callIds: string[]) => { + callOrder.push(`markToolsAsSubmitted:${callIds.join(',')}`); + }); + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [completedTool], + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Start tool completion (this will call submitQuery -> sendMessageStream) + const completionPromise = act(async () => { + if (capturedOnComplete) { + await capturedOnComplete([completedTool] as TrackedToolCall[]); + } + }); + + // Give time for sendMessageStream to start but NOT complete + await new Promise((r) => setTimeout(r, 10)); + + // At this point, sendMessageStream should have started but not finished + // markToolsAsSubmitted should NOT have been called yet (the fix!) + expect(callOrder).toContain('sendMessageStream:start'); + expect(callOrder).not.toContain('markToolsAsSubmitted:race-test-call'); + + // Now resolve the stream + resolveStreamPromise!(); + await completionPromise; + + // After stream completes, markToolsAsSubmitted should be called + await waitFor(() => { + expect(callOrder).toContain('markToolsAsSubmitted:race-test-call'); + }); + + // Verify the correct order: stream must complete before marking + const streamEndIndex = callOrder.indexOf('sendMessageStream:end'); + const markIndex = callOrder.indexOf('markToolsAsSubmitted:race-test-call'); + expect(streamEndIndex).toBeLessThan(markIndex); + }); + it('should not flicker streaming state to Idle between tool completion and submission', async () => { const toolCallResponseParts: PartListUnion = [ { text: 'tool 1 final response' }, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 4522af13c70..b6b2c9348f7 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1153,6 +1153,17 @@ export const useGeminiStream = ( ( tc: TrackedToolCall, ): tc is TrackedCompletedToolCall | TrackedCancelledToolCall => { + // Check if we've already submitted this tool call. + // We need to look up the tracked version because the incoming 'tc' + // comes directly from the scheduler core and lacks the + // 'responseSubmittedToGemini' flag. + const trackedToolCall = toolCalls.find( + (t) => t.request.callId === tc.request.callId, + ); + if (trackedToolCall?.responseSubmittedToGemini) { + return false; + } + const isTerminalState = tc.status === 'success' || tc.status === 'error' || @@ -1275,21 +1286,25 @@ export const useGeminiStream = ( (toolCall) => toolCall.request.prompt_id, ); - markToolsAsSubmitted(callIdsToMarkAsSubmitted); - // Don't continue if model was switched due to quota error if (modelSwitchedFromQuotaError) { + markToolsAsSubmitted(callIdsToMarkAsSubmitted); return; } - // eslint-disable-next-line @typescript-eslint/no-floating-promises - submitQuery( + // IMPORTANT: We must await submitQuery BEFORE marking tools as submitted. + // Otherwise, streamingState becomes Idle and user prompts can race ahead + // of the tool response, causing function call/response mismatch errors. + await submitQuery( responsesToSend, { isContinuation: true, }, prompt_ids[0], ); + + // Only mark as submitted after the API call is complete + markToolsAsSubmitted(callIdsToMarkAsSubmitted); }, [ submitQuery, @@ -1298,6 +1313,7 @@ export const useGeminiStream = ( performMemoryRefresh, modelSwitchedFromQuotaError, addItem, + toolCalls, ], ); diff --git a/packages/core/src/agents/local-executor.ts b/packages/core/src/agents/local-executor.ts index fc866c97b51..8e121e7d301 100644 --- a/packages/core/src/agents/local-executor.ts +++ b/packages/core/src/agents/local-executor.ts @@ -738,10 +738,9 @@ export class LocalAgentExecutor { let submittedOutput: string | null = null; let taskCompleted = false; - // We'll collect promises for the tool executions - const toolExecutionPromises: Array> = []; - // And we'll need a place to store the synchronous results (like complete_task or blocked calls) - const syncResponseParts: Part[] = []; + // Track results by index to preserve the original function call order. + // Each entry will be either a resolved Part[] or a Promise. + const resultsByIndex: Array> = []; for (const [index, functionCall] of functionCalls.entries()) { const callId = functionCall.id ?? `${promptId}-${index}`; @@ -757,13 +756,15 @@ export class LocalAgentExecutor { // We already have a completion from this turn. Ignore subsequent ones. const error = 'Task already marked complete in this turn. Ignoring duplicate call.'; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -784,13 +785,15 @@ export class LocalAgentExecutor { if (!validationResult.success) { taskCompleted = false; // Validation failed, revoke completion const error = `Output validation failed: ${JSON.stringify(validationResult.error.flatten())}`; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -808,13 +811,15 @@ export class LocalAgentExecutor { ? outputValue : JSON.stringify(outputValue, null, 2); } - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { result: 'Output submitted and task completed.' }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { result: 'Output submitted and task completed.' }, + id: callId, + }, }, - }); + ]; this.emitActivity('TOOL_CALL_END', { name: functionCall.name, output: 'Output submitted and task completed.', @@ -823,13 +828,15 @@ export class LocalAgentExecutor { // Failed to provide required output. taskCompleted = false; // Revoke completion status const error = `Missing required argument '${outputName}' for completion.`; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -848,13 +855,15 @@ export class LocalAgentExecutor { typeof resultArg === 'string' ? resultArg : JSON.stringify(resultArg, null, 2); - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { status: 'Result submitted and task completed.' }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { status: 'Result submitted and task completed.' }, + id: callId, + }, }, - }); + ]; this.emitActivity('TOOL_CALL_END', { name: functionCall.name, output: 'Result submitted and task completed.', @@ -864,13 +873,15 @@ export class LocalAgentExecutor { taskCompleted = false; // Revoke completion const error = 'Missing required "result" argument. You must provide your findings when calling complete_task.'; - syncResponseParts.push({ - functionResponse: { - name: TASK_COMPLETE_TOOL_NAME, - response: { error }, - id: callId, + resultsByIndex[index] = [ + { + functionResponse: { + name: TASK_COMPLETE_TOOL_NAME, + response: { error }, + id: callId, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call', name: functionCall.name, @@ -887,13 +898,15 @@ export class LocalAgentExecutor { debugLogger.warn(`[LocalAgentExecutor] Blocked call: ${error}`); - syncResponseParts.push({ - functionResponse: { - name: functionCall.name as string, - id: callId, - response: { error }, + resultsByIndex[index] = [ + { + functionResponse: { + name: functionCall.name as string, + id: callId, + response: { error }, + }, }, - }); + ]; this.emitActivity('ERROR', { context: 'tool_call_unauthorized', @@ -913,8 +926,8 @@ export class LocalAgentExecutor { prompt_id: promptId, }; - // Create a promise for the tool execution - const executionPromise = (async () => { + // Create a promise for the tool execution (stored at this index) + resultsByIndex[index] = (async () => { const agentContext = Object.create(this.runtimeContext); agentContext.getToolRegistry = () => this.toolRegistry; agentContext.getApprovalMode = () => ApprovalMode.YOLO; @@ -940,16 +953,16 @@ export class LocalAgentExecutor { return toolResponse.responseParts; })(); - - toolExecutionPromises.push(executionPromise); } - // Wait for all tool executions to complete - const asyncResults = await Promise.all(toolExecutionPromises); + // Resolve all results (promises and immediate values) in original order + const resolvedResults = await Promise.all( + resultsByIndex.map((entry) => Promise.resolve(entry)), + ); - // Combine all response parts - const toolResponseParts: Part[] = [...syncResponseParts]; - for (const result of asyncResults) { + // Combine all response parts in original function call order + const toolResponseParts: Part[] = []; + for (const result of resolvedResults) { if (result) { toolResponseParts.push(...result); } diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts index a448fd288b8..e19619dbd04 100644 --- a/packages/core/src/tools/mcp-client.test.ts +++ b/packages/core/src/tools/mcp-client.test.ts @@ -1915,9 +1915,16 @@ describe('connectToMcpServer - OAuth with transport fallback', () => { it('should handle HTTP 404 → SSE 401 → OAuth → SSE+OAuth succeeds', async () => { // Tests that OAuth flow works when SSE (not HTTP) requires auth + // Include www-authenticate header in the 401 error to avoid fetch timeout + const wwwAuthHeader = `Bearer realm="test", resource_metadata="http://test-server/.well-known/oauth-protected-resource"`; vi.mocked(mockedClient.connect) .mockRejectedValueOnce(new StreamableHTTPError(404, 'Not Found')) - .mockRejectedValueOnce(new StreamableHTTPError(401, 'Unauthorized')) + .mockRejectedValueOnce( + new StreamableHTTPError( + 401, + `Unauthorized\nwww-authenticate: ${wwwAuthHeader}`, + ), + ) .mockResolvedValueOnce(undefined); const client = await connectToMcpServer( diff --git a/packages/core/src/utils/generateContentResponseUtilities.test.ts b/packages/core/src/utils/generateContentResponseUtilities.test.ts index 0562f918889..88206b9bfb1 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.test.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.test.ts @@ -158,7 +158,7 @@ describe('generateContentResponseUtilities', () => { ]); }); - it('should handle llmContent with fileData for Gemini 3 model (should be siblings)', () => { + it('should handle llmContent with fileData for Gemini 3 model (should be nested)', () => { const llmContent: Part = { fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, }; @@ -174,9 +174,9 @@ describe('generateContentResponseUtilities', () => { name: toolName, id: callId, response: { output: 'Binary content provided (1 item(s)).' }, + parts: [llmContent], }, }, - llmContent, ]); }); @@ -202,7 +202,7 @@ describe('generateContentResponseUtilities', () => { ]); }); - it('should handle llmContent with fileData for non-Gemini 3 models', () => { + it('should handle llmContent with fileData for non-Gemini 3 models (should omit siblings)', () => { const llmContent: Part = { fileData: { mimeType: 'application/pdf', fileUri: 'gs://...' }, }; @@ -220,7 +220,6 @@ describe('generateContentResponseUtilities', () => { response: { output: 'Binary content provided (1 item(s)).' }, }, }, - llmContent, ]); }); diff --git a/packages/core/src/utils/generateContentResponseUtilities.ts b/packages/core/src/utils/generateContentResponseUtilities.ts index 5151da9f6d4..5b5b4dd85d6 100644 --- a/packages/core/src/utils/generateContentResponseUtilities.ts +++ b/packages/core/src/utils/generateContentResponseUtilities.ts @@ -97,17 +97,18 @@ export function convertToFunctionResponse( }; const isMultimodalFRSupported = supportsMultimodalFunctionResponse(model); - const siblingParts: Part[] = [...fileDataParts]; + const siblingParts: Part[] = []; - if (inlineDataParts.length > 0) { - if (isMultimodalFRSupported) { - // Nest inlineData if supported by the model + if (isMultimodalFRSupported) { + const binaryParts = [...fileDataParts, ...inlineDataParts]; + if (binaryParts.length > 0) { + // Nest all binary content if supported by the model (part.functionResponse as unknown as { parts: Part[] }).parts = - inlineDataParts; - } else { - // Otherwise treat as siblings - siblingParts.push(...inlineDataParts); + binaryParts; } + } else { + // Otherwise treat as siblings (which will be dropped below) + siblingParts.push(...fileDataParts, ...inlineDataParts); } // Add descriptive text if the response object is empty but we have binary content @@ -122,7 +123,9 @@ export function convertToFunctionResponse( } if (siblingParts.length > 0) { - return [part, ...siblingParts]; + debugLogger.warn( + `Model ${model} does not support multimodal function responses. Sibling parts will be omitted to prevent API errors in parallel function calling.`, + ); } return [part];