diff --git a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx index 78d0b0cf9b0..ed3949f2686 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.test.tsx +++ b/packages/cli/src/ui/hooks/useGeminiStream.test.tsx @@ -922,6 +922,227 @@ describe('useGeminiStream', () => { }); }); + it('should not resubmit tool responses that have already been submitted to Gemini', async () => { + // This tests the fix for the unrecoverable 400 error where already-submitted + // tools were being resubmitted due to stale closure in handleCompletedTools. + // Once triggered, ALL subsequent prompts fail and session must be /clear'd. + + const alreadySubmittedTool = { + request: { + callId: 'already-submitted-call', + name: 'tool1', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: true, // KEY: Already submitted + response: { + callId: 'already-submitted-call', + responseParts: [{ text: 'already sent' }], + errorType: undefined, + }, + tool: { displayName: 'Tool1' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + const newTool = { + request: { + callId: 'new-call', + name: 'tool2', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race', + }, + status: 'success', + responseSubmittedToGemini: false, // Not yet submitted + response: { + callId: 'new-call', + responseParts: [{ text: 'new response' }], + errorType: undefined, + }, + tool: { displayName: 'Tool2' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [alreadySubmittedTool, newTool], // Current tracked state with flags + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Simulate scheduler calling onComplete - scheduler core doesn't track + // responseSubmittedToGemini, so we strip it to simulate real behavior + const fromScheduler = [ + { ...alreadySubmittedTool, responseSubmittedToGemini: undefined }, + { ...newTool, responseSubmittedToGemini: undefined }, + ]; + + await act(async () => { + if (capturedOnComplete) { + await capturedOnComplete(fromScheduler as TrackedToolCall[]); + } + }); + + // Only the NEW tool should be marked as submitted + // The already-submitted tool should be filtered out + await waitFor(() => { + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1); + expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['new-call']); + }); + }); + + it('should await submitQuery before marking tools as submitted (race condition fix)', async () => { + // This tests the fix for the race condition where markToolsAsSubmitted was + // called BEFORE submitQuery completed, allowing user prompts to race ahead + // of tool responses and causing unrecoverable 400 errors. + // See: https://github.com/google-gemini/gemini-cli/issues/16144 + + const toolCallResponseParts: PartListUnion = [{ text: 'tool response' }]; + + const completedTool = { + request: { + callId: 'race-test-call', + name: 'test_tool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-id-race-test', + }, + status: 'success', + responseSubmittedToGemini: false, + response: { + callId: 'race-test-call', + responseParts: toolCallResponseParts, + errorType: undefined, + }, + tool: { displayName: 'TestTool' }, + invocation: { + getDescription: () => 'Mock description', + } as unknown as AnyToolInvocation, + } as TrackedCompletedToolCall; + + // Track the order of operations + const callOrder: string[] = []; + let resolveStreamPromise: () => void; + const streamPromise = new Promise((resolve) => { + resolveStreamPromise = resolve; + }); + + // Mock sendMessageStream to be a slow async operation + mockSendMessageStream.mockImplementation(() => { + callOrder.push('sendMessageStream:start'); + return (async function* () { + await streamPromise; // Wait until we explicitly resolve + callOrder.push('sendMessageStream:end'); + yield { type: ServerGeminiEventType.Finished, text: '' }; + })(); + }); + + // Track when markToolsAsSubmitted is called + mockMarkToolsAsSubmitted.mockImplementation((callIds: string[]) => { + callOrder.push(`markToolsAsSubmitted:${callIds.join(',')}`); + }); + + let capturedOnComplete: + | ((tools: TrackedToolCall[]) => Promise) + | null = null; + + mockUseReactToolScheduler.mockImplementation((onComplete) => { + capturedOnComplete = onComplete; + return [ + [completedTool], + mockScheduleToolCalls, + mockMarkToolsAsSubmitted, + vi.fn(), + ]; + }); + + renderHook(() => + useGeminiStream( + new MockedGeminiClientClass(mockConfig), + [], + mockAddItem, + mockConfig, + mockLoadedSettings, + mockOnDebugMessage, + mockHandleSlashCommand, + false, + () => 'vscode' as EditorType, + () => {}, + () => Promise.resolve(), + false, + () => {}, + () => {}, + () => {}, + 80, + 24, + ), + ); + + // Start tool completion (this will call submitQuery -> sendMessageStream) + const completionPromise = act(async () => { + if (capturedOnComplete) { + await capturedOnComplete([completedTool] as TrackedToolCall[]); + } + }); + + // Give time for sendMessageStream to start but NOT complete + await new Promise((r) => setTimeout(r, 10)); + + // At this point, sendMessageStream should have started but not finished + // markToolsAsSubmitted should NOT have been called yet (the fix!) + expect(callOrder).toContain('sendMessageStream:start'); + expect(callOrder).not.toContain('markToolsAsSubmitted:race-test-call'); + + // Now resolve the stream + resolveStreamPromise!(); + await completionPromise; + + // After stream completes, markToolsAsSubmitted should be called + await waitFor(() => { + expect(callOrder).toContain('markToolsAsSubmitted:race-test-call'); + }); + + // Verify the correct order: stream must complete before marking + const streamEndIndex = callOrder.indexOf('sendMessageStream:end'); + const markIndex = callOrder.indexOf('markToolsAsSubmitted:race-test-call'); + expect(streamEndIndex).toBeLessThan(markIndex); + }); + it('should not flicker streaming state to Idle between tool completion and submission', async () => { const toolCallResponseParts: PartListUnion = [ { text: 'tool 1 final response' }, diff --git a/packages/cli/src/ui/hooks/useGeminiStream.ts b/packages/cli/src/ui/hooks/useGeminiStream.ts index 048c7080d82..abd462ccd7e 100644 --- a/packages/cli/src/ui/hooks/useGeminiStream.ts +++ b/packages/cli/src/ui/hooks/useGeminiStream.ts @@ -1188,6 +1188,17 @@ export const useGeminiStream = ( ( tc: TrackedToolCall, ): tc is TrackedCompletedToolCall | TrackedCancelledToolCall => { + // Check if we've already submitted this tool call. + // We need to look up the tracked version because the incoming 'tc' + // comes directly from the scheduler core and lacks the + // 'responseSubmittedToGemini' flag. + const trackedToolCall = toolCalls.find( + (t) => t.request.callId === tc.request.callId, + ); + if (trackedToolCall?.responseSubmittedToGemini) { + return false; + } + const isTerminalState = tc.status === 'success' || tc.status === 'error' || @@ -1304,21 +1315,25 @@ export const useGeminiStream = ( (toolCall) => toolCall.request.prompt_id, ); - markToolsAsSubmitted(callIdsToMarkAsSubmitted); - // Don't continue if model was switched due to quota error if (modelSwitchedFromQuotaError) { + markToolsAsSubmitted(callIdsToMarkAsSubmitted); return; } - // eslint-disable-next-line @typescript-eslint/no-floating-promises - submitQuery( + // IMPORTANT: We must await submitQuery BEFORE marking tools as submitted. + // Otherwise, streamingState becomes Idle and user prompts can race ahead + // of the tool response, causing function call/response mismatch errors. + await submitQuery( responsesToSend, { isContinuation: true, }, prompt_ids[0], ); + + // Only mark as submitted after the API call is complete + markToolsAsSubmitted(callIdsToMarkAsSubmitted); }, [ submitQuery, @@ -1327,6 +1342,7 @@ export const useGeminiStream = ( performMemoryRefresh, modelSwitchedFromQuotaError, addItem, + toolCalls, ], );