Skip to content
221 changes: 221 additions & 0 deletions packages/cli/src/ui/hooks/useGeminiStream.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,227 @@ describe('useGeminiStream', () => {
});
});

it('should not resubmit tool responses that have already been submitted to Gemini', async () => {
// This tests the fix for the unrecoverable 400 error where already-submitted
// tools were being resubmitted due to stale closure in handleCompletedTools.
// Once triggered, ALL subsequent prompts fail and session must be /clear'd.

const alreadySubmittedTool = {
request: {
callId: 'already-submitted-call',
name: 'tool1',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race',
},
status: 'success',
responseSubmittedToGemini: true, // KEY: Already submitted
response: {
callId: 'already-submitted-call',
responseParts: [{ text: 'already sent' }],
errorType: undefined,
},
tool: { displayName: 'Tool1' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

const newTool = {
request: {
callId: 'new-call',
name: 'tool2',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race',
},
status: 'success',
responseSubmittedToGemini: false, // Not yet submitted
response: {
callId: 'new-call',
responseParts: [{ text: 'new response' }],
errorType: undefined,
},
tool: { displayName: 'Tool2' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

let capturedOnComplete:
| ((tools: TrackedToolCall[]) => Promise<void>)
| null = null;

mockUseReactToolScheduler.mockImplementation((onComplete) => {
capturedOnComplete = onComplete;
return [
[alreadySubmittedTool, newTool], // Current tracked state with flags
mockScheduleToolCalls,
mockMarkToolsAsSubmitted,
vi.fn(),
];
});

renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
80,
24,
),
);

// Simulate scheduler calling onComplete - scheduler core doesn't track
// responseSubmittedToGemini, so we strip it to simulate real behavior
const fromScheduler = [
{ ...alreadySubmittedTool, responseSubmittedToGemini: undefined },
{ ...newTool, responseSubmittedToGemini: undefined },
];

await act(async () => {
if (capturedOnComplete) {
await capturedOnComplete(fromScheduler as TrackedToolCall[]);
}
});

// Only the NEW tool should be marked as submitted
// The already-submitted tool should be filtered out
await waitFor(() => {
expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1);
expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['new-call']);
});
});

it('should await submitQuery before marking tools as submitted (race condition fix)', async () => {
// This tests the fix for the race condition where markToolsAsSubmitted was
// called BEFORE submitQuery completed, allowing user prompts to race ahead
// of tool responses and causing unrecoverable 400 errors.
// See: https://github.com/google-gemini/gemini-cli/issues/16144

const toolCallResponseParts: PartListUnion = [{ text: 'tool response' }];

const completedTool = {
request: {
callId: 'race-test-call',
name: 'test_tool',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race-test',
},
status: 'success',
responseSubmittedToGemini: false,
response: {
callId: 'race-test-call',
responseParts: toolCallResponseParts,
errorType: undefined,
},
tool: { displayName: 'TestTool' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

// Track the order of operations
const callOrder: string[] = [];
let resolveStreamPromise: () => void;
const streamPromise = new Promise<void>((resolve) => {
resolveStreamPromise = resolve;
});

// Mock sendMessageStream to be a slow async operation
mockSendMessageStream.mockImplementation(() => {
callOrder.push('sendMessageStream:start');
return (async function* () {
await streamPromise; // Wait until we explicitly resolve
callOrder.push('sendMessageStream:end');
yield { type: ServerGeminiEventType.Finished, text: '' };
})();
});

// Track when markToolsAsSubmitted is called
mockMarkToolsAsSubmitted.mockImplementation((callIds: string[]) => {
callOrder.push(`markToolsAsSubmitted:${callIds.join(',')}`);
});

let capturedOnComplete:
| ((tools: TrackedToolCall[]) => Promise<void>)
| null = null;

mockUseReactToolScheduler.mockImplementation((onComplete) => {
capturedOnComplete = onComplete;
return [
[completedTool],
mockScheduleToolCalls,
mockMarkToolsAsSubmitted,
vi.fn(),
];
});

renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
80,
24,
),
);

// Start tool completion (this will call submitQuery -> sendMessageStream)
const completionPromise = act(async () => {
if (capturedOnComplete) {
await capturedOnComplete([completedTool] as TrackedToolCall[]);
}
});

// Give time for sendMessageStream to start but NOT complete
await new Promise((r) => setTimeout(r, 10));

// At this point, sendMessageStream should have started but not finished
// markToolsAsSubmitted should NOT have been called yet (the fix!)
expect(callOrder).toContain('sendMessageStream:start');
expect(callOrder).not.toContain('markToolsAsSubmitted:race-test-call');

// Now resolve the stream
resolveStreamPromise!();
await completionPromise;

// After stream completes, markToolsAsSubmitted should be called
await waitFor(() => {
expect(callOrder).toContain('markToolsAsSubmitted:race-test-call');
});

// Verify the correct order: stream must complete before marking
const streamEndIndex = callOrder.indexOf('sendMessageStream:end');
const markIndex = callOrder.indexOf('markToolsAsSubmitted:race-test-call');
expect(streamEndIndex).toBeLessThan(markIndex);
});

it('should not flicker streaming state to Idle between tool completion and submission', async () => {
const toolCallResponseParts: PartListUnion = [
{ text: 'tool 1 final response' },
Expand Down
24 changes: 20 additions & 4 deletions packages/cli/src/ui/hooks/useGeminiStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,17 @@ export const useGeminiStream = (
(
tc: TrackedToolCall,
): tc is TrackedCompletedToolCall | TrackedCancelledToolCall => {
// Check if we've already submitted this tool call.
// We need to look up the tracked version because the incoming 'tc'
// comes directly from the scheduler core and lacks the
// 'responseSubmittedToGemini' flag.
const trackedToolCall = toolCalls.find(
(t) => t.request.callId === tc.request.callId,
);
if (trackedToolCall?.responseSubmittedToGemini) {
return false;
}

const isTerminalState =
tc.status === 'success' ||
tc.status === 'error' ||
Expand Down Expand Up @@ -1275,21 +1286,25 @@ export const useGeminiStream = (
(toolCall) => toolCall.request.prompt_id,
);

markToolsAsSubmitted(callIdsToMarkAsSubmitted);

// Don't continue if model was switched due to quota error
if (modelSwitchedFromQuotaError) {
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
return;
}

// eslint-disable-next-line @typescript-eslint/no-floating-promises
submitQuery(
// IMPORTANT: We must await submitQuery BEFORE marking tools as submitted.
// Otherwise, streamingState becomes Idle and user prompts can race ahead
// of the tool response, causing function call/response mismatch errors.
await submitQuery(
responsesToSend,
{
isContinuation: true,
},
prompt_ids[0],
);

// Only mark as submitted after the API call is complete
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
},
[
submitQuery,
Expand All @@ -1298,6 +1313,7 @@ export const useGeminiStream = (
performMemoryRefresh,
modelSwitchedFromQuotaError,
addItem,
toolCalls,
],
);

Expand Down
Loading
Loading