Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 221 additions & 0 deletions packages/cli/src/ui/hooks/useGeminiStream.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,227 @@ describe('useGeminiStream', () => {
});
});

it('should not resubmit tool responses that have already been submitted to Gemini', async () => {
// This tests the fix for the unrecoverable 400 error where already-submitted
// tools were being resubmitted due to stale closure in handleCompletedTools.
// Once triggered, ALL subsequent prompts fail and session must be /clear'd.

const alreadySubmittedTool = {
request: {
callId: 'already-submitted-call',
name: 'tool1',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race',
},
status: 'success',
responseSubmittedToGemini: true, // KEY: Already submitted
response: {
callId: 'already-submitted-call',
responseParts: [{ text: 'already sent' }],
errorType: undefined,
},
tool: { displayName: 'Tool1' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

const newTool = {
request: {
callId: 'new-call',
name: 'tool2',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race',
},
status: 'success',
responseSubmittedToGemini: false, // Not yet submitted
response: {
callId: 'new-call',
responseParts: [{ text: 'new response' }],
errorType: undefined,
},
tool: { displayName: 'Tool2' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

let capturedOnComplete:
| ((tools: TrackedToolCall[]) => Promise<void>)
| null = null;

mockUseReactToolScheduler.mockImplementation((onComplete) => {
capturedOnComplete = onComplete;
return [
[alreadySubmittedTool, newTool], // Current tracked state with flags
mockScheduleToolCalls,
mockMarkToolsAsSubmitted,
vi.fn(),
];
});

renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
80,
24,
),
);

// Simulate scheduler calling onComplete - scheduler core doesn't track
// responseSubmittedToGemini, so we strip it to simulate real behavior
const fromScheduler = [
{ ...alreadySubmittedTool, responseSubmittedToGemini: undefined },
{ ...newTool, responseSubmittedToGemini: undefined },
];

await act(async () => {
if (capturedOnComplete) {
await capturedOnComplete(fromScheduler as TrackedToolCall[]);
}
});

// Only the NEW tool should be marked as submitted
// The already-submitted tool should be filtered out
await waitFor(() => {
expect(mockMarkToolsAsSubmitted).toHaveBeenCalledTimes(1);
expect(mockMarkToolsAsSubmitted).toHaveBeenCalledWith(['new-call']);
});
});

it('should await submitQuery before marking tools as submitted (race condition fix)', async () => {
// This tests the fix for the race condition where markToolsAsSubmitted was
// called BEFORE submitQuery completed, allowing user prompts to race ahead
// of tool responses and causing unrecoverable 400 errors.
// See: https://github.com/google-gemini/gemini-cli/issues/16144

const toolCallResponseParts: PartListUnion = [{ text: 'tool response' }];

const completedTool = {
request: {
callId: 'race-test-call',
name: 'test_tool',
args: {},
isClientInitiated: false,
prompt_id: 'prompt-id-race-test',
},
status: 'success',
responseSubmittedToGemini: false,
response: {
callId: 'race-test-call',
responseParts: toolCallResponseParts,
errorType: undefined,
},
tool: { displayName: 'TestTool' },
invocation: {
getDescription: () => 'Mock description',
} as unknown as AnyToolInvocation,
} as TrackedCompletedToolCall;

// Track the order of operations
const callOrder: string[] = [];
let resolveStreamPromise: () => void;
const streamPromise = new Promise<void>((resolve) => {
resolveStreamPromise = resolve;
});

// Mock sendMessageStream to be a slow async operation
mockSendMessageStream.mockImplementation(() => {
callOrder.push('sendMessageStream:start');
return (async function* () {
await streamPromise; // Wait until we explicitly resolve
callOrder.push('sendMessageStream:end');
yield { type: ServerGeminiEventType.Finished, text: '' };
})();
});

// Track when markToolsAsSubmitted is called
mockMarkToolsAsSubmitted.mockImplementation((callIds: string[]) => {
callOrder.push(`markToolsAsSubmitted:${callIds.join(',')}`);
});

let capturedOnComplete:
| ((tools: TrackedToolCall[]) => Promise<void>)
| null = null;

mockUseReactToolScheduler.mockImplementation((onComplete) => {
capturedOnComplete = onComplete;
return [
[completedTool],
mockScheduleToolCalls,
mockMarkToolsAsSubmitted,
vi.fn(),
];
});

renderHook(() =>
useGeminiStream(
new MockedGeminiClientClass(mockConfig),
[],
mockAddItem,
mockConfig,
mockLoadedSettings,
mockOnDebugMessage,
mockHandleSlashCommand,
false,
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
() => {},
() => {},
80,
24,
),
);

// Start tool completion (this will call submitQuery -> sendMessageStream)
const completionPromise = act(async () => {
if (capturedOnComplete) {
await capturedOnComplete([completedTool] as TrackedToolCall[]);
}
});

// Give time for sendMessageStream to start but NOT complete
await new Promise((r) => setTimeout(r, 10));

// At this point, sendMessageStream should have started but not finished
// markToolsAsSubmitted should NOT have been called yet (the fix!)
expect(callOrder).toContain('sendMessageStream:start');
expect(callOrder).not.toContain('markToolsAsSubmitted:race-test-call');

// Now resolve the stream
resolveStreamPromise!();
await completionPromise;

// After stream completes, markToolsAsSubmitted should be called
await waitFor(() => {
expect(callOrder).toContain('markToolsAsSubmitted:race-test-call');
});

// Verify the correct order: stream must complete before marking
const streamEndIndex = callOrder.indexOf('sendMessageStream:end');
const markIndex = callOrder.indexOf('markToolsAsSubmitted:race-test-call');
expect(streamEndIndex).toBeLessThan(markIndex);
});

it('should not flicker streaming state to Idle between tool completion and submission', async () => {
const toolCallResponseParts: PartListUnion = [
{ text: 'tool 1 final response' },
Expand Down
24 changes: 20 additions & 4 deletions packages/cli/src/ui/hooks/useGeminiStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1188,6 +1188,17 @@ export const useGeminiStream = (
(
tc: TrackedToolCall,
): tc is TrackedCompletedToolCall | TrackedCancelledToolCall => {
// Check if we've already submitted this tool call.
// We need to look up the tracked version because the incoming 'tc'
// comes directly from the scheduler core and lacks the
// 'responseSubmittedToGemini' flag.
const trackedToolCall = toolCalls.find(
(t) => t.request.callId === tc.request.callId,
);
if (trackedToolCall?.responseSubmittedToGemini) {
return false;
}

Comment on lines +1191 to +1201
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jw408 why is this needed?

Can you explain...

Copy link

@jw408 jw408 Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy Path (Works Fine)

t=0 Tool A and Tool B start

t=1 Tool A completes
→ handleCompletedTools([A]) fires
→ Submits A, marks A as submitted
→ React re-renders
→ useCallback recreates handleCompletedTools with fresh toolCalls

t=2 Tool B completes (after React re-rendered)
→ NEW handleCompletedTools([A, B]) fires
→ Callback sees toolCalls where A.responseSubmittedToGemini = true
→ Filters out A, only submits B ✓

Unhappy Path (Race Condition)

t=0 Tool A and Tool B start

t=1 Tool A completes
→ handleCompletedTools([A]) fires
→ Submits A, calls markToolsAsSubmitted(["A"])
→ State update is QUEUED (async)

t=1.001 Tool B completes BEFORE React re-renders
→ SAME OLD handleCompletedTools([A, B]) fires
→ This callback has a STALE CLOSURE - it captured toolCalls
from before A was marked as submitted
→ Callback sees A.responseSubmittedToGemini = false (stale!)
→ Submits A again 💥400 error

The Core Issue: Stale Closure

The useCallback captures toolCalls at creation time. If toolCalls wasn't in the dependency array (part of the fix), the callback never gets recreated with fresh state - it always sees the original toolCalls.

Even with toolCalls in deps, there's still a window between:

  1. markToolsAsSubmitted() being called
  2. React re-rendering
  3. New callback being created

If another tool completes in that window, the old callback runs with stale state.

In practice on modern computers in unloaded state, the happy path happens the overwhelming majority of the time. If my system is swapping to death because I'm out of ram, or a ran a make -j 32 on an 8 core system and it's pegged, this could happen.

It's an edge case optimization for correctness, and I will readily admit I don't understand the nuances of the react ui loop and how it interacts with the various scheduling layers. I'm trying to help fix all these 400 parallel tool calling bugs, and this is one of the PR's I've split out from the original larger pr that was muddled, but in my mind they are all related at treating the same symptom.

const isTerminalState =
tc.status === 'success' ||
tc.status === 'error' ||
Expand Down Expand Up @@ -1304,21 +1315,25 @@ export const useGeminiStream = (
(toolCall) => toolCall.request.prompt_id,
);

markToolsAsSubmitted(callIdsToMarkAsSubmitted);

// Don't continue if model was switched due to quota error
if (modelSwitchedFromQuotaError) {
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
return;
}

// eslint-disable-next-line @typescript-eslint/no-floating-promises
submitQuery(
// IMPORTANT: We must await submitQuery BEFORE marking tools as submitted.
// Otherwise, streamingState becomes Idle and user prompts can race ahead
// of the tool response, causing function call/response mismatch errors.
await submitQuery(
responsesToSend,
{
isContinuation: true,
},
prompt_ids[0],
);

// Only mark as submitted after the API call is complete
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
},
[
submitQuery,
Expand All @@ -1327,6 +1342,7 @@ export const useGeminiStream = (
performMemoryRefresh,
modelSwitchedFromQuotaError,
addItem,
toolCalls,
],
);

Expand Down