Skip to content

Commit 8a9ca9e

Browse files
kkitasemboshernitsan
authored and committed
fix(core): Fix context window overflow warning for PDF files (#13548)
1 parent 299139b commit 8a9ca9e

File tree

2 files changed

+106
-7
lines changed

2 files changed

+106
-7
lines changed

packages/core/src/core/client.test.ts

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
11431143
// A string of length 400 is roughly 100 tokens.
11441144
const longText = 'a'.repeat(400);
11451145
const request: Part[] = [{ text: longText }];
1146-
const estimatedRequestTokenCount = Math.floor(
1147-
JSON.stringify(request).length / 4,
1148-
);
1146+
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
1147+
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
11491148
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
11501149

11511150
// Mock tryCompressChat to not compress
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
12031202
// We need a request > 95 tokens.
12041203
const longText = 'a'.repeat(400);
12051204
const request: Part[] = [{ text: longText }];
1206-
const estimatedRequestTokenCount = Math.floor(
1207-
JSON.stringify(request).length / 4,
1208-
);
1205+
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
1206+
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
12091207
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
12101208

12111209
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
12361234
expect(mockTurnRunFn).not.toHaveBeenCalled();
12371235
});
12381236

1237+
it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
1238+
// Arrange
1239+
const MOCKED_TOKEN_LIMIT = 1000000; // 1M tokens
1240+
vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
1241+
1242+
const lastPromptTokenCount = 10000;
1243+
const mockChat: Partial<GeminiChat> = {
1244+
getLastPromptTokenCount: vi.fn().mockReturnValue(lastPromptTokenCount),
1245+
getHistory: vi.fn().mockReturnValue([]),
1246+
};
1247+
client['chat'] = mockChat as GeminiChat;
1248+
1249+
// Simulate a PDF file with large base64 data (11MB when encoded)
1250+
// In the old implementation, this would incorrectly estimate ~2.7M tokens
1251+
// In the new implementation, only the text part is counted
1252+
const largePdfBase64 = 'A'.repeat(11 * 1024 * 1024);
1253+
const request: Part[] = [
1254+
{ text: 'Please analyze this PDF document' }, // ~35 chars = ~8 tokens
1255+
{
1256+
inlineData: {
1257+
mimeType: 'application/pdf',
1258+
data: largePdfBase64, // This should be ignored in token estimation
1259+
},
1260+
},
1261+
];
1262+
1263+
// Mock tryCompressChat to not compress
1264+
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
1265+
originalTokenCount: lastPromptTokenCount,
1266+
newTokenCount: lastPromptTokenCount,
1267+
compressionStatus: CompressionStatus.NOOP,
1268+
});
1269+
1270+
// Mock Turn.run to simulate successful processing
1271+
const mockStream = (async function* () {
1272+
yield { type: 'content', value: 'Analysis complete' };
1273+
})();
1274+
mockTurnRunFn.mockReturnValue(mockStream);
1275+
1276+
// Act
1277+
const stream = client.sendMessageStream(
1278+
request,
1279+
new AbortController().signal,
1280+
'prompt-id-pdf-test',
1281+
);
1282+
1283+
const events = await fromAsync(stream);
1284+
1285+
// Assert
1286+
// Should NOT contain overflow warning
1287+
expect(events).not.toContainEqual(
1288+
expect.objectContaining({
1289+
type: GeminiEventType.ContextWindowWillOverflow,
1290+
}),
1291+
);
1292+
1293+
// Turn.run should be called (processing should continue)
1294+
expect(mockTurnRunFn).toHaveBeenCalled();
1295+
});
1296+
12391297
describe('Model Routing', () => {
12401298
let mockRouterService: { route: Mock };
12411299

packages/core/src/core/client.ts

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
5555

5656
const MAX_TURNS = 100;
5757

58+
/**
59+
* Estimates the character length of text-only parts in a request.
60+
* Binary data (inline_data, fileData) is excluded from the estimation
61+
* because Gemini counts these as fixed token values, not based on their size.
62+
* @param request The request to estimate tokens for
63+
* @returns Estimated character length of text content
64+
*/
65+
function estimateTextOnlyLength(request: PartListUnion): number {
66+
if (typeof request === 'string') {
67+
return request.length;
68+
}
69+
70+
// Ensure request is an array before iterating
71+
if (!Array.isArray(request)) {
72+
return 0;
73+
}
74+
75+
let textLength = 0;
76+
for (const part of request) {
77+
// Handle string elements in the array
78+
if (typeof part === 'string') {
79+
textLength += part.length;
80+
}
81+
// Handle object elements with text property
82+
else if (
83+
typeof part === 'object' &&
84+
part !== null &&
85+
'text' in part &&
86+
part.text
87+
) {
88+
textLength += part.text.length;
89+
}
90+
// inlineData, fileData, and other binary parts are ignored
91+
// as they are counted as fixed tokens by Gemini
92+
}
93+
return textLength;
94+
}
95+
5896
export class GeminiClient {
5997
private chat?: GeminiChat;
6098
private sessionTurnCount = 0;
@@ -422,8 +460,11 @@ export class GeminiClient {
422460
// Check for context window overflow
423461
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
424462

463+
// Estimate tokens based on text content only.
464+
// Binary data (PDFs, images) are counted as fixed tokens by Gemini,
465+
// not based on their base64-encoded size.
425466
const estimatedRequestTokenCount = Math.floor(
426-
JSON.stringify(request).length / 4,
467+
estimateTextOnlyLength(request) / 4,
427468
);
428469

429470
const remainingTokenCount =

0 commit comments

Comments (0)