Skip to content

Commit 8a9ca9e

Browse files
kkitasemboshernitsan
authored and committed
fix(core): Fix context window overflow warning for PDF files (#13548)
1 parent 299139b commit 8a9ca9e

File tree

2 files changed

+106
-7
lines changed

2 files changed

+106
-7
lines changed

packages/core/src/core/client.test.ts

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,9 +1143,8 @@ ${JSON.stringify(
11431143
// A string of length 400 is roughly 100 tokens.
11441144
const longText = 'a'.repeat(400);
11451145
const request: Part[] = [{ text: longText }];
1146-
const estimatedRequestTokenCount = Math.floor(
1147-
JSON.stringify(request).length / 4,
1148-
);
1146+
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
1147+
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
11491148
const remainingTokenCount = MOCKED_TOKEN_LIMIT - lastPromptTokenCount;
11501149

11511150
// Mock tryCompressChat to not compress
@@ -1203,9 +1202,8 @@ ${JSON.stringify(
12031202
// We need a request > 95 tokens.
12041203
const longText = 'a'.repeat(400);
12051204
const request: Part[] = [{ text: longText }];
1206-
const estimatedRequestTokenCount = Math.floor(
1207-
JSON.stringify(request).length / 4,
1208-
);
1205+
// estimateTextOnlyLength counts only text content (400 chars), not JSON structure
1206+
const estimatedRequestTokenCount = Math.floor(longText.length / 4);
12091207
const remainingTokenCount = STICKY_MODEL_LIMIT - lastPromptTokenCount;
12101208

12111209
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
@@ -1236,6 +1234,66 @@ ${JSON.stringify(
12361234
expect(mockTurnRunFn).not.toHaveBeenCalled();
12371235
});
12381236

1237+
it('should not trigger overflow warning for requests with large binary data (PDFs/images)', async () => {
1238+
// Arrange
1239+
const MOCKED_TOKEN_LIMIT = 1000000; // 1M tokens
1240+
vi.mocked(tokenLimit).mockReturnValue(MOCKED_TOKEN_LIMIT);
1241+
1242+
const lastPromptTokenCount = 10000;
1243+
const mockChat: Partial<GeminiChat> = {
1244+
getLastPromptTokenCount: vi.fn().mockReturnValue(lastPromptTokenCount),
1245+
getHistory: vi.fn().mockReturnValue([]),
1246+
};
1247+
client['chat'] = mockChat as GeminiChat;
1248+
1249+
// Simulate a PDF file with large base64 data (11MB when encoded)
1250+
// In the old implementation, this would incorrectly estimate ~2.7M tokens
1251+
// In the new implementation, only the text part is counted
1252+
const largePdfBase64 = 'A'.repeat(11 * 1024 * 1024);
1253+
const request: Part[] = [
1254+
{ text: 'Please analyze this PDF document' }, // ~35 chars = ~8 tokens
1255+
{
1256+
inlineData: {
1257+
mimeType: 'application/pdf',
1258+
data: largePdfBase64, // This should be ignored in token estimation
1259+
},
1260+
},
1261+
];
1262+
1263+
// Mock tryCompressChat to not compress
1264+
vi.spyOn(client, 'tryCompressChat').mockResolvedValue({
1265+
originalTokenCount: lastPromptTokenCount,
1266+
newTokenCount: lastPromptTokenCount,
1267+
compressionStatus: CompressionStatus.NOOP,
1268+
});
1269+
1270+
// Mock Turn.run to simulate successful processing
1271+
const mockStream = (async function* () {
1272+
yield { type: 'content', value: 'Analysis complete' };
1273+
})();
1274+
mockTurnRunFn.mockReturnValue(mockStream);
1275+
1276+
// Act
1277+
const stream = client.sendMessageStream(
1278+
request,
1279+
new AbortController().signal,
1280+
'prompt-id-pdf-test',
1281+
);
1282+
1283+
const events = await fromAsync(stream);
1284+
1285+
// Assert
1286+
// Should NOT contain overflow warning
1287+
expect(events).not.toContainEqual(
1288+
expect.objectContaining({
1289+
type: GeminiEventType.ContextWindowWillOverflow,
1290+
}),
1291+
);
1292+
1293+
// Turn.run should be called (processing should continue)
1294+
expect(mockTurnRunFn).toHaveBeenCalled();
1295+
});
1296+
12391297
describe('Model Routing', () => {
12401298
let mockRouterService: { route: Mock };
12411299

packages/core/src/core/client.ts

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,44 @@ import type { ModelConfigKey } from '../services/modelConfigService.js';
5555

5656
const MAX_TURNS = 100;
5757

58+
/**
59+
* Estimates the character length of text-only parts in a request.
60+
* Binary data (inline_data, fileData) is excluded from the estimation
61+
* because Gemini counts these as fixed token values, not based on their size.
62+
* @param request The request to estimate tokens for
63+
* @returns Estimated character length of text content
64+
*/
65+
function estimateTextOnlyLength(request: PartListUnion): number {
66+
if (typeof request === 'string') {
67+
return request.length;
68+
}
69+
70+
// Ensure request is an array before iterating
71+
if (!Array.isArray(request)) {
72+
return 0;
73+
}
74+
75+
let textLength = 0;
76+
for (const part of request) {
77+
// Handle string elements in the array
78+
if (typeof part === 'string') {
79+
textLength += part.length;
80+
}
81+
// Handle object elements with text property
82+
else if (
83+
typeof part === 'object' &&
84+
part !== null &&
85+
'text' in part &&
86+
part.text
87+
) {
88+
textLength += part.text.length;
89+
}
90+
// inlineData, fileData, and other binary parts are ignored
91+
// as they are counted as fixed tokens by Gemini
92+
}
93+
return textLength;
94+
}
95+
5896
export class GeminiClient {
5997
private chat?: GeminiChat;
6098
private sessionTurnCount = 0;
@@ -422,8 +460,11 @@ export class GeminiClient {
422460
// Check for context window overflow
423461
const modelForLimitCheck = this._getEffectiveModelForCurrentTurn();
424462

463+
// Estimate tokens based on text content only.
464+
// Binary data (PDFs, images) are counted as fixed tokens by Gemini,
465+
// not based on their base64-encoded size.
425466
const estimatedRequestTokenCount = Math.floor(
426-
JSON.stringify(request).length / 4,
467+
estimateTextOnlyLength(request) / 4,
427468
);
428469

429470
const remainingTokenCount =

0 commit comments

Comments (0)