
Commit 357f0e0

Handle case where large messages would exceed context window
1 parent 6f0030d commit 357f0e0

File tree

3 files changed (+273, -7 lines)


src/core/Cline.ts

Lines changed: 19 additions & 7 deletions

@@ -50,6 +50,7 @@ import { ClineProvider, GlobalFileNames } from "./webview/ClineProvider"
 import { detectCodeOmission } from "../integrations/editor/detect-omission"
 import { BrowserSession } from "../services/browser/BrowserSession"
 import { OpenRouterHandler } from "../api/providers/openrouter"
+import { CHARS_PER_TOKEN, ContextWindow } from "./ContextWindow"

 const cwd =
 	vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution
@@ -83,6 +84,7 @@ export class Cline {
 	didFinishAborting = false
 	abandoned = false
 	private diffViewProvider: DiffViewProvider
+	private contextWindow?: ContextWindow

 	// streaming
 	private currentStreamingContentIndex = 0
@@ -789,17 +791,15 @@ export class Cline {
 		const { browserViewportSize, preferredLanguage } = await this.providerRef.deref()?.getState() ?? {}
 		const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy, browserViewportSize) + await addCustomInstructions(this.customInstructions ?? '', cwd, preferredLanguage)

+		// Initialize context window with system prompt size
+		const systemPromptSize = Math.ceil(systemPrompt.length / CHARS_PER_TOKEN)
+		this.contextWindow = new ContextWindow(this.api.getModel().info, systemPromptSize)
+
 		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
 		if (previousApiReqIndex >= 0) {
 			const previousRequest = this.clineMessages[previousApiReqIndex]
 			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
-					previousRequest.text,
-				)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow || 128_000
-				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
-				if (totalTokens >= maxAllowedSize) {
+				if (this.contextWindow.shouldTruncateHistory(this.apiConversationHistory)) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
 					await this.overwriteApiConversationHistory(truncatedMessages)
 				}
@@ -2089,6 +2089,18 @@ export class Cline {
 			// add environment details as its own text block, separate from tool results
 			userContent.push({ type: "text", text: environmentDetails })

+			if (this.contextWindow) {
+				const newMessageSize = this.contextWindow.calculateMessageSize(userContent)
+				const errorMessage = this.contextWindow.validateMessageSize(newMessageSize)
+
+				if (errorMessage) {
+					await this.say("error", errorMessage)
+					this.abortTask()
+					this.userMessageContentReady = true // keep chat enabled for retry
+					return true
+				}
+			}
+
 			await this.addToApiConversationHistory({ role: "user", content: userContent })

 			// since we sent off a placeholder api_req_started message to update the webview while waiting to actually start the API request (to load potential details for example), we need to update the text of that message
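
For context on what changed above: the removed inline check reserved a fixed margin, while the new ContextWindow check budgets against the measured system prompt and the model's response reservation. A worked comparison, assuming a 128k-token window and a 2000-token system prompt (the same numbers as the test fixture below):

	// Old threshold (removed above): flat 40k-token headroom, floored at 80% of the window.
	const contextWindow = 128_000
	const oldMax = Math.max(contextWindow - 40_000, contextWindow * 0.8) // => 102_400 tokens

	// New threshold (ContextWindow.getMaxAllowedSize): subtract only what is known to be spoken for.
	const systemPromptSize = 2_000 // assumed here; estimated from the real prompt at runtime
	const responseBuffer = 4_000   // modelInfo.maxTokens, falling back to 4000
	const newMax = contextWindow - systemPromptSize - responseBuffer // => 122_000 tokens

The inputs differ as well: the old check summed the token usage reported by the previous API request, while shouldTruncateHistory re-estimates the entire conversation history at CHARS_PER_TOKEN (4 characters per token).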

src/core/ContextWindow.ts

Lines changed: 100 additions & 0 deletions

import { Anthropic } from "@anthropic-ai/sdk"
import { ModelInfo } from "../shared/api"

// Rough estimate: 1 token ≈ 4 characters (this varies by model and content but works as a conservative estimate)
export const CHARS_PER_TOKEN = 4

export class ContextWindow {
	private modelInfo: ModelInfo
	private systemPromptSize: number

	constructor(modelInfo: ModelInfo, systemPromptSize: number) {
		this.modelInfo = modelInfo
		this.systemPromptSize = systemPromptSize
	}

	/**
	 * Calculates the estimated token size of content blocks
	 */
	calculateMessageSize(content: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam>): number {
		return content.reduce<number>((total, block) => {
			switch (block.type) {
				case "text": {
					// Rough estimate: 1 token ≈ 4 characters
					const textBlock = block as Anthropic.TextBlockParam
					return total + Math.ceil(textBlock.text.length / CHARS_PER_TOKEN)
				}
				case "tool_use": {
					// Tool use blocks include name and parameters
					const toolBlock = block as Anthropic.ToolUseBlockParam
					const paramSize = Object.entries(toolBlock.input || {}).reduce<number>((sum, [key, value]) =>
						sum + Math.ceil((key + String(value)).length / CHARS_PER_TOKEN), 0)
					return total + Math.ceil(toolBlock.name.length / CHARS_PER_TOKEN) + paramSize + 100 // Extra tokens for structure
				}
				case "tool_result": {
					// Tool results can be string or array of blocks
					const toolBlock = block as Anthropic.ToolResultBlockParam
					if (typeof toolBlock.content === "string") {
						return total + Math.ceil(toolBlock.content.length / CHARS_PER_TOKEN)
					}
					if (Array.isArray(toolBlock.content)) {
						return total + toolBlock.content.reduce<number>((sum, contentBlock) => {
							if (contentBlock.type === "text") {
								return sum + Math.ceil(contentBlock.text.length / CHARS_PER_TOKEN)
							}
							return sum + 500 // Base size for non-text content blocks
						}, 0)
					}
					return total + 500 // Default size for unknown content
				}
				case "image":
				default:
					// Conservative estimate for images and unknown types
					return total + 500
			}
		}, 0)
	}

	/**
	 * Gets the maximum allowed message size based on context window and other constraints
	 */
	private getMaxAllowedSize(): number {
		const contextWindow = this.modelInfo.contextWindow || 128_000
		const responseBuffer = this.modelInfo.maxTokens ?? 4000
		return contextWindow - this.systemPromptSize - responseBuffer
	}

	/**
	 * Checks if a message would exceed the context window limits
	 * Returns an error message if the message is too large, undefined otherwise
	 */
	validateMessageSize(messageSize: number): string | undefined {
		const maxAllowedSize = this.getMaxAllowedSize()

		if (messageSize >= maxAllowedSize) {
			return `The message is too large for the model's available space (${messageSize} estimated tokens > ${maxAllowedSize} tokens, where ${this.systemPromptSize} tokens are used by system prompt and ${this.modelInfo.maxTokens ?? 4000} tokens reserved for response). Please hit Cancel and try breaking up the task into smaller steps.`
		}

		return undefined
	}

	/**
	 * Checks if the conversation history size exceeds the context window
	 * Returns true if truncation is needed
	 */
	shouldTruncateHistory(messages: Array<Anthropic.MessageParam>): boolean {
		let totalSize = 0
		for (const message of messages) {
			if (Array.isArray(message.content)) {
				totalSize += this.calculateMessageSize(message.content)
			} else {
				totalSize += Math.ceil(message.content.length / CHARS_PER_TOKEN)
			}
		}

		const maxAllowedSize = this.getMaxAllowedSize()

		// Truncate when total size exceeds available space
		return totalSize >= maxAllowedSize
	}
}
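
The class is consumed in two places in Cline.ts: once per request to decide whether history should be truncated, and once per outgoing message to reject oversized content before it is sent. A minimal standalone sketch of that flow (the ModelInfo literal is an assumed fixture mirroring the test setup below, and the prompt string is a stand-in for the real system prompt):

	import { ContextWindow, CHARS_PER_TOKEN } from "./ContextWindow"
	import { ModelInfo } from "../shared/api"

	// Assumed fixture: 128k-token window, 4k-token response budget.
	const modelInfo: ModelInfo = {
		contextWindow: 128_000,
		maxTokens: 4_000,
		supportsComputerUse: true,
		supportsPromptCache: false,
		supportsImages: true,
		inputPrice: 0,
		outputPrice: 0,
	}

	const systemPrompt = "You are Cline..." // stand-in for the real system prompt
	const cw = new ContextWindow(modelInfo, Math.ceil(systemPrompt.length / CHARS_PER_TOKEN))

	// Per-message check: estimate, validate, and surface an error instead of sending.
	const size = cw.calculateMessageSize([{ type: "text", text: "x".repeat(600_000) }]) // ~150k tokens
	const error = cw.validateMessageSize(size)
	if (error) {
		console.error(error) // Cline.ts routes this through this.say("error", ...) and aborts the task
	}

	// Per-request check: decide whether older turns should be dropped first.
	const needsTruncation = cw.shouldTruncateHistory([{ role: "user", content: "x".repeat(600_000) }])
	console.log(needsTruncation) // true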
src/core/__tests__/ContextWindow.test.ts

Lines changed: 154 additions & 0 deletions

import { ContextWindow } from '../ContextWindow'
import { ModelInfo } from '../../shared/api'
import { Anthropic } from '@anthropic-ai/sdk'

describe('ContextWindow', () => {
	let contextWindow: ContextWindow
	let mockModelInfo: ModelInfo

	beforeEach(() => {
		mockModelInfo = {
			contextWindow: 128000,
			maxTokens: 4000,
			supportsComputerUse: true,
			supportsPromptCache: false,
			supportsImages: true,
			inputPrice: 0,
			outputPrice: 0,
		}

		// System prompt is 2000 tokens
		const systemPromptSize = 2000
		contextWindow = new ContextWindow(mockModelInfo, systemPromptSize)
	})

	describe('Message Size Calculation', () => {
		it('should correctly estimate token size for text content', () => {
			const content: Anthropic.TextBlockParam[] = [{
				type: 'text',
				text: 'a'.repeat(4000) // Should be roughly 1000 tokens
			}]

			const size = contextWindow.calculateMessageSize(content)
			expect(size).toBe(1000)
		})

		it('should handle non-text content blocks appropriately', () => {
			const content: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [
				{
					type: 'text',
					text: 'Regular text' // 12 chars ≈ 3 tokens
				},
				{
					type: 'image',
					source: {
						type: 'base64',
						data: 'test-image-data',
						media_type: 'image/jpeg'
					}
				}
			]

			const size = contextWindow.calculateMessageSize(content)
			// 3 tokens for text + 500 tokens base size for image
			expect(size).toBe(503)
		})
	})

	describe('Message Size Validation', () => {
		it('should accept messages within context window limits', () => {
			const content: Anthropic.TextBlockParam[] = [{
				type: 'text',
				text: 'a'.repeat(4000) // Should be roughly 1000 tokens
			}]

			const size = contextWindow.calculateMessageSize(content)
			const error = contextWindow.validateMessageSize(size)
			expect(error).toBeUndefined()
		})

		it('should reject messages that exceed context window limits', () => {
			// Available space = context window (128000) - system prompt (2000) - response buffer (4000) = 122000
			// So a message of 123000 tokens should be rejected
			const size = 123000
			const error = contextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('too large')
			expect(error).toContain('system prompt')
			expect(error).toContain('reserved for response')
		})
	})

	describe('History Truncation', () => {
		it('should recommend truncation when message size exceeds available space', () => {
			// Available space = context window (128000) - system prompt (2000) - response buffer (4000) = 122000
			const messages: Anthropic.MessageParam[] = [
				{ role: "user", content: 'a'.repeat(400000) }, // ~100k tokens
				{ role: "assistant", content: 'b'.repeat(100000) } // ~25k tokens
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(true)
		})

		it('should not recommend truncation for normal message sizes', () => {
			const messages: Anthropic.MessageParam[] = [
				{ role: "user", content: 'a'.repeat(4000) }, // ~1k tokens
				{ role: "assistant", content: 'b'.repeat(4000) } // ~1k tokens
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(false)
		})

		it('should handle array content in messages', () => {
			const messages: Anthropic.MessageParam[] = [
				{
					role: "user",
					content: [
						{ type: "text", text: 'a'.repeat(400000) } as Anthropic.TextBlockParam, // ~100k tokens
						{ type: "text", text: 'b'.repeat(100000) } as Anthropic.TextBlockParam // ~25k tokens
					]
				}
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(true)
		})

		it('should handle empty history', () => {
			const shouldTruncate = contextWindow.shouldTruncateHistory([])
			expect(shouldTruncate).toBe(false)
		})
	})

	describe('Different Model Configurations', () => {
		it('should handle models with smaller context windows', () => {
			const smallModelInfo = {
				...mockModelInfo,
				contextWindow: 8000,
				maxTokens: 1000
			}
			const smallContextWindow = new ContextWindow(smallModelInfo, 2000)

			// Available space = context window (8000) - system prompt (2000) - response buffer (1000) = 5000
			// So a message of 6000 tokens should be rejected
			const size = 6000
			const error = smallContextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('too large')
		})

		it('should handle models with larger response buffers', () => {
			const largeBufferModelInfo = {
				...mockModelInfo,
				maxTokens: 8000
			}
			const largeBufferContextWindow = new ContextWindow(largeBufferModelInfo, 2000)

			// Available space = context window (128000) - system prompt (2000) - response buffer (8000) = 118000
			// So a message of 119000 tokens should be rejected
			const size = 119000
			const error = largeBufferContextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('reserved for response')
		})
	})
})
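
The tests cover text and image blocks but not tool blocks; per ContextWindow.ts, a tool_use block is charged for its name and stringified input at 4 characters per token, plus a flat 100 tokens for structure. A hypothetical case that could sit inside the describe('ContextWindow') block above (the tool name and input are illustrative, not from the diff):

	it('should charge tool_use blocks for name, input, and structure', () => {
		const block: Anthropic.ToolUseBlockParam = {
			type: 'tool_use',
			id: 'toolu_example', // id is not counted by the estimator
			name: 'read_file', // 9 chars -> ceil(9/4) = 3 tokens
			input: { path: 'src/core/Cline.ts' } // 'path' + value = 21 chars -> ceil(21/4) = 6 tokens
		}
		const size = contextWindow.calculateMessageSize([block])
		expect(size).toBe(109) // 3 (name) + 6 (input) + 100 (structure)
	})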
