
Commit 357f0e0

Handle case where large messages would exceed context window
1 parent 6f0030d commit 357f0e0

File tree

3 files changed (+273, -7 lines)


src/core/Cline.ts

Lines changed: 19 additions & 7 deletions

@@ -50,6 +50,7 @@ import { ClineProvider, GlobalFileNames } from "./webview/ClineProvider"
 import { detectCodeOmission } from "../integrations/editor/detect-omission"
 import { BrowserSession } from "../services/browser/BrowserSession"
 import { OpenRouterHandler } from "../api/providers/openrouter"
+import { CHARS_PER_TOKEN, ContextWindow } from "./ContextWindow"

 const cwd =
 	vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution
@@ -83,6 +84,7 @@ export class Cline {
 	didFinishAborting = false
 	abandoned = false
 	private diffViewProvider: DiffViewProvider
+	private contextWindow?: ContextWindow

 	// streaming
 	private currentStreamingContentIndex = 0
@@ -789,17 +791,15 @@ export class Cline {
 		const { browserViewportSize, preferredLanguage } = await this.providerRef.deref()?.getState() ?? {}
 		const systemPrompt = await SYSTEM_PROMPT(cwd, this.api.getModel().info.supportsComputerUse ?? false, mcpHub, this.diffStrategy, browserViewportSize) + await addCustomInstructions(this.customInstructions ?? '', cwd, preferredLanguage)

+		// Initialize context window with system prompt size
+		const systemPromptSize = Math.ceil(systemPrompt.length / CHARS_PER_TOKEN)
+		this.contextWindow = new ContextWindow(this.api.getModel().info, systemPromptSize)
+
 		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
 		if (previousApiReqIndex >= 0) {
 			const previousRequest = this.clineMessages[previousApiReqIndex]
 			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
-					previousRequest.text,
-				)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow || 128_000
-				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
-				if (totalTokens >= maxAllowedSize) {
+				if (this.contextWindow.shouldTruncateHistory(this.apiConversationHistory)) {
 					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
 					await this.overwriteApiConversationHistory(truncatedMessages)
 				}
@@ -2089,6 +2089,18 @@ export class Cline {
 			// add environment details as its own text block, separate from tool results
 			userContent.push({ type: "text", text: environmentDetails })

+			if (this.contextWindow) {
+				const newMessageSize = this.contextWindow.calculateMessageSize(userContent)
+				const errorMessage = this.contextWindow.validateMessageSize(newMessageSize)
+
+				if (errorMessage) {
+					await this.say("error", errorMessage)
+					this.abortTask()
+					this.userMessageContentReady = true // keep chat enabled for retry
+					return true
+				}
+			}
+
 			await this.addToApiConversationHistory({ role: "user", content: userContent })

 			// since we sent off a placeholder api_req_started message to update the webview while waiting to actually start the API request (to load potential details for example), we need to update the text of that message
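
For context on what changed above: the removed inline check reserved a fixed margin, while the new ContextWindow check budgets against the measured system prompt and the model's response reservation. A worked comparison, assuming a 128k-token window and a 2000-token system prompt (the same numbers as the test fixture below):

	// Old threshold (removed above): flat 40k-token headroom, floored at 80% of the window.
	const contextWindow = 128_000
	const oldMax = Math.max(contextWindow - 40_000, contextWindow * 0.8) // => 102_400 tokens

	// New threshold (ContextWindow.getMaxAllowedSize): subtract only what is known to be spoken for.
	const systemPromptSize = 2_000 // assumed here; estimated from the real prompt at runtime
	const responseBuffer = 4_000   // modelInfo.maxTokens, falling back to 4000
	const newMax = contextWindow - systemPromptSize - responseBuffer // => 122_000 tokens

The inputs differ as well: the old check summed the token usage reported by the previous API request, while shouldTruncateHistory re-estimates the entire conversation history at CHARS_PER_TOKEN (4 characters per token).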

src/core/ContextWindow.ts

Lines changed: 100 additions & 0 deletions

import { Anthropic } from "@anthropic-ai/sdk"
import { ModelInfo } from "../shared/api"

// Rough estimate: 1 token ≈ 4 characters (this varies by model and content but works as a conservative estimate)
export const CHARS_PER_TOKEN = 4

export class ContextWindow {
	private modelInfo: ModelInfo
	private systemPromptSize: number

	constructor(modelInfo: ModelInfo, systemPromptSize: number) {
		this.modelInfo = modelInfo
		this.systemPromptSize = systemPromptSize
	}

	/**
	 * Calculates the estimated token size of content blocks
	 */
	calculateMessageSize(content: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolUseBlockParam | Anthropic.ToolResultBlockParam>): number {
		return content.reduce<number>((total, block) => {
			switch (block.type) {
				case "text": {
					// Rough estimate: 1 token ≈ 4 characters
					const textBlock = block as Anthropic.TextBlockParam
					return total + Math.ceil(textBlock.text.length / CHARS_PER_TOKEN)
				}
				case "tool_use": {
					// Tool use blocks include name and parameters
					const toolBlock = block as Anthropic.ToolUseBlockParam
					const paramSize = Object.entries(toolBlock.input || {}).reduce<number>((sum, [key, value]) =>
						sum + Math.ceil((key + String(value)).length / CHARS_PER_TOKEN), 0)
					return total + Math.ceil(toolBlock.name.length / CHARS_PER_TOKEN) + paramSize + 100 // Extra tokens for structure
				}
				case "tool_result": {
					// Tool results can be string or array of blocks
					const toolBlock = block as Anthropic.ToolResultBlockParam
					if (typeof toolBlock.content === "string") {
						return total + Math.ceil(toolBlock.content.length / CHARS_PER_TOKEN)
					}
					if (Array.isArray(toolBlock.content)) {
						return total + toolBlock.content.reduce<number>((sum, contentBlock) => {
							if (contentBlock.type === "text") {
								return sum + Math.ceil(contentBlock.text.length / CHARS_PER_TOKEN)
							}
							return sum + 500 // Base size for non-text content blocks
						}, 0)
					}
					return total + 500 // Default size for unknown content
				}
				case "image":
				default:
					// Conservative estimate for images and unknown types
					return total + 500
			}
		}, 0)
	}

	/**
	 * Gets the maximum allowed message size based on context window and other constraints
	 */
	private getMaxAllowedSize(): number {
		const contextWindow = this.modelInfo.contextWindow || 128_000
		const responseBuffer = this.modelInfo.maxTokens ?? 4000
		return contextWindow - this.systemPromptSize - responseBuffer
	}

	/**
	 * Checks if a message would exceed the context window limits
	 * Returns an error message if the message is too large, undefined otherwise
	 */
	validateMessageSize(messageSize: number): string | undefined {
		const maxAllowedSize = this.getMaxAllowedSize()

		if (messageSize >= maxAllowedSize) {
			return `The message is too large for the model's available space (${messageSize} estimated tokens > ${maxAllowedSize} tokens, where ${this.systemPromptSize} tokens are used by system prompt and ${this.modelInfo.maxTokens ?? 4000} tokens reserved for response). Please hit Cancel and try breaking up the task into smaller steps.`
		}

		return undefined
	}

	/**
	 * Checks if the conversation history size exceeds the context window
	 * Returns true if truncation is needed
	 */
	shouldTruncateHistory(messages: Array<Anthropic.MessageParam>): boolean {
		let totalSize = 0
		for (const message of messages) {
			if (Array.isArray(message.content)) {
				totalSize += this.calculateMessageSize(message.content)
			} else {
				totalSize += Math.ceil(message.content.length / CHARS_PER_TOKEN)
			}
		}

		const maxAllowedSize = this.getMaxAllowedSize()

		// Truncate when total size exceeds available space
		return totalSize >= maxAllowedSize
	}
}
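
The class is consumed in two places in Cline.ts: once per request to decide whether history should be truncated, and once per outgoing message to reject oversized content before it is sent. A minimal standalone sketch of that flow (the ModelInfo literal is an assumed fixture mirroring the test setup below, and the prompt string is a stand-in for the real system prompt):

	import { ContextWindow, CHARS_PER_TOKEN } from "./ContextWindow"
	import { ModelInfo } from "../shared/api"

	// Assumed fixture: 128k-token window, 4k-token response budget.
	const modelInfo: ModelInfo = {
		contextWindow: 128_000,
		maxTokens: 4_000,
		supportsComputerUse: true,
		supportsPromptCache: false,
		supportsImages: true,
		inputPrice: 0,
		outputPrice: 0,
	}

	const systemPrompt = "You are Cline..." // stand-in for the real system prompt
	const cw = new ContextWindow(modelInfo, Math.ceil(systemPrompt.length / CHARS_PER_TOKEN))

	// Per-message check: estimate, validate, and surface an error instead of sending.
	const size = cw.calculateMessageSize([{ type: "text", text: "x".repeat(600_000) }]) // ~150k tokens
	const error = cw.validateMessageSize(size)
	if (error) {
		console.error(error) // Cline.ts routes this through this.say("error", ...) and aborts the task
	}

	// Per-request check: decide whether older turns should be dropped first.
	const needsTruncation = cw.shouldTruncateHistory([{ role: "user", content: "x".repeat(600_000) }])
	console.log(needsTruncation) // true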
src/core/__tests__/ContextWindow.test.ts

Lines changed: 154 additions & 0 deletions

import { ContextWindow } from '../ContextWindow'
import { ModelInfo } from '../../shared/api'
import { Anthropic } from '@anthropic-ai/sdk'

describe('ContextWindow', () => {
	let contextWindow: ContextWindow
	let mockModelInfo: ModelInfo

	beforeEach(() => {
		mockModelInfo = {
			contextWindow: 128000,
			maxTokens: 4000,
			supportsComputerUse: true,
			supportsPromptCache: false,
			supportsImages: true,
			inputPrice: 0,
			outputPrice: 0,
		}

		// System prompt is 2000 tokens
		const systemPromptSize = 2000
		contextWindow = new ContextWindow(mockModelInfo, systemPromptSize)
	})

	describe('Message Size Calculation', () => {
		it('should correctly estimate token size for text content', () => {
			const content: Anthropic.TextBlockParam[] = [{
				type: 'text',
				text: 'a'.repeat(4000) // Should be roughly 1000 tokens
			}]

			const size = contextWindow.calculateMessageSize(content)
			expect(size).toBe(1000)
		})

		it('should handle non-text content blocks appropriately', () => {
			const content: Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> = [
				{
					type: 'text',
					text: 'Regular text' // 12 chars ≈ 3 tokens
				},
				{
					type: 'image',
					source: {
						type: 'base64',
						data: 'test-image-data',
						media_type: 'image/jpeg'
					}
				}
			]

			const size = contextWindow.calculateMessageSize(content)
			// 3 tokens for text + 500 tokens base size for image
			expect(size).toBe(503)
		})
	})

	describe('Message Size Validation', () => {
		it('should accept messages within context window limits', () => {
			const content: Anthropic.TextBlockParam[] = [{
				type: 'text',
				text: 'a'.repeat(4000) // Should be roughly 1000 tokens
			}]

			const size = contextWindow.calculateMessageSize(content)
			const error = contextWindow.validateMessageSize(size)
			expect(error).toBeUndefined()
		})

		it('should reject messages that exceed context window limits', () => {
			// Available space = context window (128000) - system prompt (2000) - response buffer (4000) = 122000
			// So a message of 123000 tokens should be rejected
			const size = 123000
			const error = contextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('too large')
			expect(error).toContain('system prompt')
			expect(error).toContain('reserved for response')
		})
	})

	describe('History Truncation', () => {
		it('should recommend truncation when message size exceeds available space', () => {
			// Available space = context window (128000) - system prompt (2000) - response buffer (4000) = 122000
			const messages: Anthropic.MessageParam[] = [
				{ role: "user", content: 'a'.repeat(400000) }, // ~100k tokens
				{ role: "assistant", content: 'b'.repeat(100000) } // ~25k tokens
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(true)
		})

		it('should not recommend truncation for normal message sizes', () => {
			const messages: Anthropic.MessageParam[] = [
				{ role: "user", content: 'a'.repeat(4000) }, // ~1k tokens
				{ role: "assistant", content: 'b'.repeat(4000) } // ~1k tokens
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(false)
		})

		it('should handle array content in messages', () => {
			const messages: Anthropic.MessageParam[] = [
				{
					role: "user",
					content: [
						{ type: "text", text: 'a'.repeat(400000) } as Anthropic.TextBlockParam, // ~100k tokens
						{ type: "text", text: 'b'.repeat(100000) } as Anthropic.TextBlockParam // ~25k tokens
					]
				}
			]
			const shouldTruncate = contextWindow.shouldTruncateHistory(messages)
			expect(shouldTruncate).toBe(true)
		})

		it('should handle empty history', () => {
			const shouldTruncate = contextWindow.shouldTruncateHistory([])
			expect(shouldTruncate).toBe(false)
		})
	})

	describe('Different Model Configurations', () => {
		it('should handle models with smaller context windows', () => {
			const smallModelInfo = {
				...mockModelInfo,
				contextWindow: 8000,
				maxTokens: 1000
			}
			const smallContextWindow = new ContextWindow(smallModelInfo, 2000)

			// Available space = context window (8000) - system prompt (2000) - response buffer (1000) = 5000
			// So a message of 6000 tokens should be rejected
			const size = 6000
			const error = smallContextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('too large')
		})

		it('should handle models with larger response buffers', () => {
			const largeBufferModelInfo = {
				...mockModelInfo,
				maxTokens: 8000
			}
			const largeBufferContextWindow = new ContextWindow(largeBufferModelInfo, 2000)

			// Available space = context window (128000) - system prompt (2000) - response buffer (8000) = 118000
			// So a message of 119000 tokens should be rejected
			const size = 119000
			const error = largeBufferContextWindow.validateMessageSize(size)
			expect(error).toBeDefined()
			expect(error).toContain('reserved for response')
		})
	})
})
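
The tests cover text and image blocks but not tool blocks; per ContextWindow.ts, a tool_use block is charged for its name and stringified input at 4 characters per token, plus a flat 100 tokens for structure. A hypothetical case that could sit inside the describe('ContextWindow') block above (the tool name and input are illustrative, not from the diff):

	it('should charge tool_use blocks for name, input, and structure', () => {
		const block: Anthropic.ToolUseBlockParam = {
			type: 'tool_use',
			id: 'toolu_example', // id is not counted by the estimator
			name: 'read_file', // 9 chars -> ceil(9/4) = 3 tokens
			input: { path: 'src/core/Cline.ts' } // 'path' + value = 21 chars -> ceil(21/4) = 6 tokens
		}
		const size = contextWindow.calculateMessageSize([block])
		expect(size).toBe(109) // 3 (name) + 6 (input) + 100 (structure)
	})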
