
Commit ec01e1f

move remaining context management out of Cline.ts (RooCodeInc#2367)
* move context management out
* changeset
1 parent 4a0a40e · commit ec01e1f

File tree

3 files changed: +90 -56 lines


.changeset/nice-boats-kick.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+updated move context management out of cline
```
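For context: in the changesets workflow, the frontmatter line `"claude-dev": patch` requests a patch-level version bump for the claude-dev package, and the free-text body below the frontmatter becomes the changelog entry when the release is cut.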

src/core/Cline.ts

Lines changed: 10 additions & 48 deletions
```diff
@@ -1373,58 +1373,20 @@ export class Cline {
 			})
 		}
 
-		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-		if (previousApiReqIndex >= 0) {
-			const previousRequest = this.clineMessages[previousApiReqIndex]
-			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				let contextWindow = this.api.getModel().info.contextWindow || 128_000
-				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
-				if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
-					contextWindow = 64_000
-				}
-				let maxAllowedSize: number
-				switch (contextWindow) {
-					case 64_000: // deepseek models
-						maxAllowedSize = contextWindow - 27_000
-						break
-					case 128_000: // most models
-						maxAllowedSize = contextWindow - 30_000
-						break
-					case 200_000: // claude models
-						maxAllowedSize = contextWindow - 40_000
-						break
-					default:
-						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
-				}
-
-				// This is the most reliable way to know when we're close to hitting the context window.
-				if (totalTokens >= maxAllowedSize) {
-					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
-					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
-					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
-					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
-
-					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
-					this.conversationHistoryDeletedRange = this.contextManager.getNextTruncationRange(
-						this.apiConversationHistory,
-						this.conversationHistoryDeletedRange,
-						keep,
-					)
-					await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
-					// await this.overwriteApiConversationHistory(truncatedMessages)
-				}
-			}
-		}
-
-		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
-		const truncatedConversationHistory = this.contextManager.getTruncatedMessages(
+		const contextManagementMetadata = this.contextManager.getNewContextMessagesAndMetadata(
 			this.apiConversationHistory,
+			this.clineMessages,
+			this.api,
 			this.conversationHistoryDeletedRange,
+			previousApiReqIndex,
 		)
 
-		let stream = this.api.createMessage(systemPrompt, truncatedConversationHistory)
+		if (contextManagementMetadata.updatedConversationHistoryDeletedRange) {
+			this.conversationHistoryDeletedRange = contextManagementMetadata.conversationHistoryDeletedRange
+			await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
+		}
+
+		let stream = this.api.createMessage(systemPrompt, contextManagementMetadata.truncatedConversationHistory)
 
 		const iterator = stream[Symbol.asyncIterator]()
```

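The new call site consumes the result of `getNewContextMessagesAndMetadata` as a plain object with three fields. The commit lets TypeScript infer that return type rather than declaring one, so the interface below is a hypothetical sketch of the shape, reconstructed from the object literal that `ContextManager` returns in the diff below (the name `ContextManagementMetadata` is ours, not the repo's):

```typescript
import { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical name: the commit lets TypeScript infer this return type.
// The fields mirror the object literal returned by
// ContextManager.getNewContextMessagesAndMetadata in the diff below.
interface ContextManagementMetadata {
	// Inclusive [start, end] range of messages removed from the API
	// conversation history; undefined if nothing has been truncated yet.
	conversationHistoryDeletedRange: [number, number] | undefined
	// True only when this call advanced the range, i.e. when the previous
	// request's token usage crossed the context-window threshold; the caller
	// persists the new range via saveClineMessages() in that case.
	updatedConversationHistoryDeletedRange: boolean
	// The history with the deleted range sliced out; this is what gets passed
	// to api.createMessage().
	truncatedConversationHistory: Anthropic.Messages.MessageParam[]
}
```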
src/core/context-management/ContextManager.ts

Lines changed: 75 additions & 8 deletions
```diff
@@ -1,10 +1,77 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ClineApiReqInfo, ClineMessage } from "../../shared/ExtensionMessage"
+import { ApiHandler } from "../../api"
+import { OpenAiHandler } from "../../api/providers/openai"
 
 export class ContextManager {
-	getNextTruncationRange(
-		messages: Anthropic.Messages.MessageParam[],
-		currentDeletedRange: [number, number] | undefined = undefined,
-		keep: "half" | "quarter" = "half",
+	getNewContextMessagesAndMetadata(
+		apiConversationHistory: Anthropic.Messages.MessageParam[],
+		clineMessages: ClineMessage[],
+		api: ApiHandler,
+		conversationHistoryDeletedRange: [number, number] | undefined,
+		previousApiReqIndex: number,
+	) {
+		let updatedConversationHistoryDeletedRange = false
+
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				let contextWindow = api.getModel().info.contextWindow || 128_000
+				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
+				if (api instanceof OpenAiHandler && api.getModel().id.toLowerCase().includes("deepseek")) {
+					contextWindow = 64_000
+				}
+				let maxAllowedSize: number
+				switch (contextWindow) {
+					case 64_000: // deepseek models
+						maxAllowedSize = contextWindow - 27_000
+						break
+					case 128_000: // most models
+						maxAllowedSize = contextWindow - 30_000
+						break
+					case 200_000: // claude models
+						maxAllowedSize = contextWindow - 40_000
+						break
+					default:
+						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
+				}
+
+				// This is the most reliable way to know when we're close to hitting the context window.
+				if (totalTokens >= maxAllowedSize) {
+					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
+					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
+					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
+					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
+
+					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
+					conversationHistoryDeletedRange = this.getNextTruncationRange(
+						apiConversationHistory,
+						conversationHistoryDeletedRange,
+						keep,
+					)
+
+					updatedConversationHistoryDeletedRange = true
+				}
+			}
+		}
+
+		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
+		const truncatedConversationHistory = this.getTruncatedMessages(apiConversationHistory, conversationHistoryDeletedRange)
+
+		return {
+			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
+			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
+			truncatedConversationHistory: truncatedConversationHistory,
+		}
+	}
+
+	public getNextTruncationRange(
+		apiMessages: Anthropic.Messages.MessageParam[],
+		currentDeletedRange: [number, number] | undefined,
+		keep: "half" | "quarter",
 	): [number, number] {
 		// Since we always keep the first message, currentDeletedRange[0] will always be 1 (for now until we have a smarter truncation algorithm)
 		const rangeStartIndex = 1
@@ -16,28 +83,28 @@ export class ContextManager {
 			// We first calculate half of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor((messages.length - startOfRest) / 4) * 2 // Keep even number
+			messagesToRemove = Math.floor((apiMessages.length - startOfRest) / 4) * 2 // Keep even number
 		} else {
 			// Remove 3/4 of remaining user-assistant pairs
 			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor(((messages.length - startOfRest) * 3) / 4 / 2) * 2
+			messagesToRemove = Math.floor(((apiMessages.length - startOfRest) * 3) / 4 / 2) * 2
 		}
 
 		let rangeEndIndex = startOfRest + messagesToRemove - 1
 
 		// Make sure the last message being removed is a user message, so that the next message after the initial task message is an assistant message. This preservers the user-assistant-user-assistant structure.
 		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
-		if (messages[rangeEndIndex].role !== "user") {
+		if (apiMessages[rangeEndIndex].role !== "user") {
 			rangeEndIndex -= 1
 		}
 
 		// this is an inclusive range that will be removed from the conversation history
 		return [rangeStartIndex, rangeEndIndex]
 	}
 
-	getTruncatedMessages(
+	public getTruncatedMessages(
 		messages: Anthropic.Messages.MessageParam[],
 		deletedRange: [number, number] | undefined,
 	): Anthropic.Messages.MessageParam[] {
```

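The numeric behavior is easiest to verify with concrete values. The sketch below reproduces the two rules the class now owns: the context-window threshold and the truncation-range pair math. It is a standalone approximation, not the class itself; in particular, the derivation of `startOfRest` from `currentDeletedRange` is an assumption, since the second hunk above omits the lines between `rangeStartIndex` and the pair arithmetic.

```typescript
type Role = "user" | "assistant"

// Threshold rule from the diff: how many tokens may accumulate before the
// history is truncated, per context-window size.
function maxAllowedSize(contextWindow: number): number {
	switch (contextWindow) {
		case 64_000:
			return contextWindow - 27_000 // deepseek: 37k usable
		case 128_000:
			return contextWindow - 30_000 // most models: 98k usable
		case 200_000:
			return contextWindow - 40_000 // claude: 160k usable
		default:
			return Math.max(contextWindow - 40_000, contextWindow * 0.8)
	}
}

// Pair math from getNextTruncationRange, over roles only (a stand-in for
// apiMessages, since only .role matters here).
function nextTruncationRange(
	roles: Role[],
	currentDeletedRange: [number, number] | undefined,
	keep: "half" | "quarter",
): [number, number] {
	const rangeStartIndex = 1 // message 0 (the original task) is always kept
	// Assumption: resume right after any previously deleted range.
	const startOfRest = currentDeletedRange ? currentDeletedRange[1] + 1 : 1

	// Both branches floor to a pair count, then double it, so an even number
	// of messages is removed and user/assistant alternation survives.
	const remaining = roles.length - startOfRest
	const messagesToRemove =
		keep === "half"
			? Math.floor(remaining / 4) * 2
			: Math.floor((remaining * 3) / 4 / 2) * 2

	let rangeEndIndex = startOfRest + messagesToRemove - 1
	// The last removed message must be a user message, so the message that
	// follows the kept task message is an assistant message.
	if (roles[rangeEndIndex] !== "user") {
		rangeEndIndex -= 1
	}
	return [rangeStartIndex, rangeEndIndex] // inclusive
}

// 11 alternating messages: user turns at even indices.
const roles: Role[] = Array.from({ length: 11 }, (_, i) => (i % 2 === 0 ? "user" : "assistant"))
console.log(maxAllowedSize(64_000)) // 37000
console.log(nextTruncationRange(roles, undefined, "half")) // [1, 4]
console.log(nextTruncationRange(roles, undefined, "quarter")) // [1, 6]
```

With 11 alternating messages and no prior range, "half" removes indices 1 through 4 (two user-assistant pairs) and "quarter" removes 1 through 6 (three pairs); both ranges end on a user message, so the rangeEndIndex adjustment never fires in these cases.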