
Commit ec01e1f

move remaining context management out of Cline.ts (RooCodeInc#2367)
* move context management out
* changeset
1 parent 4a0a40e · commit ec01e1f

File tree

3 files changed: +90 -56 lines


.changeset/nice-boats-kick.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+updated move context management out of cline
```
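For context: in the changesets workflow, the frontmatter line `"claude-dev": patch` requests a patch-level version bump for the claude-dev package, and the free-text body below the frontmatter becomes the changelog entry when the release is cut.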

src/core/Cline.ts

Lines changed: 10 additions & 48 deletions
```diff
@@ -1373,58 +1373,20 @@ export class Cline {
 			})
 		}
 
-		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-		if (previousApiReqIndex >= 0) {
-			const previousRequest = this.clineMessages[previousApiReqIndex]
-			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				let contextWindow = this.api.getModel().info.contextWindow || 128_000
-				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
-				if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
-					contextWindow = 64_000
-				}
-				let maxAllowedSize: number
-				switch (contextWindow) {
-					case 64_000: // deepseek models
-						maxAllowedSize = contextWindow - 27_000
-						break
-					case 128_000: // most models
-						maxAllowedSize = contextWindow - 30_000
-						break
-					case 200_000: // claude models
-						maxAllowedSize = contextWindow - 40_000
-						break
-					default:
-						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
-				}
-
-				// This is the most reliable way to know when we're close to hitting the context window.
-				if (totalTokens >= maxAllowedSize) {
-					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
-					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
-					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
-					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
-
-					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
-					this.conversationHistoryDeletedRange = this.contextManager.getNextTruncationRange(
-						this.apiConversationHistory,
-						this.conversationHistoryDeletedRange,
-						keep,
-					)
-					await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
-					// await this.overwriteApiConversationHistory(truncatedMessages)
-				}
-			}
-		}
-
-		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
-		const truncatedConversationHistory = this.contextManager.getTruncatedMessages(
+		const contextManagementMetadata = this.contextManager.getNewContextMessagesAndMetadata(
 			this.apiConversationHistory,
+			this.clineMessages,
+			this.api,
 			this.conversationHistoryDeletedRange,
+			previousApiReqIndex,
 		)
 
-		let stream = this.api.createMessage(systemPrompt, truncatedConversationHistory)
+		if (contextManagementMetadata.updatedConversationHistoryDeletedRange) {
+			this.conversationHistoryDeletedRange = contextManagementMetadata.conversationHistoryDeletedRange
+			await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
+		}
+
+		let stream = this.api.createMessage(systemPrompt, contextManagementMetadata.truncatedConversationHistory)
 
 		const iterator = stream[Symbol.asyncIterator]()
```

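The new call site consumes the result of `getNewContextMessagesAndMetadata` as a plain object with three fields. The commit lets TypeScript infer that return type rather than declaring one, so the interface below is a hypothetical sketch of the shape, reconstructed from the object literal that `ContextManager` returns in the diff below (the name `ContextManagementMetadata` is ours, not the repo's):

```typescript
import { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical name: the commit lets TypeScript infer this return type.
// The fields mirror the object literal returned by
// ContextManager.getNewContextMessagesAndMetadata in the diff below.
interface ContextManagementMetadata {
	// Inclusive [start, end] range of messages removed from the API
	// conversation history; undefined if nothing has been truncated yet.
	conversationHistoryDeletedRange: [number, number] | undefined
	// True only when this call advanced the range, i.e. when the previous
	// request's token usage crossed the context-window threshold; the caller
	// persists the new range via saveClineMessages() in that case.
	updatedConversationHistoryDeletedRange: boolean
	// The history with the deleted range sliced out; this is what gets passed
	// to api.createMessage().
	truncatedConversationHistory: Anthropic.Messages.MessageParam[]
}
```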
src/core/context-management/ContextManager.ts

Lines changed: 75 additions & 8 deletions
```diff
@@ -1,10 +1,77 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ClineApiReqInfo, ClineMessage } from "../../shared/ExtensionMessage"
+import { ApiHandler } from "../../api"
+import { OpenAiHandler } from "../../api/providers/openai"
 
 export class ContextManager {
-	getNextTruncationRange(
-		messages: Anthropic.Messages.MessageParam[],
-		currentDeletedRange: [number, number] | undefined = undefined,
-		keep: "half" | "quarter" = "half",
+	getNewContextMessagesAndMetadata(
+		apiConversationHistory: Anthropic.Messages.MessageParam[],
+		clineMessages: ClineMessage[],
+		api: ApiHandler,
+		conversationHistoryDeletedRange: [number, number] | undefined,
+		previousApiReqIndex: number,
+	) {
+		let updatedConversationHistoryDeletedRange = false
+
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				let contextWindow = api.getModel().info.contextWindow || 128_000
+				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
+				if (api instanceof OpenAiHandler && api.getModel().id.toLowerCase().includes("deepseek")) {
+					contextWindow = 64_000
+				}
+				let maxAllowedSize: number
+				switch (contextWindow) {
+					case 64_000: // deepseek models
+						maxAllowedSize = contextWindow - 27_000
+						break
+					case 128_000: // most models
+						maxAllowedSize = contextWindow - 30_000
+						break
+					case 200_000: // claude models
+						maxAllowedSize = contextWindow - 40_000
+						break
+					default:
+						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
+				}
+
+				// This is the most reliable way to know when we're close to hitting the context window.
+				if (totalTokens >= maxAllowedSize) {
+					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
+					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
+					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
+					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
+
+					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
+					conversationHistoryDeletedRange = this.getNextTruncationRange(
+						apiConversationHistory,
+						conversationHistoryDeletedRange,
+						keep,
+					)
+
+					updatedConversationHistoryDeletedRange = true
+				}
+			}
+		}
+
+		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
+		const truncatedConversationHistory = this.getTruncatedMessages(apiConversationHistory, conversationHistoryDeletedRange)
+
+		return {
+			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
+			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
+			truncatedConversationHistory: truncatedConversationHistory,
+		}
+	}
+
+	public getNextTruncationRange(
+		apiMessages: Anthropic.Messages.MessageParam[],
+		currentDeletedRange: [number, number] | undefined,
+		keep: "half" | "quarter",
 	): [number, number] {
 		// Since we always keep the first message, currentDeletedRange[0] will always be 1 (for now until we have a smarter truncation algorithm)
 		const rangeStartIndex = 1
@@ -16,28 +83,28 @@ export class ContextManager {
 			// We first calculate half of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor((messages.length - startOfRest) / 4) * 2 // Keep even number
+			messagesToRemove = Math.floor((apiMessages.length - startOfRest) / 4) * 2 // Keep even number
 		} else {
 			// Remove 3/4 of remaining user-assistant pairs
 			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor(((messages.length - startOfRest) * 3) / 4 / 2) * 2
+			messagesToRemove = Math.floor(((apiMessages.length - startOfRest) * 3) / 4 / 2) * 2
 		}
 
 		let rangeEndIndex = startOfRest + messagesToRemove - 1
 
 		// Make sure the last message being removed is a user message, so that the next message after the initial task message is an assistant message. This preservers the user-assistant-user-assistant structure.
 		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
-		if (messages[rangeEndIndex].role !== "user") {
+		if (apiMessages[rangeEndIndex].role !== "user") {
 			rangeEndIndex -= 1
 		}
 
 		// this is an inclusive range that will be removed from the conversation history
 		return [rangeStartIndex, rangeEndIndex]
 	}
 
-	getTruncatedMessages(
+	public getTruncatedMessages(
 		messages: Anthropic.Messages.MessageParam[],
 		deletedRange: [number, number] | undefined,
 	): Anthropic.Messages.MessageParam[] {
```

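The numeric behavior is easiest to verify with concrete values. The sketch below reproduces the two rules the class now owns: the context-window threshold and the truncation-range pair math. It is a standalone approximation, not the class itself; in particular, the derivation of `startOfRest` from `currentDeletedRange` is an assumption, since the second hunk above omits the lines between `rangeStartIndex` and the pair arithmetic.

```typescript
type Role = "user" | "assistant"

// Threshold rule from the diff: how many tokens may accumulate before the
// history is truncated, per context-window size.
function maxAllowedSize(contextWindow: number): number {
	switch (contextWindow) {
		case 64_000:
			return contextWindow - 27_000 // deepseek: 37k usable
		case 128_000:
			return contextWindow - 30_000 // most models: 98k usable
		case 200_000:
			return contextWindow - 40_000 // claude: 160k usable
		default:
			return Math.max(contextWindow - 40_000, contextWindow * 0.8)
	}
}

// Pair math from getNextTruncationRange, over roles only (a stand-in for
// apiMessages, since only .role matters here).
function nextTruncationRange(
	roles: Role[],
	currentDeletedRange: [number, number] | undefined,
	keep: "half" | "quarter",
): [number, number] {
	const rangeStartIndex = 1 // message 0 (the original task) is always kept
	// Assumption: resume right after any previously deleted range.
	const startOfRest = currentDeletedRange ? currentDeletedRange[1] + 1 : 1

	// Both branches floor to a pair count, then double it, so an even number
	// of messages is removed and user/assistant alternation survives.
	const remaining = roles.length - startOfRest
	const messagesToRemove =
		keep === "half"
			? Math.floor(remaining / 4) * 2
			: Math.floor((remaining * 3) / 4 / 2) * 2

	let rangeEndIndex = startOfRest + messagesToRemove - 1
	// The last removed message must be a user message, so the message that
	// follows the kept task message is an assistant message.
	if (roles[rangeEndIndex] !== "user") {
		rangeEndIndex -= 1
	}
	return [rangeStartIndex, rangeEndIndex] // inclusive
}

// 11 alternating messages: user turns at even indices.
const roles: Role[] = Array.from({ length: 11 }, (_, i) => (i % 2 === 0 ? "user" : "assistant"))
console.log(maxAllowedSize(64_000)) // 37000
console.log(nextTruncationRange(roles, undefined, "half")) // [1, 4]
console.log(nextTruncationRange(roles, undefined, "quarter")) // [1, 6]
```

With 11 alternating messages and no prior range, "half" removes indices 1 through 4 (two user-assistant pairs) and "quarter" removes 1 through 6 (three pairs); both ranges end on a user message, so the rangeEndIndex adjustment never fires in these cases.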