Commit d154d05

Enable separate config for truncation for models without context caching
1 parent d78da19 commit d154d05

2 files changed (+64, -21 lines)

src/core/Cline.ts

Lines changed: 20 additions & 13 deletions
```diff
@@ -53,7 +53,7 @@ import { AssistantMessageContent, parseAssistantMessage, ToolParamName, ToolUseN
 import { formatResponse } from "./prompts/responses"
 import { SYSTEM_PROMPT } from "./prompts/system"
 import { modes, defaultModeSlug, getModeBySlug } from "../shared/modes"
-import { truncateHalfConversation } from "./sliding-window"
+import { truncateConversationIfNeeded } from "./sliding-window"
 import { ClineProvider, GlobalFileNames } from "./webview/ClineProvider"
 import { detectCodeOmission } from "../integrations/editor/detect-omission"
 import { BrowserSession } from "../services/browser/BrowserSession"
@@ -837,18 +837,25 @@ export class Cline {
 
 		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
 		if (previousApiReqIndex >= 0) {
-			const previousRequest = this.clineMessages[previousApiReqIndex]
-			if (previousRequest && previousRequest.text) {
-				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(
-					previousRequest.text,
-				)
-				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-				const contextWindow = this.api.getModel().info.contextWindow || 128_000
-				const maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
-				if (totalTokens >= maxAllowedSize) {
-					const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-					await this.overwriteApiConversationHistory(truncatedMessages)
-				}
+			const previousRequest = this.clineMessages[previousApiReqIndex]?.text
+			if (!previousRequest) return
+
+			const {
+				tokensIn = 0,
+				tokensOut = 0,
+				cacheWrites = 0,
+				cacheReads = 0,
+			}: ClineApiReqInfo = JSON.parse(previousRequest)
+			const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads
+
+			const trimmedMessages = truncateConversationIfNeeded(
+				this.apiConversationHistory,
+				totalTokens,
+				this.api.getModel().info,
+			)
+
+			if (trimmedMessages !== this.apiConversationHistory) {
+				await this.overwriteApiConversationHistory(trimmedMessages)
 			}
 		}
 
```
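The inline threshold math that used to live in Cline.ts now sits behind truncateConversationIfNeeded, but the formula is unchanged: truncation kicks in once the previous request's combined tokens reach max(contextWindow − 40 000, contextWindow × 0.8). A small illustrative sketch, not part of the commit (the 200 000- and 128 000-token windows are assumed example values):

```ts
// Illustrative only: shows when the new threshold would trigger truncation.
interface ModelInfoLike {
	contextWindow: number
	supportsPromptCache: boolean
}

// Same formula as the getMaxTokensFor*Models helpers introduced in this commit.
function maxAllowedTokens(model: ModelInfoLike): number {
	return Math.max(model.contextWindow - 40_000, model.contextWindow * 0.8)
}

const cachingModel: ModelInfoLike = { contextWindow: 200_000, supportsPromptCache: true }
const plainModel: ModelInfoLike = { contextWindow: 128_000, supportsPromptCache: false }

console.log(maxAllowedTokens(cachingModel)) // 160000 -> truncate once totalTokens reaches 160k
console.log(maxAllowedTokens(plainModel)) // 102400 -> 0.8 * 128000 wins over 128000 - 40000
```

The caller then persists the result only when truncateConversationIfNeeded hands back a different array, so the `!==` check doubles as the "was anything truncated?" signal.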
src/core/sliding-window/index.ts

Lines changed: 44 additions & 8 deletions
```diff
@@ -1,4 +1,5 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+<<<<<<< HEAD
 
 /*
 We can't implement a dynamically updating sliding window as it would break prompt cache
@@ -9,18 +10,53 @@ Therefore, this function should only be called when absolutely necessary to fit
 context limits, not as a continuous process.
 */
 export function truncateHalfConversation(
+=======
+import { ModelInfo } from "../../shared/api"
+import { MessageParam } from "@anthropic-ai/sdk/resources/messages.mjs"
+
+export function truncateConversation(
+>>>>>>> 455d850c (Enable separate config for truncation for models without context caching)
 	messages: Anthropic.Messages.MessageParam[],
+	fracToRemove: number,
 ): Anthropic.Messages.MessageParam[] {
-	// API expects messages to be in user-assistant order, and tool use messages must be followed by tool results. We need to maintain this structure while truncating.
-
-	// Always keep the first Task message (this includes the project's file structure in environment_details)
 	const truncatedMessages = [messages[0]]
-
-	// Remove half of user-assistant pairs
-	const messagesToRemove = Math.floor(messages.length / 4) * 2 // has to be even number
-
-	const remainingMessages = messages.slice(messagesToRemove + 1) // has to start with assistant message since tool result cannot follow assistant message with no tool use
+	const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove)
+	const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2)
+	const remainingMessages = messages.slice(messagesToRemove + 1)
 	truncatedMessages.push(...remainingMessages)
 
 	return truncatedMessages
 }
+
+export function truncateConversationIfNeeded(
+	messages: MessageParam[],
+	totalTokens: number,
+	modelInfo: ModelInfo,
+): MessageParam[] {
+	if (modelInfo.supportsPromptCache) {
+		return totalTokens < getMaxTokensForPromptCachingModels(modelInfo)
+			? messages
+			: truncateConversation(messages, getTruncFractionForPromptCachingModels(modelInfo))
+	} else {
+		const thresh = getMaxTokensForNonPromptCachingModels(modelInfo)
+		return totalTokens < thresh
+			? messages
+			: truncateConversation(messages, getTruncFractionForNonPromptCachingModels(modelInfo))
+	}
+}
+
+function getMaxTokensForPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
+}
+
+function getTruncFractionForPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.min(80_000, modelInfo.contextWindow * 0.4)
+}
+
+function getMaxTokensForNonPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
+}
+
+function getTruncFractionForNonPromptCachingModels(modelInfo: ModelInfo): number {
+	return Math.min(80_000, modelInfo.contextWindow * 0.4)
+}
```
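To make the new fracToRemove behaviour concrete: the first (task) message is always kept, the count of removed messages is rounded down to an even number so user/assistant pairing survives, and everything after the removed prefix is retained. A rough usage sketch, not from the repo (the message list and the 0.5 fraction are made up for illustration; the function body simply mirrors the committed logic):

```ts
// Stand-in message type; only the alternation of roles matters here.
type Msg = { role: "user" | "assistant"; content: string }

// Mirrors truncateConversation from this commit, restated for a self-contained example.
function truncateConversation(messages: Msg[], fracToRemove: number): Msg[] {
	const truncated = [messages[0]] // always keep the initial task message
	const raw = Math.floor((messages.length - 1) * fracToRemove)
	const toRemove = raw - (raw % 2) // round down to an even number of messages
	truncated.push(...messages.slice(toRemove + 1))
	return truncated
}

const history: Msg[] = [
	{ role: "user", content: "task" }, // index 0, always preserved
	{ role: "assistant", content: "a1" },
	{ role: "user", content: "u1" },
	{ role: "assistant", content: "a2" },
	{ role: "user", content: "u2" },
	{ role: "assistant", content: "a3" },
	{ role: "user", content: "u3" },
]

// (7 - 1) * 0.5 = 3, rounded down to 2, so indices 1-2 are dropped and 5 messages remain.
console.log(truncateConversation(history, 0.5).map((m) => m.content)) // ["task", "a2", "u2", "a3", "u3"]
```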

0 commit comments
