
Commit a068445

Add a dynamic token buffer
1 parent 8df6bdf commit a068445

File tree

3 files changed: +31, -12 lines changed


.changeset/swift-lamps-decide.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Add a dynamic token buffer

src/core/sliding-window/__tests__/sliding-window.test.ts

Lines changed: 19 additions & 8 deletions
@@ -3,7 +3,12 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import { TOKEN_BUFFER, estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import {
+	TOKEN_BUFFER_PERCENTAGE,
+	estimateTokenCount,
+	truncateConversation,
+	truncateConversationIfNeeded,
+} from "../index"
 
 /**
  * Tests for the truncateConversation function

@@ -121,10 +126,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 44999, // Well below threshold + buffer
+			totalTokens: 39999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})

@@ -148,10 +154,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 74999, // Well below threshold + buffer
+			totalTokens: 69999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})

@@ -202,10 +209,11 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
 		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 164999, // Well below threshold + buffer
+			totalTokens: 149999, // Well below threshold + dynamic buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})

@@ -244,7 +252,8 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 64999 // Well below threshold + buffer
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
+		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

@@ -338,7 +347,8 @@ describe("truncateConversationIfNeeded", () => {
 		]
 
 		// Set base tokens so total is well below threshold + buffer even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - TOKEN_BUFFER - 10
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
+		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,

@@ -389,10 +399,11 @@ describe("truncateConversationIfNeeded", () => {
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
 
-	it("should truncate if tokens are within TOKEN_BUFFER of the threshold", () => {
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 66000 // Within 5000 of threshold (70000)
+		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
+		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000)
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

src/core/sliding-window/index.ts

Lines changed: 7 additions & 4 deletions
@@ -4,7 +4,10 @@ import { Tiktoken } from "js-tiktoken/lite"
 import o200kBase from "js-tiktoken/ranks/o200k_base"
 
 export const TOKEN_FUDGE_FACTOR = 1.5
-export const TOKEN_BUFFER = 5000
+/**
+ * Default percentage of the context window to use as a buffer when deciding when to truncate
+ */
+export const TOKEN_BUFFER_PERCENTAGE = 0.1
 
 /**
  * Counts tokens for user content using tiktoken for text

@@ -108,9 +111,9 @@ export function truncateConversationIfNeeded({
 	const effectiveTokens = totalTokens + lastMessageTokens
 
 	// Calculate available tokens for conversation history
-	const allowedTokens = contextWindow - reservedTokens
+	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
 	// Determine if truncation is needed and apply if necessary
-	// Truncate if we're within TOKEN_BUFFER of the limit
-	return effectiveTokens > allowedTokens - TOKEN_BUFFER ? truncateConversation(messages, 0.5) : messages
+	return effectiveTokens > allowedTokens ? truncateConversation(messages, 0.5) : messages
 }
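
The net effect is that the fixed 5,000-token cushion now scales with the model's context window. A standalone before/after sketch of the decision (hypothetical helper names; the real truncateConversationIfNeeded takes a single options object, as the diff shows):

// Before: truncate once usage came within a flat 5,000 tokens of the limit.
function shouldTruncateOld(effectiveTokens: number, contextWindow: number, reservedTokens: number): boolean {
	const TOKEN_BUFFER = 5000
	return effectiveTokens > contextWindow - reservedTokens - TOKEN_BUFFER
}

// After: truncate once usage comes within 10% of the context window.
function shouldTruncateNew(effectiveTokens: number, contextWindow: number, reservedTokens: number): boolean {
	const TOKEN_BUFFER_PERCENTAGE = 0.1
	return effectiveTokens > contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
}

// Example: a 200,000-token window reserving 30,000 tokens now truncates at
// 150,000 rather than 165,000, a 20,000-token margin in place of 5,000.

For small windows the behavior barely changes (10% of 50,000 is 5,000, exactly the old constant), while large-context models get proportionally more headroom before truncation is forced.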
