
Commit 764d963

Add a 5k token buffer before the end of the context window
1 parent 2a7be4b · commit 764d963

File tree

2 files changed: +42 -18 lines changed

src/core/sliding-window/__tests__/sliding-window.test.ts

Lines changed: 38 additions & 16 deletions
@@ -3,7 +3,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import { TOKEN_BUFFER, estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
 
 /**
  * Tests for the truncateConversation function
@@ -121,10 +121,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 49999,
+			totalTokens: 44999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -133,7 +133,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 50001,
+			totalTokens: 50001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -148,10 +148,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 79999,
+			totalTokens: 74999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -160,7 +160,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 80001,
+			totalTokens: 80001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -175,10 +175,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 39999,
+			totalTokens: 34999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -187,7 +187,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 40001,
+			totalTokens: 40001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +202,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 169999,
+			totalTokens: 164999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -214,7 +214,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 170001,
+			totalTokens: 170001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +244,7 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 69999 // Below threshold
+		const totalTokens = 64999 // Well below threshold + buffer
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -337,8 +337,8 @@ describe("truncateConversationIfNeeded", () => {
 			{ role: messages[messages.length - 1].role, content: smallContent },
 		]
 
-		// Set base tokens so total is below threshold even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - 10
+		// Set base tokens so total is well below threshold + buffer even with small content added
+		const baseTokensForSmall = availableTokens - smallContentTokens - TOKEN_BUFFER - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -388,7 +388,29 @@ describe("truncateConversationIfNeeded", () => {
 		})
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
+
+	it("should truncate if tokens are within TOKEN_BUFFER of the threshold", () => {
+		const modelInfo = createModelInfo(100000, true, 30000)
+		const maxTokens = 100000 - 30000 // 70000
+		const totalTokens = 66000 // Within 5000 of threshold (70000)
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
+
+		const result = truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
+		expect(result).toEqual(expectedResult)
+	})
 })
+
 /**
  * Tests for the estimateTokenCount function
  */
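For reference, the arithmetic behind the new test: createModelInfo(100000, true, 30000) gives a threshold of contextWindow - maxTokens = 70000 tokens, so the buffered cutoff is 70000 - TOKEN_BUFFER = 65000, and a conversation at 66000 tokens is now truncated even though it sits below the raw threshold. The sketch below models the halving the test expects; it is an illustration only, truncateConversationSketch is a hypothetical name, and the real truncateConversation may handle rounding or user/assistant pairing differently.

// Minimal sketch, not the shipped implementation: keep the first message and
// drop `fracToRemove` of the messages that follow it.
function truncateConversationSketch<T>(messages: T[], fracToRemove: number): T[] {
	const rest = messages.length - 1 // messages after the first
	const toRemove = Math.floor(rest * fracToRemove)
	return [messages[0], ...messages.slice(1 + toRemove)]
}

// Five messages with a 0.5 fraction: 2 of the 4 trailing messages are removed,
// matching the test's expected [messages[0], messages[3], messages[4]].
truncateConversationSketch(["m0", "m1", "m2", "m3", "m4"], 0.5) // ["m0", "m3", "m4"]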

src/core/sliding-window/index.ts

Lines changed: 4 additions & 2 deletions
@@ -3,7 +3,8 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import { Tiktoken } from "js-tiktoken/lite"
 import o200kBase from "js-tiktoken/ranks/o200k_base"
 
-const TOKEN_FUDGE_FACTOR = 1.5
+export const TOKEN_FUDGE_FACTOR = 1.5
+export const TOKEN_BUFFER = 5000
 
 /**
  * Counts tokens for user content using tiktoken for text
@@ -110,5 +111,6 @@ export function truncateConversationIfNeeded({
 	const allowedTokens = contextWindow - reservedTokens
 
 	// Determine if truncation is needed and apply if necessary
-	return effectiveTokens < allowedTokens ? messages : truncateConversation(messages, 0.5)
+	// Truncate if we're within TOKEN_BUFFER of the limit
+	return effectiveTokens > allowedTokens - TOKEN_BUFFER ? truncateConversation(messages, 0.5) : messages
 }
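Taken together with the exported constant above, the guard can be read as the following standalone sketch. shouldTruncate is a hypothetical helper, not part of the commit, and effectiveTokens and reservedTokens stand in for values computed earlier in truncateConversationIfNeeded.

const TOKEN_BUFFER = 5000

// Hypothetical standalone restatement of the new return expression.
function shouldTruncate(effectiveTokens: number, contextWindow: number, reservedTokens: number): boolean {
	const allowedTokens = contextWindow - reservedTokens
	// Truncate once the conversation comes within TOKEN_BUFFER of the limit,
	// not only after it crosses the limit itself.
	return effectiveTokens > allowedTokens - TOKEN_BUFFER
}

shouldTruncate(64999, 100000, 30000) // false: still clear of the 65000 cutoff
shouldTruncate(66000, 100000, 30000) // true: within 5000 tokens of the 70000 limit
shouldTruncate(70001, 100000, 30000) // true: over the limit, as before

Compared with the old check, effectiveTokens < allowedTokens ? messages : truncateConversation(messages, 0.5), truncation now fires 5000 tokens early, so the conversation never runs right up against the context window.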
