@@ -3,7 +3,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+import { TOKEN_BUFFER, estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
 
 /**
  * Tests for the truncateConversation function
@@ -121,10 +121,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 49999,
+			totalTokens: 44999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -133,7 +133,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 50001,
+			totalTokens: 50001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -148,10 +148,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 79999,
+			totalTokens: 74999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -160,7 +160,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 80001,
+			totalTokens: 80001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -175,10 +175,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 39999,
+			totalTokens: 34999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -187,7 +187,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 40001,
+			totalTokens: 40001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -202,10 +202,10 @@ describe("getMaxTokens", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Below max tokens - no truncation
+		// Below max tokens and buffer - no truncation
 		const result1 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 169999,
+			totalTokens: 164999, // Well below threshold + buffer
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -214,7 +214,7 @@ describe("getMaxTokens", () => {
 		// Above max tokens - truncate
 		const result2 = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
-			totalTokens: 170001,
+			totalTokens: 170001, // Above threshold
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 		})
@@ -244,7 +244,7 @@ describe("truncateConversationIfNeeded", () => {
 	it("should not truncate if tokens are below max tokens threshold", () => {
 		const modelInfo = createModelInfo(100000, true, 30000)
 		const maxTokens = 100000 - 30000 // 70000
-		const totalTokens = 69999 // Below threshold
+		const totalTokens = 64999 // Well below threshold + buffer
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
@@ -337,8 +337,8 @@ describe("truncateConversationIfNeeded", () => {
 			{ role: messages[messages.length - 1].role, content: smallContent },
 		]
 
-		// Set base tokens so total is below threshold even with small content added
-		const baseTokensForSmall = availableTokens - smallContentTokens - 10
+		// Set base tokens so total is well below threshold + buffer even with small content added
+		const baseTokensForSmall = availableTokens - smallContentTokens - TOKEN_BUFFER - 10
 		const resultWithSmall = truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
@@ -388,7 +388,29 @@ describe("truncateConversationIfNeeded", () => {
 		})
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
+
+	it("should truncate if tokens are within TOKEN_BUFFER of the threshold", () => {
+		const modelInfo = createModelInfo(100000, true, 30000)
+		const maxTokens = 100000 - 30000 // 70000
+		const totalTokens = 66000 // Within 5000 of threshold (70000)
+
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
+
+		const result = truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+		})
+		expect(result).toEqual(expectedResult)
+	})
 })
+
 /**
  * Tests for the estimateTokenCount function
  */
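
For readers skimming the diff, the behavior these tests now encode can be summarized with a small sketch. This is an illustrative reconstruction inferred from the assertions above, not the actual code in `../index`: the `TOKEN_BUFFER` value of 5000, the 0.5 truncation fraction, the `Sketch`-suffixed names, and the handling of a missing `maxTokens` are assumptions, and the token estimate for the last message's content (which the tests neutralize by setting it to an empty string) is omitted.

import { Anthropic } from "@anthropic-ai/sdk"

// Assumed value: implied by the 66000-vs-70000 test case above, not taken from the source.
const TOKEN_BUFFER = 5000

type TruncateOptions = {
	messages: Anthropic.Messages.MessageParam[]
	totalTokens: number
	contextWindow: number
	maxTokens?: number | null
}

// Keeps the first message and removes a fraction of the remaining ones,
// rounded down to an even count so user/assistant pairs stay paired.
function truncateConversationSketch(
	messages: Anthropic.Messages.MessageParam[],
	fracToRemove: number,
): Anthropic.Messages.MessageParam[] {
	const rawCount = Math.floor((messages.length - 1) * fracToRemove)
	const removeCount = rawCount - (rawCount % 2)
	return [messages[0], ...messages.slice(removeCount + 1)]
}

function truncateConversationIfNeededSketch({
	messages,
	totalTokens,
	contextWindow,
	maxTokens,
}: TruncateOptions): Anthropic.Messages.MessageParam[] {
	const allowedTokens = contextWindow - (maxTokens ?? 0)
	// Truncate not only when the threshold is exceeded but also when the total
	// comes within TOKEN_BUFFER of it: against a 70000 threshold, 64999 is left
	// alone while 66000 (and anything at or above 65000) triggers a 0.5 truncation.
	return totalTokens < allowedTokens - TOKEN_BUFFER ? messages : truncateConversationSketch(messages, 0.5)
}

The key change the tests exercise is comparing against `allowedTokens - TOKEN_BUFFER` rather than `allowedTokens`, which is why the "no truncation" fixtures each drop by 5000 (49999 to 44999, 79999 to 74999, and so on).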