@@ -63,7 +63,8 @@ type TruncateOptions = {
6363 contextWindow : number
6464 maxTokens ?: number | null
6565 apiHandler : ApiHandler
66- autoCondenseContext ?: boolean
66+ autoCondenseContext : boolean
67+ autoCondenseContextPercent : number
6768 systemPrompt : string
6869}
6970
@@ -83,6 +84,7 @@ export async function truncateConversationIfNeeded({
8384 maxTokens,
8485 apiHandler,
8586 autoCondenseContext,
87+ autoCondenseContextPercent,
8688 systemPrompt,
8789} : TruncateOptions ) : Promise < TruncateResponse > {
8890 // Calculate the maximum tokens reserved for response
@@ -96,21 +98,28 @@ export async function truncateConversationIfNeeded({
9698 : await estimateTokenCount ( [ { type : "text" , text : lastMessageContent as string } ] , apiHandler )
9799
98100 // Calculate total effective tokens (totalTokens never includes the last message)
99- const effectiveTokens = totalTokens + lastMessageTokens
101+ const prevContextTokens = totalTokens + lastMessageTokens
100102
101103 // Calculate available tokens for conversation history
102104 // Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
103105 const allowedTokens = contextWindow * ( 1 - TOKEN_BUFFER_PERCENTAGE ) - reservedTokens
104106
105- // Determine if truncation is needed and apply if necessary
106- if ( effectiveTokens <= allowedTokens ) {
107- return { messages, summary : "" , cost : 0 , prevContextTokens : effectiveTokens }
108- } else if ( autoCondenseContext ) {
109- const result = await summarizeConversation ( messages , apiHandler , systemPrompt )
110- if ( result . summary ) {
111- return { ...result , prevContextTokens : effectiveTokens }
107+ if ( autoCondenseContext ) {
108+ const contextPercent = ( 100 * prevContextTokens ) / contextWindow
109+ if ( contextPercent >= autoCondenseContextPercent || prevContextTokens > allowedTokens ) {
110+ // Attempt to intelligently condense the context
111+ const result = await summarizeConversation ( messages , apiHandler , systemPrompt )
112+ if ( result . summary ) {
113+ return { ...result , prevContextTokens }
114+ }
112115 }
113116 }
114- const truncatedMessages = truncateConversation ( messages , 0.5 )
115- return { messages : truncatedMessages , prevContextTokens : effectiveTokens , summary : "" , cost : 0 }
117+
118+ // Fall back to sliding window truncation if needed
119+ if ( prevContextTokens > allowedTokens ) {
120+ const truncatedMessages = truncateConversation ( messages , 0.5 )
121+ return { messages : truncatedMessages , prevContextTokens, summary : "" , cost : 0 }
122+ }
123+ // No truncation or condensation needed
124+ return { messages, summary : "" , cost : 0 , prevContextTokens }
116125}
0 commit comments