@@ -30,6 +30,8 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
         initialCharactersRemovalCount,
         tokenizer,
         chatWrapper,
+        failedCompressionErrorMessage: "Failed to compress chat history for context shift due to a too long prompt or system message that cannot be compressed without affecting the generation quality. " +
+            "Consider increasing the context size or shortening the long prompt or system message.",
         compressChatHistory({chatHistory, charactersToRemove, estimatedCharactersPerToken}) {
             const res = chatHistory.map((item) => structuredClone(item));
             let charactersLeftToRemove = charactersToRemove;
@@ -66,6 +68,8 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
             }

             function removeHistoryThatLedToModelResponseAtIndex(index: number) {
+                let removedItems = 0;
+
                 for (let i = index - 1; i >= 0; i--) {
                     const historyItem = res[i];

@@ -79,13 +83,19 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
                         break; // keep the first system message

                     if (historyItem.type === "user" || historyItem.type === "system") {
-                        const newText = truncateLlamaTextAndRoundToWords(LlamaText.fromJSON(historyItem.text), charactersLeftToRemove);
+                        const newText = truncateLlamaTextAndRoundToWords(
+                            LlamaText.fromJSON(historyItem.text),
+                            charactersLeftToRemove,
+                            undefined,
+                            false
+                        );
                         const newTextString = newText.toString();
                         const historyItemString = LlamaText.fromJSON(historyItem.text).toString();

                         if (newText.values.length === 0) {
                             res.splice(i, 1);
                             i++;
+                            removedItems++;
                             charactersLeftToRemove -= historyItemString.length;
                         } else if (newTextString.length < historyItemString.length) {
                             charactersLeftToRemove -= historyItemString.length - newTextString.length;
@@ -98,6 +108,66 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
                         void (historyItem satisfies never);
                     }
                 }
+
+                return removedItems;
+            }
+
+            function compressHistoryThatLedToModelResponseAtIndex(index: number, keepTokensCount: number = 0) {
+                let removedItems = 0;
+                let promptStartIndex: number | undefined = undefined;
+
+                for (let i = index - 1; i >= 0; i--) {
+                    const historyItem = res[i];
+
+                    if (historyItem == null)
+                        continue;
+
+                    if (historyItem.type === "model") {
+                        promptStartIndex = i + 1;
+                        break;
+                    }
+
+                    if (i === 0 && historyItem.type === "system") {
+                        promptStartIndex = i + 1;
+                        break; // keep the first system message
+                    }
+                }
+
+                if (promptStartIndex == null || promptStartIndex >= index)
+                    return 0;
+
+                for (let i = promptStartIndex; i < index && charactersLeftToRemove > 0; i++) {
+                    const historyItem = res[i];
+
+                    if (historyItem == null || historyItem.type !== "user")
+                        continue;
+
+                    let removeChars = Math.min(charactersLeftToRemove, historyItem.text.length);
+                    if (keepTokensCount > 0) {
+                        removeChars -= Math.floor(keepTokensCount * estimatedCharactersPerToken);
+                        if (removeChars < 0)
+                            removeChars = 0;
+
+                        keepTokensCount -= Math.min(
+                            keepTokensCount,
+                            Math.max(0, historyItem.text.length - removeChars) / estimatedCharactersPerToken
+                        );
+                    }
+
+                    const newText = truncateTextAndRoundToWords(historyItem.text, removeChars, undefined, false);
+                    if (newText.length === 0) {
+                        res.splice(i, 1);
+                        i--;
+                        index--;
+                        removedItems++;
+                        charactersLeftToRemove -= historyItem.text.length;
+                    } else {
+                        charactersLeftToRemove -= historyItem.text.length - newText.length;
+                        historyItem.text = newText;
+                    }
+                }
+
+                return removedItems;
             }

             function compressFirstModelResponse() {
@@ -116,7 +186,7 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
                             continue;

                         if (typeof item === "string") {
-                            const newText = truncateTextAndRoundToWords(item, charactersLeftToRemove);
+                            const newText = truncateTextAndRoundToWords(item, charactersLeftToRemove, undefined, true);

                             if (newText === "") {
                                 historyItem.response.splice(t, 1);
@@ -139,14 +209,14 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
                     if (historyItem.response.length === 0) {
                         // if the model response is removed from the history,
                         // the things that led to it are not important anymore
-                        removeHistoryThatLedToModelResponseAtIndex(i);
+                        i -= removeHistoryThatLedToModelResponseAtIndex(i);
                         res.splice(i, 1);
                         i--;
                     }
                 }
             }

-            function compressLastModelResponse(minCharactersToKeep: number = 20) {
+            function compressLastModelResponse(minCharactersToKeep: number = 60) {
                 const lastHistoryItem = res[res.length - 1];

                 if (lastHistoryItem == null || lastHistoryItem.type !== "model")
@@ -157,14 +227,27 @@ export async function eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy(
                 if (lastResponseItem == null || typeof lastResponseItem !== "string")
                     return;

-                const nextTextLength = lastResponseItem.length - charactersLeftToRemove;
-                const charactersToRemoveFromText = charactersLeftToRemove + Math.max(0, nextTextLength - minCharactersToKeep);
-                const newText = truncateTextAndRoundToWords(lastResponseItem, charactersToRemoveFromText);
+                compressHistoryThatLedToModelResponseAtIndex(res.length - 1, maxTokensCount / 4);
+
+                if (charactersLeftToRemove <= 0)
+                    return;
+
+                const nextTextLength = Math.max(
+                    Math.min(lastResponseItem.length, minCharactersToKeep),
+                    lastResponseItem.length - charactersLeftToRemove
+                );
+                const charactersToRemoveFromText = lastResponseItem.length - nextTextLength;
+                const newText = truncateTextAndRoundToWords(lastResponseItem, charactersToRemoveFromText, undefined, true);

                 if (newText.length < lastResponseItem.length) {
                     lastHistoryItem.response[lastHistoryItem.response.length - 1] = newText;
                     charactersLeftToRemove -= lastResponseItem.length - newText.length;
                 }
+
+                if (charactersLeftToRemove <= 0)
+                    return;
+
+                compressHistoryThatLedToModelResponseAtIndex(res.length - 1);
             }

             compressFunctionCalls();