
Commit 1fe6536

[Condense] Add a slider for the context condense threshold (RooCodeInc#3790)
* [Condense] Add a slider for the context condense threshold
* slider UI
* condense if we reach the threshold
* fixes
* test typing fixes
* add more truncate tests
* changeset
* update translations
* fix missing type
1 parent 4e71b78 commit 1fe6536

34 files changed: +349 -51 lines
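
The heart of the change is a new decision rule in src/core/sliding-window/index.ts: the context is condensed either once it reaches a user-configured percentage of the context window (the new slider) or once it would overflow the hard token budget. A minimal TypeScript sketch of that rule, distilled from the diff below; the standalone `shouldCondense` helper is illustrative, not an actual export:

```typescript
// Illustrative distillation of the new logic in src/core/sliding-window/index.ts.
// shouldCondense is a hypothetical helper, not part of the module's API.
function shouldCondense(
	prevContextTokens: number, // tokens in the conversation, including the last message
	contextWindow: number, // the model's total context window
	allowedTokens: number, // hard budget: window minus safety buffer and reserved response tokens
	autoCondenseContextPercent: number, // the slider value, in percent
): boolean {
	const contextPercent = (100 * prevContextTokens) / contextWindow
	// Condense once the slider threshold is reached, or once we would overflow anyway.
	return contextPercent >= autoCondenseContextPercent || prevContextTokens > allowedTokens
}
```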

.changeset/whole-swans-cheer.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Adds a slider to configure threshold to trigger intelligent context condensing
```

src/core/sliding-window/__tests__/sliding-window.test.ts

Lines changed: 127 additions & 0 deletions
```diff
@@ -248,6 +248,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -277,6 +279,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -304,6 +308,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo1.contextWindow,
 			maxTokens: modelInfo1.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -313,6 +319,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo2.contextWindow,
 			maxTokens: modelInfo2.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -329,6 +337,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo1.contextWindow,
 			maxTokens: modelInfo1.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -338,6 +348,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo2.contextWindow,
 			maxTokens: modelInfo2.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -369,6 +381,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(resultWithSmall).toEqual({

@@ -399,6 +413,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate

@@ -422,6 +438,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate

@@ -448,6 +466,8 @@ describe("truncateConversationIfNeeded", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result).toEqual({

@@ -488,6 +508,7 @@ describe("truncateConversationIfNeeded", () => {
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -534,6 +555,7 @@ describe("truncateConversationIfNeeded", () => {
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})

@@ -570,6 +592,7 @@ describe("truncateConversationIfNeeded", () => {
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: false,
+			autoCondenseContextPercent: 50, // This shouldn't matter since autoCondenseContext is false
 			systemPrompt: "System prompt",
 		})

@@ -587,6 +610,94 @@ describe("truncateConversationIfNeeded", () => {
 		// Clean up
 		summarizeSpy.mockRestore()
 	})
+
+	it("should use summarizeConversation when autoCondenseContext is true and context percent exceeds threshold", async () => {
+		// Mock the summarizeConversation function
+		const mockSummary = "This is a summary of the conversation"
+		const mockCost = 0.05
+		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+			messages: [
+				{ role: "user", content: "First message" },
+				{ role: "assistant", content: mockSummary, isSummary: true },
+				{ role: "user", content: "Last message" },
+			],
+			summary: mockSummary,
+			cost: mockCost,
+			newContextTokens: 100,
+		}
+
+		const summarizeSpy = jest
+			.spyOn(condenseModule, "summarizeConversation")
+			.mockResolvedValue(mockSummarizeResponse)
+
+		const modelInfo = createModelInfo(100000, 30000)
+		// Set tokens to be below the allowedTokens threshold but above the percentage threshold
+		const contextWindow = modelInfo.contextWindow
+		const totalTokens = 60000 // Below allowedTokens but 60% of context window
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		const result = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+			autoCondenseContext: true,
+			autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 60%
+			systemPrompt: "System prompt",
+		})
+
+		// Verify summarizeConversation was called with the right parameters
+		expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt")
+
+		// Verify the result contains the summary information
+		expect(result).toMatchObject({
+			messages: mockSummarizeResponse.messages,
+			summary: mockSummary,
+			cost: mockCost,
+			prevContextTokens: totalTokens,
+		})
+
+		// Clean up
+		summarizeSpy.mockRestore()
+	})
+
+	it("should not use summarizeConversation when autoCondenseContext is true but context percent is below threshold", async () => {
+		// Reset any previous mock calls
+		jest.clearAllMocks()
+		const summarizeSpy = jest.spyOn(condenseModule, "summarizeConversation")
+
+		const modelInfo = createModelInfo(100000, 30000)
+		// Set tokens to be below both the allowedTokens threshold and the percentage threshold
+		const contextWindow = modelInfo.contextWindow
+		const totalTokens = 40000 // 40% of context window
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		const result = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens,
+			contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+			autoCondenseContext: true,
+			autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
+			systemPrompt: "System prompt",
+		})
+
+		// Verify summarizeConversation was not called
+		expect(summarizeSpy).not.toHaveBeenCalled()
+
+		// Verify no truncation or summarization occurred
+		expect(result).toEqual({
+			messages: messagesWithSmallContent,
+			summary: "",
+			cost: 0,
+			prevContextTokens: totalTokens,
+		})
+
+		// Clean up
+		summarizeSpy.mockRestore()
+	})
 })

 /**
@@ -624,6 +735,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result1).toEqual({

@@ -640,6 +753,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result2.messages).not.toEqual(messagesWithSmallContent)

@@ -664,6 +779,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result1).toEqual({

@@ -680,6 +797,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result2.messages).not.toEqual(messagesWithSmallContent)

@@ -703,6 +822,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result1.messages).toEqual(messagesWithSmallContent)

@@ -714,6 +835,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result2).not.toEqual(messagesWithSmallContent)

@@ -735,6 +858,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result1.messages).toEqual(messagesWithSmallContent)

@@ -746,6 +871,8 @@ describe("getMaxTokens", () => {
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
+			autoCondenseContext: false,
+			autoCondenseContextPercent: 100,
 			systemPrompt: "System prompt",
 		})
 		expect(result2).not.toEqual(messagesWithSmallContent)
```
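
The two new tests pin the behavior at 60% and 40% of the window against a 50% threshold. Since the implementation compares with `>=`, a context sitting exactly at the threshold should also condense; a sketch of such a boundary test, reusing the suite's existing helpers and mocks (this test is hypothetical, not part of the commit):

```typescript
// Hypothetical boundary-case test; createModelInfo, mockApiHandler, messages,
// and condenseModule are the same helpers used by the suite above.
it("condenses when context percent equals the threshold exactly", async () => {
	const summarizeSpy = jest.spyOn(condenseModule, "summarizeConversation").mockResolvedValue({
		messages,
		summary: "summary",
		cost: 0,
		newContextTokens: 100,
	})

	const modelInfo = createModelInfo(100000, 30000)
	// An empty last message keeps prevContextTokens equal to totalTokens.
	const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

	await truncateConversationIfNeeded({
		messages: messagesWithSmallContent,
		totalTokens: 50000, // exactly 50% of the 100k context window
		contextWindow: modelInfo.contextWindow,
		maxTokens: modelInfo.maxTokens,
		apiHandler: mockApiHandler,
		autoCondenseContext: true,
		autoCondenseContextPercent: 50, // 50% >= 50%, so condensing should fire
		systemPrompt: "System prompt",
	})

	expect(summarizeSpy).toHaveBeenCalled()
	summarizeSpy.mockRestore()
})
```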

src/core/sliding-window/index.ts

Lines changed: 20 additions & 11 deletions
```diff
@@ -63,7 +63,8 @@ type TruncateOptions = {
 	contextWindow: number
 	maxTokens?: number | null
 	apiHandler: ApiHandler
-	autoCondenseContext?: boolean
+	autoCondenseContext: boolean
+	autoCondenseContextPercent: number
 	systemPrompt: string
 }

@@ -83,6 +84,7 @@ export async function truncateConversationIfNeeded({
 	maxTokens,
 	apiHandler,
 	autoCondenseContext,
+	autoCondenseContextPercent,
 	systemPrompt,
 }: TruncateOptions): Promise<TruncateResponse> {
 	// Calculate the maximum tokens reserved for response

@@ -96,21 +98,28 @@
 		: await estimateTokenCount([{ type: "text", text: lastMessageContent as string }], apiHandler)

 	// Calculate total effective tokens (totalTokens never includes the last message)
-	const effectiveTokens = totalTokens + lastMessageTokens
+	const prevContextTokens = totalTokens + lastMessageTokens

 	// Calculate available tokens for conversation history
 	// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens

-	// Determine if truncation is needed and apply if necessary
-	if (effectiveTokens <= allowedTokens) {
-		return { messages, summary: "", cost: 0, prevContextTokens: effectiveTokens }
-	} else if (autoCondenseContext) {
-		const result = await summarizeConversation(messages, apiHandler, systemPrompt)
-		if (result.summary) {
-			return { ...result, prevContextTokens: effectiveTokens }
+	if (autoCondenseContext) {
+		const contextPercent = (100 * prevContextTokens) / contextWindow
+		if (contextPercent >= autoCondenseContextPercent || prevContextTokens > allowedTokens) {
+			// Attempt to intelligently condense the context
+			const result = await summarizeConversation(messages, apiHandler, systemPrompt)
+			if (result.summary) {
+				return { ...result, prevContextTokens }
+			}
 		}
 	}
-	const truncatedMessages = truncateConversation(messages, 0.5)
-	return { messages: truncatedMessages, prevContextTokens: effectiveTokens, summary: "", cost: 0 }
+
+	// Fall back to sliding window truncation if needed
+	if (prevContextTokens > allowedTokens) {
+		const truncatedMessages = truncateConversation(messages, 0.5)
+		return { messages: truncatedMessages, prevContextTokens, summary: "", cost: 0 }
+	}
+	// No truncation or condensation needed
+	return { messages, summary: "", cost: 0, prevContextTokens }
 }
```
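
To make the branch order concrete, take the numbers the tests use. Assuming TOKEN_BUFFER_PERCENTAGE is 0.1 (an illustrative value; the real constant is defined in this module), a 100k window with 30k reserved for the response leaves a 60k hard budget, and a 60k context with the slider at 50 condenses via the percentage branch:

```typescript
// Worked example of the new control flow. TOKEN_BUFFER_PERCENTAGE = 0.1 is an
// assumption for illustration; see the constant defined in this module.
const contextWindow = 100_000
const reservedTokens = 30_000 // maxTokens reserved for the model's response
const allowedTokens = contextWindow * (1 - 0.1) - reservedTokens // 60_000

const prevContextTokens = 60_000
const contextPercent = (100 * prevContextTokens) / contextWindow // 60

// Slider at 50: 60 >= 50, so summarizeConversation is attempted first.
// If it returns an empty summary, the sliding-window fallback checks a strict
// comparison: 60_000 > 60_000 is false, so the messages pass through unchanged.
```

Note the asymmetry: the condense branch triggers at the threshold (`>=`), while the truncation fallback only fires strictly above the hard budget (`>`).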

src/core/task/Task.ts

Lines changed: 9 additions & 2 deletions
```diff
@@ -1460,8 +1460,14 @@
 	}

 	public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
-		const { apiConfiguration, autoApprovalEnabled, alwaysApproveResubmit, requestDelaySeconds, experiments } =
-			(await this.providerRef.deref()?.getState()) ?? {}
+		const {
+			apiConfiguration,
+			autoApprovalEnabled,
+			alwaysApproveResubmit,
+			requestDelaySeconds,
+			experiments,
+			autoCondenseContextPercent = 100,
+		} = (await this.providerRef.deref()?.getState()) ?? {}

 		let rateLimitDelay = 0

@@ -1510,6 +1516,7 @@
 			contextWindow,
 			apiHandler: this.api,
 			autoCondenseContext,
+			autoCondenseContextPercent,
 			systemPrompt,
 		})
 		if (truncateResult.messages !== this.apiConversationHistory) {
```
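
The destructuring default `autoCondenseContextPercent = 100` means a task with no stored setting only condenses once the window is effectively full, preserving the previous behavior. A minimal sketch of that fallback, with a hypothetical state shape:

```typescript
// Hypothetical minimal state shape, just to show the destructuring default.
type ProviderState = { autoCondenseContextPercent?: number }

const state: ProviderState | undefined = undefined // e.g. the provider was disposed
const { autoCondenseContextPercent = 100 } = state ?? {}
console.log(autoCondenseContextPercent) // 100: the threshold only fires at a full window
```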

src/core/webview/ClineProvider.ts

Lines changed: 3 additions & 0 deletions
```diff
@@ -1222,6 +1222,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			alwaysAllowModeSwitch,
 			alwaysAllowSubtasks,
 			allowedMaxRequests,
+			autoCondenseContextPercent,
 			soundEnabled,
 			ttsEnabled,
 			ttsSpeed,

@@ -1293,6 +1294,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false,
 			alwaysAllowSubtasks: alwaysAllowSubtasks ?? false,
 			allowedMaxRequests: allowedMaxRequests ?? Infinity,
+			autoCondenseContextPercent: autoCondenseContextPercent ?? 100,
 			uriScheme: vscode.env.uriScheme,
 			currentTaskItem: this.getCurrentCline()?.taskId
 				? (taskHistory || []).find((item: HistoryItem) => item.id === this.getCurrentCline()?.taskId)

@@ -1396,6 +1398,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			alwaysAllowModeSwitch: stateValues.alwaysAllowModeSwitch ?? false,
 			alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false,
 			allowedMaxRequests: stateValues.allowedMaxRequests ?? Infinity,
+			autoCondenseContextPercent: stateValues.autoCondenseContextPercent ?? 100,
 			taskHistory: stateValues.taskHistory,
 			allowedCommands: stateValues.allowedCommands,
 			soundEnabled: stateValues.soundEnabled ?? false,
```
