Commit 6df165d
fix: respect condensation percentage threshold when saving settings
- Fixed issue where context condensation was triggered on every action after saving settings
- Condensation now only triggers when the percentage threshold is actually met
- If tokens exceed hard limit but percentage is below threshold, sliding window truncation is used instead
- Added detailed logging to help debug condensation triggers
- Updated tests to reflect the new behavior

This prevents unnecessary condensation operations that were happening even when the configured percentage threshold had not been reached.
1 parent ad0e33e commit 6df165d

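In short, the change makes the configured percentage threshold the sole trigger for automatic condensation; exceeding the hard token limit alone now falls back to sliding-window truncation. Below is a minimal TypeScript sketch of that decision flow, with simplified inputs: decideContextAction is illustrative and not part of the codebase, while effectiveThreshold and allowedTokens mirror names that appear in the diff further down.

type ContextAction = "condense" | "slidingWindow" | "none"

// Illustrative sketch only; the real logic lives in truncateConversationIfNeeded (see diff below).
function decideContextAction(
	prevContextTokens: number,
	contextWindow: number,
	allowedTokens: number, // hard token limit
	effectiveThreshold: number, // autoCondenseContextPercent or a profile override
): ContextAction {
	const contextPercent = (100 * prevContextTokens) / contextWindow

	// Automatic condensation only when the percentage threshold is actually met.
	if (contextPercent >= effectiveThreshold) return "condense"

	// Over the hard limit but below the percentage threshold: sliding-window truncation.
	if (prevContextTokens > allowedTokens) return "slidingWindow"

	return "none"
}
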
2 files changed (+53, -43 lines)

src/core/sliding-window/__tests__/sliding-window.spec.ts

Lines changed: 27 additions & 42 deletions
@@ -544,72 +544,56 @@ describe("Sliding Window", () => {
 		})
 	})
 
-	it("should use summarizeConversation when autoCondenseContext is true and tokens exceed threshold", async () => {
-		// Mock the summarizeConversation function
-		const mockSummary = "This is a summary of the conversation"
-		const mockCost = 0.05
-		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
-			messages: [
-				{ role: "user", content: "First message" },
-				{ role: "assistant", content: mockSummary, isSummary: true },
-				{ role: "user", content: "Last message" },
-			],
-			summary: mockSummary,
-			cost: mockCost,
-			newContextTokens: 100,
-		}
-
-		const summarizeSpy = vi
-			.spyOn(condenseModule, "summarizeConversation")
-			.mockResolvedValue(mockSummarizeResponse)
+	it("should NOT use summarizeConversation when tokens exceed hard limit but percentage is below threshold", async () => {
+		// Reset any previous mock calls
+		vi.clearAllMocks()
+		const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 		const modelInfo = createModelInfo(100000, 30000)
-		const totalTokens = 70001 // Above threshold
+		const totalTokens = 70001 // Above hard limit but only 70% of context window
 		const messagesWithSmallContent = [
 			...messages.slice(0, -1),
 			{ ...messages[messages.length - 1], content: "" },
 		]
 
+		// When truncating, always uses 0.5 fraction
+		// With 4 messages after the first, 0.5 fraction means remove 2 messages
+		const expectedMessages = [
+			messagesWithSmallContent[0],
+			messagesWithSmallContent[3],
+			messagesWithSmallContent[4],
+		]
+
 		const result = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
-			autoCondenseContextPercent: 100,
+			autoCondenseContextPercent: 100, // 100% threshold, but tokens are only at 70%
 			systemPrompt: "System prompt",
 			taskId,
 			profileThresholds: {},
 			currentProfileId: "default",
 		})
 
-		// Verify summarizeConversation was called with the right parameters
-		expect(summarizeSpy).toHaveBeenCalledWith(
-			messagesWithSmallContent,
-			mockApiHandler,
-			"System prompt",
-			taskId,
-			70001,
-			true,
-			undefined, // customCondensingPrompt
-			undefined, // condensingApiHandler
-		)
+		// Verify summarizeConversation was NOT called (percentage threshold not met)
+		expect(summarizeSpy).not.toHaveBeenCalled()
 
-		// Verify the result contains the summary information
-		expect(result).toMatchObject({
-			messages: mockSummarizeResponse.messages,
-			summary: mockSummary,
-			cost: mockCost,
+		// Verify it used sliding window truncation instead
+		expect(result).toEqual({
+			messages: expectedMessages,
+			summary: "",
+			cost: 0,
 			prevContextTokens: totalTokens,
 		})
-		// newContextTokens might be present, but we don't need to verify its exact value
 
 		// Clean up
 		summarizeSpy.mockRestore()
 	})
 
-	it("should fall back to truncateConversation when autoCondenseContext is true but summarization fails", async () => {
+	it("should fall back to truncateConversation when autoCondenseContext is true, percentage threshold is met, but summarization fails", async () => {
 		// Mock the summarizeConversation function to return an error
 		const mockSummarizeResponse: condenseModule.SummarizeResponse = {
 			messages: messages, // Original messages unchanged
@@ -623,7 +607,8 @@ describe("Sliding Window", () => {
 			.mockResolvedValue(mockSummarizeResponse)
 
 		const modelInfo = createModelInfo(100000, 30000)
-		const totalTokens = 70001 // Above threshold
+		// Set tokens to meet percentage threshold (100% of context)
+		const totalTokens = 100001 // Above 100% threshold
 		const messagesWithSmallContent = [
 			...messages.slice(0, -1),
 			{ ...messages[messages.length - 1], content: "" },
@@ -644,17 +629,17 @@ describe("Sliding Window", () => {
 			maxTokens: modelInfo.maxTokens,
 			apiHandler: mockApiHandler,
 			autoCondenseContext: true,
-			autoCondenseContextPercent: 100,
+			autoCondenseContextPercent: 100, // 100% threshold, tokens are at 100%+
 			systemPrompt: "System prompt",
 			taskId,
 			profileThresholds: {},
 			currentProfileId: "default",
 		})
 
-		// Verify summarizeConversation was called
+		// Verify summarizeConversation was called (percentage threshold was met)
 		expect(summarizeSpy).toHaveBeenCalled()
 
-		// Verify it fell back to truncation
+		// Verify it fell back to truncation after summarization failed
 		expect(result.messages).toEqual(expectedMessages)
 		expect(result.summary).toBe("")
 		expect(result.prevContextTokens).toBe(totalTokens)

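The expectedMessages in the first test above follow from the truncation arithmetic its comments describe. Here is a small sketch of that arithmetic, under the assumption that truncation keeps the first message and removes an even number of the messages that follow it; sketchTruncate is illustrative and not the repository's truncateConversation.

// Illustrative sketch of the truncation arithmetic the test expects.
// Assumption: the first message is always kept and an even number of the
// remaining messages is removed, based on the 0.5 fraction.
function sketchTruncate<T>(messages: T[], fracToRemove: number): T[] {
	const removable = messages.length - 1 // everything after the first message
	const toRemove = Math.floor((removable * fracToRemove) / 2) * 2 // round down to an even count
	return [messages[0], ...messages.slice(1 + toRemove)]
}

// 5 messages, fraction 0.5: 4 removable, remove 2 -> keep indices 0, 3, 4,
// matching expectedMessages in the test above.
const kept = sketchTruncate(["m0", "m1", "m2", "m3", "m4"], 0.5)
console.log(kept) // ["m0", "m3", "m4"]
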
src/core/sliding-window/index.ts

Lines changed: 26 additions & 1 deletion
@@ -144,7 +144,17 @@ export async function truncateConversationIfNeeded({
 
 	if (autoCondenseContext) {
 		const contextPercent = (100 * prevContextTokens) / contextWindow
-		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
+		// Check if we should trigger condensation based on percentage threshold
+		const shouldCondenseByPercentage = contextPercent >= effectiveThreshold
+		// Check if we're over the hard token limit
+		const isOverHardLimit = prevContextTokens > allowedTokens
+
+		// Only trigger automatic condensation when the percentage threshold is met
+		// If we're over the hard limit but below percentage, we'll use sliding window instead
+		if (shouldCondenseByPercentage) {
+			console.log(
+				`[Condensation] Triggering automatic condensation: ${contextPercent.toFixed(1)}% >= ${effectiveThreshold}% threshold`,
+			)
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation(
 				messages,
@@ -159,14 +169,29 @@ export async function truncateConversationIfNeeded({
 			if (result.error) {
 				error = result.error
 				cost = result.cost
+				console.log(`[Condensation] Condensation failed: ${result.error}`)
+				// If condensation fails and we're over the hard limit, fall through to sliding window
 			} else {
+				console.log(
+					`[Condensation] Successfully condensed context from ${prevContextTokens} to ${result.newContextTokens} tokens`,
+				)
 				return { ...result, prevContextTokens }
 			}
+		} else if (isOverHardLimit) {
+			// If we're over the hard limit but haven't reached the percentage threshold,
+			// log this condition - we'll use sliding window truncation below
+			console.log(
+				`[Condensation] Context tokens (${prevContextTokens}) exceed allowed (${allowedTokens}), but percentage (${contextPercent.toFixed(1)}%) < threshold (${effectiveThreshold}%). Will use sliding window truncation.`,
+			)
 		}
 	}
 
 	// Fall back to sliding window truncation if needed
+	// This happens when we're over the hard token limit regardless of percentage
 	if (prevContextTokens > allowedTokens) {
+		console.log(
+			`[Condensation] Using sliding window truncation: ${prevContextTokens} tokens > ${allowedTokens} allowed`,
+		)
 		const truncatedMessages = truncateConversation(messages, 0.5, taskId)
 		return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
 	}

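For a quick sanity check of the two scenarios exercised by the updated tests, the numbers work out as in the sketch below. allowedTokens is not shown in this diff; a value of 70000 is assumed purely for illustration, consistent with the first test calling 70001 tokens "above hard limit but only 70% of context window".

// Worked numbers for the two updated tests (illustrative; allowedTokens = 70000 is an assumption).
const contextWindow = 100000
const effectiveThreshold = 100 // autoCondenseContextPercent in the tests
const allowedTokens = 70000

for (const prevContextTokens of [70001, 100001]) {
	const contextPercent = (100 * prevContextTokens) / contextWindow
	const shouldCondenseByPercentage = contextPercent >= effectiveThreshold
	const isOverHardLimit = prevContextTokens > allowedTokens
	// 70001 -> ~70.0% < 100%, over hard limit: sliding-window truncation, no summarizeConversation call
	// 100001 -> ~100.0% >= 100%: condensation attempted (falls back to truncation on error)
	console.log({ prevContextTokens, contextPercent, shouldCondenseByPercentage, isOverHardLimit })
}
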