diff --git a/src/core/sliding-window/__tests__/sliding-window.spec.ts b/src/core/sliding-window/__tests__/sliding-window.spec.ts index 0f2c70c81b..fbdc3ad511 100644 --- a/src/core/sliding-window/__tests__/sliding-window.spec.ts +++ b/src/core/sliding-window/__tests__/sliding-window.spec.ts @@ -1027,6 +1027,154 @@ describe("Sliding Window", () => { // Clean up summarizeSpy.mockRestore() }) + describe("Token-based thresholds", () => { + // Helper function to create messages with specific token counts + const createMessages = (count: number, tokensPerMessage: number): ApiMessage[] => { + const messages: ApiMessage[] = [] + for (let i = 0; i < count; i++) { + const role = i % 2 === 0 ? "user" : "assistant" + // Create content that roughly corresponds to the desired token count + // This is a simplification - actual token count depends on the tokenizer + const content = "x".repeat(tokensPerMessage * 4) // Rough approximation + messages.push({ role: role as "user" | "assistant", content }) + } + return messages + } + + it("should trigger condensing when token threshold is reached", async () => { + vi.clearAllMocks() + const mockCost = 0.05 + const mockSummarizeResponse: condenseModule.SummarizeResponse = { + messages: [ + { role: "assistant", content: "Summary", ts: Date.now(), isSummary: true }, + { role: "user", content: "Message 8", ts: Date.now() }, + { role: "assistant", content: "Response 9", ts: Date.now() }, + { role: "user", content: "Message 10", ts: Date.now() }, + ], + summary: "Summary of conversation", + cost: mockCost, + newContextTokens: 400, + } + + const summarizeSpy = vi + .spyOn(condenseModule, "summarizeConversation") + .mockResolvedValue(mockSummarizeResponse) + + const messages = createMessages(10, 100) // 10 messages, 100 tokens each = 1000 tokens + const totalTokens = 900 // Excluding last message + const contextWindow = 4000 + const maxTokens = 1000 + + const result = await truncateConversationIfNeeded({ + messages, + totalTokens, + contextWindow, + maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + autoCondenseContextPercent: 50, // 50% threshold (not reached) + systemPrompt: "System prompt", + taskId: "test-task", + profileThresholds: { + "test-profile": 800, // 800 tokens threshold + }, + currentProfileId: "test-profile", + }) + + // Context should be above 800 token threshold + expect(summarizeSpy).toHaveBeenCalled() + const callArgs = summarizeSpy.mock.calls[0] + expect(callArgs[0]).toEqual(messages) // messages + expect(callArgs[1]).toBe(mockApiHandler) // apiHandler + expect(callArgs[2]).toBe("System prompt") // systemPrompt + expect(callArgs[3]).toBe("test-task") // taskId + expect(callArgs[4]).toBeGreaterThan(800) // prevContextTokens should be above threshold + expect(callArgs[5]).toBe(true) // automatic trigger + expect(callArgs[6]).toBeUndefined() // customCondensingPrompt + expect(callArgs[7]).toBeUndefined() // condensingApiHandler + + expect(result.messages).toEqual(mockSummarizeResponse.messages) + expect(result.summary).toBe("Summary of conversation") + expect(result.cost).toBe(mockCost) + expect(result.prevContextTokens).toBeGreaterThan(800) // Should be above threshold + }) + + it("should not trigger condensing when token threshold is not reached", async () => { + vi.clearAllMocks() + const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation") + + const messages = createMessages(10, 50) // 10 messages, 50 tokens each = 500 tokens + const totalTokens = 450 // Excluding last message + const contextWindow = 4000 + const maxTokens = 1000 + + const result = await truncateConversationIfNeeded({ + messages, + totalTokens, + contextWindow, + maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + autoCondenseContextPercent: 50, // 50% threshold (not reached) + systemPrompt: "System prompt", + taskId: "test-task", + profileThresholds: { + "test-profile": 1000, // 1000 tokens threshold + }, + currentProfileId: "test-profile", + }) + + // Context is at 500 tokens (450 + 50 for last message), below 1000 token threshold + expect(summarizeSpy).not.toHaveBeenCalled() + expect(result.messages).toEqual(messages) + }) + + it("should prefer token threshold over percentage when both are configured", async () => { + vi.clearAllMocks() + const mockCost = 0.05 + const mockSummarizeResponse: condenseModule.SummarizeResponse = { + messages: [ + { role: "assistant", content: "Summary", ts: Date.now(), isSummary: true }, + { role: "user", content: "Message 8", ts: Date.now() }, + { role: "assistant", content: "Response 9", ts: Date.now() }, + { role: "user", content: "Message 10", ts: Date.now() }, + ], + summary: "Summary of conversation", + cost: mockCost, + newContextTokens: 400, + } + + const summarizeSpy = vi + .spyOn(condenseModule, "summarizeConversation") + .mockResolvedValue(mockSummarizeResponse) + + const messages = createMessages(10, 100) // 10 messages, 100 tokens each = 1000 tokens + const totalTokens = 900 // Excluding last message + const contextWindow = 4000 + const maxTokens = 1000 + + // Test with token threshold that triggers before percentage + const result = await truncateConversationIfNeeded({ + messages, + totalTokens, + contextWindow, + maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + autoCondenseContextPercent: 50, // 50% = 2000 tokens (not reached) + systemPrompt: "System prompt", + taskId: "test-task", + profileThresholds: { + "test-profile": 800, // 800 tokens threshold (reached) + }, + currentProfileId: "test-profile", + }) + + // Context is at 1000 tokens, above 800 token threshold but below 50% (2000 tokens) + expect(summarizeSpy).toHaveBeenCalled() + expect(result.messages).toEqual(mockSummarizeResponse.messages) + }) + }) }) /** diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index 1e518c9a56..4828016c0c 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -124,14 +124,19 @@ export async function truncateConversationIfNeeded({ // Determine the effective threshold to use let effectiveThreshold = autoCondenseContextPercent + let effectiveTokenThreshold: number | undefined = undefined const profileThreshold = profileThresholds[currentProfileId] + if (profileThreshold !== undefined) { if (profileThreshold === -1) { // Special case: -1 means inherit from global setting effectiveThreshold = autoCondenseContextPercent } else if (profileThreshold >= MIN_CONDENSE_THRESHOLD && profileThreshold <= MAX_CONDENSE_THRESHOLD) { - // Valid custom threshold + // Valid percentage threshold effectiveThreshold = profileThreshold + } else if (profileThreshold > MAX_CONDENSE_THRESHOLD) { + // Values above 100 are treated as token counts + effectiveTokenThreshold = profileThreshold } else { // Invalid threshold value, fall back to global setting console.warn( @@ -144,7 +149,13 @@ export async function truncateConversationIfNeeded({ if (autoCondenseContext) { const contextPercent = (100 * prevContextTokens) / contextWindow - if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) { + // Check both percentage and token thresholds + const shouldCondenseByPercent = contextPercent >= effectiveThreshold + const shouldCondenseByTokens = + effectiveTokenThreshold !== undefined && prevContextTokens >= effectiveTokenThreshold + const shouldCondenseByLimit = prevContextTokens > allowedTokens + + if (shouldCondenseByPercent || shouldCondenseByTokens || shouldCondenseByLimit) { // Attempt to intelligently condense the context const result = await summarizeConversation( messages,