Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 27 additions & 42 deletions src/core/sliding-window/__tests__/sliding-window.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -544,72 +544,56 @@ describe("Sliding Window", () => {
})
})

it("should use summarizeConversation when autoCondenseContext is true and tokens exceed threshold", async () => {
// Mock the summarizeConversation function
const mockSummary = "This is a summary of the conversation"
const mockCost = 0.05
const mockSummarizeResponse: condenseModule.SummarizeResponse = {
messages: [
{ role: "user", content: "First message" },
{ role: "assistant", content: mockSummary, isSummary: true },
{ role: "user", content: "Last message" },
],
summary: mockSummary,
cost: mockCost,
newContextTokens: 100,
}

const summarizeSpy = vi
.spyOn(condenseModule, "summarizeConversation")
.mockResolvedValue(mockSummarizeResponse)
it("should NOT use summarizeConversation when tokens exceed hard limit but percentage is below threshold", async () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test modifications look comprehensive! I'm wondering though - should we add a test case for when tokens are exactly at the percentage threshold (e.g., exactly 100% when threshold is 100%)? This would help ensure boundary conditions are properly handled.

// Reset any previous mock calls
vi.clearAllMocks()
const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")

const modelInfo = createModelInfo(100000, 30000)
const totalTokens = 70001 // Above threshold
const totalTokens = 70001 // Above hard limit but only 70% of context window
const messagesWithSmallContent = [
...messages.slice(0, -1),
{ ...messages[messages.length - 1], content: "" },
]

// When truncating, always uses 0.5 fraction
// With 4 messages after the first, 0.5 fraction means remove 2 messages
const expectedMessages = [
messagesWithSmallContent[0],
messagesWithSmallContent[3],
messagesWithSmallContent[4],
]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 100,
autoCondenseContextPercent: 100, // 100% threshold, but tokens are only at 70%
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Verify summarizeConversation was called with the right parameters
expect(summarizeSpy).toHaveBeenCalledWith(
messagesWithSmallContent,
mockApiHandler,
"System prompt",
taskId,
70001,
true,
undefined, // customCondensingPrompt
undefined, // condensingApiHandler
)
// Verify summarizeConversation was NOT called (percentage threshold not met)
expect(summarizeSpy).not.toHaveBeenCalled()

// Verify the result contains the summary information
expect(result).toMatchObject({
messages: mockSummarizeResponse.messages,
summary: mockSummary,
cost: mockCost,
// Verify it used sliding window truncation instead
expect(result).toEqual({
messages: expectedMessages,
summary: "",
cost: 0,
prevContextTokens: totalTokens,
})
// Note: toEqual performs strict matching, so the result must contain exactly these properties (no newContextTokens expected here)

// Clean up
summarizeSpy.mockRestore()
})

it("should fall back to truncateConversation when autoCondenseContext is true but summarization fails", async () => {
it("should fall back to truncateConversation when autoCondenseContext is true, percentage threshold is met, but summarization fails", async () => {
// Mock the summarizeConversation function to return an error
const mockSummarizeResponse: condenseModule.SummarizeResponse = {
messages: messages, // Original messages unchanged
Expand All @@ -623,7 +607,8 @@ describe("Sliding Window", () => {
.mockResolvedValue(mockSummarizeResponse)

const modelInfo = createModelInfo(100000, 30000)
const totalTokens = 70001 // Above threshold
// Set tokens to meet percentage threshold (100% of context)
const totalTokens = 100001 // Above 100% threshold
const messagesWithSmallContent = [
...messages.slice(0, -1),
{ ...messages[messages.length - 1], content: "" },
Expand All @@ -644,17 +629,17 @@ describe("Sliding Window", () => {
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 100,
autoCondenseContextPercent: 100, // 100% threshold, tokens are at 100%+
systemPrompt: "System prompt",
taskId,
profileThresholds: {},
currentProfileId: "default",
})

// Verify summarizeConversation was called
// Verify summarizeConversation was called (percentage threshold was met)
expect(summarizeSpy).toHaveBeenCalled()

// Verify it fell back to truncation
// Verify it fell back to truncation after summarization failed
expect(result.messages).toEqual(expectedMessages)
expect(result.summary).toBe("")
expect(result.prevContextTokens).toBe(totalTokens)
Expand Down
27 changes: 26 additions & 1 deletion src/core/sliding-window/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,17 @@ export async function truncateConversationIfNeeded({

if (autoCondenseContext) {
const contextPercent = (100 * prevContextTokens) / contextWindow
if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
// Check if we should trigger condensation based on percentage threshold
const shouldCondenseByPercentage = contextPercent >= effectiveThreshold
// Check if we're over the hard token limit
const isOverHardLimit = prevContextTokens > allowedTokens
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable naming here is mostly clear, but I'm wondering if isOverHardLimit could be more descriptive? Perhaps isOverTokenLimit or exceedsAllowedTokens would be more consistent with the allowedTokens variable used elsewhere in the code?


// Only trigger automatic condensation when the percentage threshold is met
// If we're over the hard limit but below percentage, we'll use sliding window instead
if (shouldCondenseByPercentage) {
console.log(
`[Condensation] Triggering automatic condensation: ${contextPercent.toFixed(1)}% >= ${effectiveThreshold}% threshold`,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I notice we're adding several console.log statements here for debugging condensation decisions. While these are helpful for understanding the flow, should we consider using a proper logging framework or removing these before merging to avoid cluttering production logs? We could potentially use the existing logging infrastructure that's already in place.

)
// Attempt to intelligently condense the context
const result = await summarizeConversation(
messages,
Expand All @@ -159,14 +169,29 @@ export async function truncateConversationIfNeeded({
if (result.error) {
error = result.error
cost = result.cost
console.log(`[Condensation] Condensation failed: ${result.error}`)
// If condensation fails and we're over the hard limit, fall through to sliding window
} else {
console.log(
`[Condensation] Successfully condensed context from ${prevContextTokens} to ${result.newContextTokens} tokens`,
)
return { ...result, prevContextTokens }
}
} else if (isOverHardLimit) {
// If we're over the hard limit but haven't reached the percentage threshold,
// log this condition - we'll use sliding window truncation below
console.log(
`[Condensation] Context tokens (${prevContextTokens}) exceed allowed (${allowedTokens}), but percentage (${contextPercent.toFixed(1)}%) < threshold (${effectiveThreshold}%). Will use sliding window truncation.`,
)
}
}

// Fall back to sliding window truncation if needed
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment says "This happens when we're over the hard token limit regardless of percentage" but that's not entirely accurate - this code path is also reached when condensation fails (see line 173 where we check for result.error). Could we update this comment to reflect both scenarios?

// This happens either when we're over the hard token limit but below the percentage threshold, or when condensation was attempted and failed
if (prevContextTokens > allowedTokens) {
console.log(
`[Condensation] Using sliding window truncation: ${prevContextTokens} tokens > ${allowedTokens} allowed`,
)
const truncatedMessages = truncateConversation(messages, 0.5, taskId)
return { messages: truncatedMessages, prevContextTokens, summary: "", cost, error }
}
Expand Down
Loading