
Commit be1511a

feat: add token-based condensing threshold support
- Allow configuring condensing threshold in tokens (values > 100)
- Percentage thresholds remain for values between 5-100
- Token thresholds take precedence when both could apply
- Add comprehensive tests for token-based thresholds

Addresses #7440
1 parent ff1f4f0 commit be1511a
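
As a rough sketch of how the per-profile values are interpreted after this change (the profileThresholds shape and the -1 sentinel come from the diff below; the profile names here are made up for illustration):

	// Per-profile condensing thresholds, keyed by profile id (illustrative values).
	const profileThresholds: Record<string, number> = {
		"default": -1, // -1 inherits the global autoCondenseContextPercent setting
		"chat": 75, // values in 5-100 are percentages of the context window
		"long-context": 50_000, // values above 100 are treated as absolute token counts
	}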


2 files changed: +161 -2 lines changed


src/core/sliding-window/__tests__/sliding-window.spec.ts

Lines changed: 148 additions & 0 deletions
@@ -1027,6 +1027,154 @@ describe("Sliding Window", () => {
 		// Clean up
 		summarizeSpy.mockRestore()
 	})
+	describe("Token-based thresholds", () => {
+		// Helper function to create messages with specific token counts
+		const createMessages = (count: number, tokensPerMessage: number): ApiMessage[] => {
+			const messages: ApiMessage[] = []
+			for (let i = 0; i < count; i++) {
+				const role = i % 2 === 0 ? "user" : "assistant"
+				// Create content that roughly corresponds to the desired token count
+				// This is a simplification - actual token count depends on the tokenizer
+				const content = "x".repeat(tokensPerMessage * 4) // Rough approximation
+				messages.push({ role: role as "user" | "assistant", content })
+			}
+			return messages
+		}
+
+		it("should trigger condensing when token threshold is reached", async () => {
+			vi.clearAllMocks()
+			const mockCost = 0.05
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "assistant", content: "Summary", ts: Date.now(), isSummary: true },
+					{ role: "user", content: "Message 8", ts: Date.now() },
+					{ role: "assistant", content: "Response 9", ts: Date.now() },
+					{ role: "user", content: "Message 10", ts: Date.now() },
+				],
+				summary: "Summary of conversation",
+				cost: mockCost,
+				newContextTokens: 400,
+			}
+
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const messages = createMessages(10, 100) // 10 messages, 100 tokens each = 1000 tokens
+			const totalTokens = 900 // Excluding last message
+			const contextWindow = 4000
+			const maxTokens = 1000
+
+			const result = await truncateConversationIfNeeded({
+				messages,
+				totalTokens,
+				contextWindow,
+				maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 50, // 50% threshold (not reached)
+				systemPrompt: "System prompt",
+				taskId: "test-task",
+				profileThresholds: {
+					"test-profile": 800, // 800 tokens threshold
+				},
+				currentProfileId: "test-profile",
+			})
+
+			// Context should be above 800 token threshold
+			expect(summarizeSpy).toHaveBeenCalled()
+			const callArgs = summarizeSpy.mock.calls[0]
+			expect(callArgs[0]).toEqual(messages) // messages
+			expect(callArgs[1]).toBe(mockApiHandler) // apiHandler
+			expect(callArgs[2]).toBe("System prompt") // systemPrompt
+			expect(callArgs[3]).toBe("test-task") // taskId
+			expect(callArgs[4]).toBeGreaterThan(800) // prevContextTokens should be above threshold
+			expect(callArgs[5]).toBe(true) // automatic trigger
+			expect(callArgs[6]).toBeUndefined() // customCondensingPrompt
+			expect(callArgs[7]).toBeUndefined() // condensingApiHandler
+
+			expect(result.messages).toEqual(mockSummarizeResponse.messages)
+			expect(result.summary).toBe("Summary of conversation")
+			expect(result.cost).toBe(mockCost)
+			expect(result.prevContextTokens).toBeGreaterThan(800) // Should be above threshold
+		})
+
+		it("should not trigger condensing when token threshold is not reached", async () => {
+			vi.clearAllMocks()
+			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
+
+			const messages = createMessages(10, 50) // 10 messages, 50 tokens each = 500 tokens
+			const totalTokens = 450 // Excluding last message
+			const contextWindow = 4000
+			const maxTokens = 1000
+
+			const result = await truncateConversationIfNeeded({
+				messages,
+				totalTokens,
+				contextWindow,
+				maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 50, // 50% threshold (not reached)
+				systemPrompt: "System prompt",
+				taskId: "test-task",
+				profileThresholds: {
+					"test-profile": 1000, // 1000 tokens threshold
+				},
+				currentProfileId: "test-profile",
+			})
+
+			// Context is at 500 tokens (450 + 50 for last message), below 1000 token threshold
+			expect(summarizeSpy).not.toHaveBeenCalled()
+			expect(result.messages).toEqual(messages)
+		})
+
+		it("should prefer token threshold over percentage when both are configured", async () => {
+			vi.clearAllMocks()
+			const mockCost = 0.05
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "assistant", content: "Summary", ts: Date.now(), isSummary: true },
+					{ role: "user", content: "Message 8", ts: Date.now() },
+					{ role: "assistant", content: "Response 9", ts: Date.now() },
+					{ role: "user", content: "Message 10", ts: Date.now() },
+				],
+				summary: "Summary of conversation",
+				cost: mockCost,
+				newContextTokens: 400,
+			}
+
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const messages = createMessages(10, 100) // 10 messages, 100 tokens each = 1000 tokens
+			const totalTokens = 900 // Excluding last message
+			const contextWindow = 4000
+			const maxTokens = 1000
+
+			// Test with token threshold that triggers before percentage
+			const result = await truncateConversationIfNeeded({
+				messages,
+				totalTokens,
+				contextWindow,
+				maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 50, // 50% = 2000 tokens (not reached)
+				systemPrompt: "System prompt",
+				taskId: "test-task",
+				profileThresholds: {
+					"test-profile": 800, // 800 tokens threshold (reached)
+				},
+				currentProfileId: "test-profile",
+			})
+
+			// Context is at 1000 tokens, above 800 token threshold but below 50% (2000 tokens)
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result.messages).toEqual(mockSummarizeResponse.messages)
+		})
+	})
 })
 
 /**

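For reference, the arithmetic the first new test relies on; the totalTokens-plus-last-message accounting is taken from the tests' own comments, so this is a sketch of the test setup rather than of the implementation:

	// Values from the "should trigger condensing when token threshold is reached" test above.
	const tokensPerMessage = 100 // createMessages(10, 100): ~100 tokens per message, ~1000 total
	const totalTokens = 900 // reported tokens for everything except the last message
	const approxContextTokens = totalTokens + tokensPerMessage // ~1000 tokens checked against thresholds
	const contextWindow = 4000
	const percentThreshold = 50 // 50% of 4000 = 2000 tokens, not reached
	const profileTokenThreshold = 800 // reached, so condensing is expected to trigger
	console.log(approxContextTokens >= profileTokenThreshold) // true
	console.log((100 * approxContextTokens) / contextWindow >= percentThreshold) // false
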
src/core/sliding-window/index.ts

Lines changed: 13 additions & 2 deletions
@@ -124,14 +124,19 @@ export async function truncateConversationIfNeeded({
 
 	// Determine the effective threshold to use
 	let effectiveThreshold = autoCondenseContextPercent
+	let effectiveTokenThreshold: number | undefined = undefined
 	const profileThreshold = profileThresholds[currentProfileId]
+
 	if (profileThreshold !== undefined) {
 		if (profileThreshold === -1) {
 			// Special case: -1 means inherit from global setting
 			effectiveThreshold = autoCondenseContextPercent
 		} else if (profileThreshold >= MIN_CONDENSE_THRESHOLD && profileThreshold <= MAX_CONDENSE_THRESHOLD) {
-			// Valid custom threshold
+			// Valid percentage threshold
 			effectiveThreshold = profileThreshold
+		} else if (profileThreshold > MAX_CONDENSE_THRESHOLD) {
+			// Values above 100 are treated as token counts
+			effectiveTokenThreshold = profileThreshold
 		} else {
 			// Invalid threshold value, fall back to global setting
 			console.warn(
@@ -144,7 +149,13 @@ export async function truncateConversationIfNeeded({
 
 	if (autoCondenseContext) {
 		const contextPercent = (100 * prevContextTokens) / contextWindow
-		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
+		// Check both percentage and token thresholds
+		const shouldCondenseByPercent = contextPercent >= effectiveThreshold
+		const shouldCondenseByTokens =
+			effectiveTokenThreshold !== undefined && prevContextTokens >= effectiveTokenThreshold
+		const shouldCondenseByLimit = prevContextTokens > allowedTokens
+
+		if (shouldCondenseByPercent || shouldCondenseByTokens || shouldCondenseByLimit) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation(
 				messages,