Commit c96ff0d

Merge pull request #8960 from continuedev/nate/auto-continue-after-compaction
Add auto-continuation after compaction in chat streaming
2 parents 305c55c + 0ec0249 commit c96ff0d

File tree

3 files changed: +436 -32 lines changed
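The change below makes the chat stream loop self-resume after an auto-compaction: when compaction fires during a turn and no tool calls are pending, the CLI appends a synthetic "continue" user message, logs the continuation, and streams one more turn; the flag is then reset so at most one auto-continuation happens per compaction. A minimal sketch of that loop shape, in TypeScript — maybeCompact, runTurn, and chatLoop are illustrative stand-ins inferred from the tests, not the actual streamChatResponse internals:

type CompactionResult = { chatHistory: string[]; wasCompacted: boolean };

async function maybeCompact(history: string[]): Promise<CompactionResult> {
  // Stand-in for handleNormalAutoCompaction: reports whether it compacted.
  return { chatHistory: history, wasCompacted: false };
}

async function runTurn(history: string[]): Promise<{ toolCallsPending: boolean }> {
  // Stand-in for one streamed model response.
  return { toolCallsPending: false };
}

async function chatLoop(history: string[]): Promise<void> {
  let compactionOccurred = false;
  for (;;) {
    const { chatHistory, wasCompacted } = await maybeCompact(history);
    history = chatHistory;
    compactionOccurred = compactionOccurred || wasCompacted;

    const { toolCallsPending } = await runTurn(history);
    if (toolCallsPending) continue; // tool loop runs first; no auto-continue yet

    if (compactionOccurred) {
      history.push("continue"); // synthetic user message
      compactionOccurred = false; // reset: at most one auto-continuation
      continue; // one more turn so the model can resume where it left off
    }
    return;
  }
}

The new test file pins this behavior down against the real streamChatResponse.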
Lines changed: 324 additions & 0 deletions
@@ -0,0 +1,324 @@
import { ModelConfig } from "@continuedev/config-yaml";
import { BaseLlmApi } from "@continuedev/openai-adapters";
import type { ChatHistoryItem } from "core/index.js";
import { convertToUnifiedHistory } from "core/util/messageConversion.js";
import type { ChatCompletionChunk } from "openai/resources/chat/completions.mjs";
import { beforeEach, describe, expect, it, vi } from "vitest";

import { streamChatResponse } from "./streamChatResponse.js";

// Mock all dependencies
vi.mock("../compaction.js", () => ({
  compactChatHistory: vi.fn(),
  pruneLastMessage: vi.fn((history) => history.slice(0, -1)),
}));

vi.mock("../session.js", () => ({
  updateSessionHistory: vi.fn(),
}));

vi.mock("../util/tokenizer.js", () => ({
  shouldAutoCompact: vi.fn(),
  getAutoCompactMessage: vi.fn(() => "Auto-compacting..."),
  countChatHistoryItemTokens: vi.fn(() => 100),
  validateContextLength: vi.fn(() => ({ isValid: true })),
}));

vi.mock("../telemetry/telemetryService.js", () => ({
  telemetryService: {
    logApiRequest: vi.fn(),
    recordResponseTime: vi.fn(),
    recordTokenUsage: vi.fn(),
    recordCost: vi.fn(),
  },
}));

vi.mock("../telemetry/posthogService.js", () => ({
  posthogService: {
    capture: vi.fn(),
  },
}));

vi.mock("../util/logger.js", () => ({
  logger: {
    info: vi.fn(),
    debug: vi.fn(),
    error: vi.fn(),
    warn: vi.fn(),
  },
}));

vi.mock("../services/index.js", () => ({
  services: {
    systemMessage: {
      getSystemMessage: vi.fn(() => Promise.resolve("System message")),
    },
    toolPermissions: {
      getState: vi.fn(() => ({ currentMode: "enabled" })),
      isHeadless: vi.fn(() => false),
    },
    chatHistory: {
      isReady: vi.fn(() => true),
      getHistory: vi.fn(() => []),
      setHistory: vi.fn(),
      addUserMessage: vi.fn(),
    },
  },
}));

vi.mock("./handleToolCalls.js", () => ({
  handleToolCalls: vi.fn(() => Promise.resolve(false)),
  getRequestTools: vi.fn(() => Promise.resolve([])),
}));

vi.mock("./streamChatResponse.compactionHelpers.js", () => ({
  handlePreApiCompaction: vi.fn((chatHistory) =>
    Promise.resolve({ chatHistory, wasCompacted: false }),
  ),
  handlePostToolValidation: vi.fn((_, chatHistory) =>
    Promise.resolve({ chatHistory, wasCompacted: false }),
  ),
  handleNormalAutoCompaction: vi.fn((chatHistory) =>
    Promise.resolve({ chatHistory, wasCompacted: false }),
  ),
}));

describe("streamChatResponse - auto-continuation after compaction", () => {
  const mockModel: ModelConfig = {
    provider: "openai",
    name: "gpt-4",
    model: "gpt-4",
    defaultCompletionOptions: {
      contextLength: 8192,
      maxTokens: 2048,
    },
  } as any;

  let mockLlmApi: BaseLlmApi;
  let mockAbortController: AbortController;
  let chatHistory: ChatHistoryItem[];
  let chunks: ChatCompletionChunk[];
  let responseCount: number;

  function contentChunk(content: string): ChatCompletionChunk {
    return {
      id: "test",
      object: "chat.completion.chunk",
      created: Date.now(),
      model: "test-model",
      choices: [
        {
          index: 0,
          delta: { content },
          finish_reason: null,
        },
      ],
    };
  }

  beforeEach(() => {
    vi.clearAllMocks();
    responseCount = 0;
    chunks = [contentChunk("Initial response")];
    chatHistory = convertToUnifiedHistory([{ role: "user", content: "Hello" }]);

    mockLlmApi = {
      chatCompletionStream: vi.fn().mockImplementation(async function* () {
        responseCount++;
        for (const chunk of chunks) {
          yield chunk;
        }
      }),
    } as unknown as BaseLlmApi;

    mockAbortController = {
      signal: { aborted: false },
      abort: vi.fn(),
    } as unknown as AbortController;
  });

  it("should automatically continue after compaction when no tool calls remain", async () => {
    const { services } = await import("../services/index.js");
    const { handleNormalAutoCompaction } = await import(
      "./streamChatResponse.compactionHelpers.js"
    );
    const { logger } = await import("../util/logger.js");

    // Track history modifications
    const historyUpdates: string[] = [];
    vi.mocked(services.chatHistory.addUserMessage).mockImplementation((msg) => {
      historyUpdates.push(msg);
      return {
        message: { role: "user", content: msg },
        contextItems: [],
      };
    });

    // First call: compaction happens
    // Second call (after continuation): no compaction
    let compactionCallCount = 0;
    vi.mocked(handleNormalAutoCompaction).mockImplementation(() => {
      compactionCallCount++;
      return Promise.resolve({
        chatHistory,
        wasCompacted: compactionCallCount === 1, // Only first call has compaction
      });
    });

    // Simulate: first response completes (no tool calls), triggering auto-continue
    let callCount = 0;
    mockLlmApi.chatCompletionStream = vi
      .fn()
      .mockImplementation(async function* () {
        callCount++;
        if (callCount === 1) {
          // First call: just content, no tool calls (shouldContinue = false)
          yield contentChunk("First response");
        } else if (callCount === 2) {
          // Second call: after auto-continuation
          yield contentChunk("Continued after compaction");
        }
      }) as any;

    await streamChatResponse(
      chatHistory,
      mockModel,
      mockLlmApi,
      mockAbortController,
    );

    // Verify "continue" message was added
    expect(historyUpdates).toContain("continue");

    // Verify logging occurred
    expect(logger.debug).toHaveBeenCalledWith(
      "Auto-compaction occurred during this turn - automatically continuing session",
    );
    expect(logger.debug).toHaveBeenCalledWith(
      "Added continuation message after compaction",
    );

    // Verify the LLM was called multiple times (continuing the conversation)
    expect(callCount).toBeGreaterThan(1);
  });

  it("should not auto-continue if compaction occurs with tool calls pending", async () => {
    const { services } = await import("../services/index.js");
    const { handleNormalAutoCompaction } = await import(
      "./streamChatResponse.compactionHelpers.js"
    );
    const { handleToolCalls } = await import("./handleToolCalls.js");

    const historyUpdates: string[] = [];
    vi.mocked(services.chatHistory.addUserMessage).mockImplementation((msg) => {
      historyUpdates.push(msg);
      return {
        message: { role: "user", content: msg },
        contextItems: [],
      };
    });

    // Compaction happens
    vi.mocked(handleNormalAutoCompaction).mockResolvedValue({
      chatHistory,
      wasCompacted: true,
    });

    // But tool calls are still being processed (shouldContinue = true)
    // This is simulated by having handleToolCalls return true (shouldReturn)
    vi.mocked(handleToolCalls).mockResolvedValue(true);

    // Mock tool calls in response
    mockLlmApi.chatCompletionStream = vi
      .fn()
      .mockImplementation(async function* () {
        yield {
          id: "test",
          object: "chat.completion.chunk",
          created: Date.now(),
          model: "test-model",
          choices: [
            {
              index: 0,
              delta: {
                tool_calls: [
                  {
                    index: 0,
                    id: "call_123",
                    type: "function",
                    function: {
                      name: "ReadFile",
                      arguments: '{"filepath": "/test"}',
                    },
                  },
                ],
              },
              finish_reason: null,
            },
          ],
        };
      }) as any;

    await streamChatResponse(
      chatHistory,
      mockModel,
      mockLlmApi,
      mockAbortController,
    );

    // Should NOT auto-continue because tool calls are pending
    expect(historyUpdates).not.toContain("continue");
  });

  it("should not create infinite loops - flag is reset after continuation", async () => {
    const { services } = await import("../services/index.js");
    const { handleNormalAutoCompaction } = await import(
      "./streamChatResponse.compactionHelpers.js"
    );

    const historyUpdates: string[] = [];
    vi.mocked(services.chatHistory.addUserMessage).mockImplementation((msg) => {
      historyUpdates.push(msg);
      return {
        message: { role: "user", content: msg },
        contextItems: [],
      };
    });

    // Track calls - compaction is reported on the first turn only
    let normalCompactionCallCount = 0;
    vi.mocked(handleNormalAutoCompaction).mockImplementation(() => {
      normalCompactionCallCount++;
      // Compaction happens on the first call only
      return Promise.resolve({
        chatHistory,
        wasCompacted: normalCompactionCallCount <= 1,
      });
    });

    let streamCallCount = 0;
    mockLlmApi.chatCompletionStream = vi
      .fn()
      .mockImplementation(async function* () {
        streamCallCount++;
        yield contentChunk(`Response ${streamCallCount}`);
      }) as any;

    await streamChatResponse(
      chatHistory,
      mockModel,
      mockLlmApi,
      mockAbortController,
    );

    // Should only add "continue" once
    // The flag is reset after the first continuation
    const continueCount = historyUpdates.filter(
      (msg) => msg === "continue",
    ).length;
    expect(continueCount).toBeLessThanOrEqual(1);

    // Should have called the LLM at least once
    expect(streamCallCount).toBeGreaterThanOrEqual(1);
  });
});

extensions/cli/src/stream/streamChatResponse.compactionHelpers.ts

Lines changed: 12 additions & 8 deletions
@@ -66,11 +66,11 @@ export async function handlePostToolValidation(
   toolCalls: ToolCall[],
   chatHistory: ChatHistoryItem[],
   options: CompactionHelperOptions,
-): Promise<ChatHistoryItem[]> {
+): Promise<{ chatHistory: ChatHistoryItem[]; wasCompacted: boolean }> {
   const { model, llmApi, isHeadless, callbacks, systemMessage } = options;
 
   if (toolCalls.length === 0) {
-    return chatHistory;
+    return { chatHistory, wasCompacted: false };
   }
 
   // Get updated history after tool execution
@@ -150,6 +150,7 @@ export async function handlePostToolValidation(
         inputTokens: postCompactionValidation.inputTokens,
         contextLimit: postCompactionValidation.contextLimit,
       });
+      return { chatHistory, wasCompacted: true };
     } else {
       // Compaction failed, cannot continue
       logger.error("Failed to compact history after tool execution overflow");
@@ -159,7 +160,7 @@ export async function handlePostToolValidation(
     }
   }
 
-  return chatHistory;
+  return { chatHistory, wasCompacted: false };
 }
 
 /**
@@ -169,11 +170,11 @@ export async function handleNormalAutoCompaction(
   chatHistory: ChatHistoryItem[],
   shouldContinue: boolean,
   options: CompactionHelperOptions,
-): Promise<ChatHistoryItem[]> {
+): Promise<{ chatHistory: ChatHistoryItem[]; wasCompacted: boolean }> {
   const { model, llmApi, isHeadless, callbacks, systemMessage } = options;
 
   if (!shouldContinue) {
-    return chatHistory;
+    return { chatHistory, wasCompacted: false };
   }
 
   const chatHistorySvc = services.chatHistory;
@@ -198,11 +199,14 @@ export async function handleNormalAutoCompaction(
     // Use the service to update history if available, otherwise use local copy
     if (chatHistorySvc && typeof chatHistorySvc.setHistory === "function") {
       chatHistorySvc.setHistory(updatedChatHistory);
-      return chatHistorySvc.getHistory();
+      return {
+        chatHistory: chatHistorySvc.getHistory(),
+        wasCompacted: true,
+      };
     }
     // Fallback: return the compacted history directly when service unavailable
-    return updatedChatHistory;
+    return { chatHistory: updatedChatHistory, wasCompacted: true };
   }
 
-  return chatHistory;
+  return { chatHistory, wasCompacted: false };
 }
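Because both helpers now return { chatHistory, wasCompacted } instead of a bare ChatHistoryItem[], call sites must destructure the result and carry the flag forward. A minimal sketch of the assumed call-site shape — latchCompaction and the flags object are illustrative; the actual caller in streamChatResponse is not part of this diff:

type HelperResult = { chatHistory: unknown[]; wasCompacted: boolean };

async function latchCompaction(
  step: Promise<HelperResult>,
  flags: { compactionOccurred: boolean },
): Promise<unknown[]> {
  const { chatHistory, wasCompacted } = await step;
  // Latch rather than overwrite: once any helper compacts during the turn,
  // later non-compacting steps must not clear the signal.
  flags.compactionOccurred = flags.compactionOccurred || wasCompacted;
  return chatHistory;
}

A caller could then write something like
chatHistory = await latchCompaction(handleNormalAutoCompaction(chatHistory, shouldContinue, options), flags)
and consult flags.compactionOccurred when deciding whether to auto-continue.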
