From 7a62835b7a0eea6700cc74486871e3e311feb418 Mon Sep 17 00:00:00 2001 From: Continue Agent Date: Tue, 7 Oct 2025 18:02:45 +0000 Subject: [PATCH 1/2] Add pruning fallback and character truncation to handle large context - Add compileChatMessages pruning as fallback when context length validation fails - Truncate Bash tool output by both lines (5000) and characters (200000) - Reuse existing pruning logic from core/llm/countTokens.js - Log pruning details when it occurs Fixes CON-4274 Generated with [Continue](https://continue.dev) Co-Authored-By: Continue Co-authored-by: nate --- .../cli/src/stream/streamChatResponse.ts | 88 +++++++++++++++++-- .../cli/src/tools/runTerminalCommand.ts | 54 +++++++----- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index 2c7ef956611..2167260ebd8 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -1,7 +1,11 @@ import { ModelConfig } from "@continuedev/config-yaml"; import { BaseLlmApi } from "@continuedev/openai-adapters"; import type { ChatHistoryItem } from "core/index.js"; -import { convertFromUnifiedHistoryWithSystemMessage } from "core/util/messageConversion.js"; +import { compileChatMessages } from "core/llm/countTokens.js"; +import { + convertFromUnifiedHistoryWithSystemMessage, + convertToUnifiedHistory, +} from "core/util/messageConversion.js"; import * as dotenv from "dotenv"; import type { ChatCompletionMessageParam, @@ -150,18 +154,90 @@ export async function processStreamingResponse( // Validate context length before making the request const validation = validateContextLength(chatHistory, model); - if (!validation.isValid) { - throw new Error(`Context length validation failed: ${validation.error}`); - } // Get fresh system message and inject it const systemMessage = await services.systemMessage.getSystemMessage( services.toolPermissions.getState().currentMode, ); - const openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( - chatHistory, + + let openaiChatHistory: ChatCompletionMessageParam[]; + let chatHistoryToUse = chatHistory; + + // If validation fails, try to prune using compileChatMessages + if (!validation.isValid) { + logger.warn( + "Context length validation failed, attempting to prune messages", + { + error: validation.error, + historyLength: chatHistory.length, + }, + ); + + try { + // Convert to ChatMessage format for pruning + const openaiMessages = convertFromUnifiedHistoryWithSystemMessage( + chatHistory, + systemMessage, + ) as ChatCompletionMessageParam[]; + + // Use compileChatMessages to prune + const contextLength = model.contextLength || 4096; + const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024; + + const result = compileChatMessages({ + modelName: model.model, + msgs: openaiMessages.map((msg) => ({ + role: msg.role, + content: msg.content || "", + ...("tool_calls" in msg && msg.tool_calls + ? { toolCalls: msg.tool_calls } + : {}), + ...("tool_call_id" in msg && msg.tool_call_id + ? 
{ toolCallId: msg.tool_call_id } + : {}), + })), + knownContextLength: contextLength, + maxTokens, + supportsImages: false, + tools, + }); + + if (result.didPrune) { + logger.info("Successfully pruned chat history to fit context length", { + originalLength: chatHistory.length, + prunedLength: result.compiledChatMessages.length, + contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`, + }); + + // Convert pruned messages back to ChatHistoryItem format + const prunedOpenaiMessages = result.compiledChatMessages.map( + (msg: any) => ({ + role: msg.role, + content: msg.content, + ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}), + ...(msg.toolCallId ? { tool_call_id: msg.toolCallId } : {}), + }), + ) as ChatCompletionMessageParam[]; + + // Remove system message from the pruned messages to avoid duplication + const messagesWithoutSystem = prunedOpenaiMessages.filter( + (msg) => msg.role !== "system", + ); + chatHistoryToUse = convertToUnifiedHistory(messagesWithoutSystem); + } + } catch (pruneError: any) { + logger.error("Failed to prune chat history", { error: pruneError }); + throw new Error( + `Context length validation failed and pruning failed: ${pruneError.message}`, + ); + } + } + + openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( + chatHistoryToUse, systemMessage, ) as ChatCompletionMessageParam[]; + const requestStartTime = Date.now(); const streamFactory = async (retryAbortSignal: AbortSignal) => { diff --git a/extensions/cli/src/tools/runTerminalCommand.ts b/extensions/cli/src/tools/runTerminalCommand.ts index 00204d55bd1..9e49ab8b9a0 100644 --- a/extensions/cli/src/tools/runTerminalCommand.ts +++ b/extensions/cli/src/tools/runTerminalCommand.ts @@ -13,6 +13,30 @@ import { import { Tool } from "./types.js"; +// Maximum number of lines and characters to return from command output +const MAX_OUTPUT_LINES = 5000; +const MAX_OUTPUT_CHARS = 200000; + +// Helper function to truncate command output by both lines and characters +function truncateOutput(output: string): string { + const lines = output.split("\n"); + let truncated = output; + let truncationMsg = ""; + + // First check character limit + if (output.length > MAX_OUTPUT_CHARS) { + truncated = output.substring(0, MAX_OUTPUT_CHARS); + truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_CHARS} characters of ${output.length} total]`; + } + // Then check line limit (only if not already truncated by characters) + else if (lines.length > MAX_OUTPUT_LINES) { + truncated = lines.slice(0, MAX_OUTPUT_LINES).join("\n"); + truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_LINES} lines of ${lines.length} total]`; + } + + return truncationMsg ? truncated + truncationMsg : truncated; +} + // Helper function to use login shell on Unix/macOS and PowerShell on Windows function getShellCommand(command: string): { shell: string; args: string[] } { if (process.platform === "win32") { @@ -99,18 +123,9 @@ Commands are automatically executed from the current working directory (${proces let output = stdout + (stderr ? 
`\nStderr: ${stderr}` : ""); output += `\n\n[Command timed out after ${TIMEOUT_MS / 1000} seconds of no output]`; - // Truncate output if it has too many lines - const lines = output.split("\n"); - if (lines.length > 5000) { - const truncatedOutput = lines.slice(0, 5000).join("\n"); - resolve( - truncatedOutput + - `\n\n[Output truncated to first 5000 lines of ${lines.length} total]`, - ); - return; - } - - resolve(output); + // Truncate output by both lines and characters + const truncatedOutput = truncateOutput(output); + resolve(truncatedOutput); }, TIMEOUT_MS); }; @@ -155,18 +170,9 @@ Commands are automatically executed from the current working directory (${proces output = stdout + `\nStderr: ${stderr}`; } - // Truncate output if it has too many lines - const lines = output.split("\n"); - if (lines.length > 5000) { - const truncatedOutput = lines.slice(0, 5000).join("\n"); - resolve( - truncatedOutput + - `\n\n[Output truncated to first 5000 lines of ${lines.length} total]`, - ); - return; - } - - resolve(output); + // Truncate output by both lines and characters + const truncatedOutput = truncateOutput(output); + resolve(truncatedOutput); }); child.on("error", (error) => { From fdcdcb6a4a918f229c47a8d4fce47e820638751d Mon Sep 17 00:00:00 2001 From: Continue Date: Tue, 2 Dec 2025 21:35:50 +0000 Subject: [PATCH 2/2] Address PR feedback: simplify context handling and improve bash truncation - Remove duplicate token validation logic; use compileChatMessages directly - Eliminate unnecessary conversions between unified history and ChatMessage formats - Change bash tool truncation to keep LAST X lines/chars instead of first X - Better captures test/install outcomes at the end of command output - Simplify pruning flow: always use compileChatMessages for context management Addresses feedback from @RomneyDa and @chezsmithy Co-authored-by: dallin --- .../cli/src/stream/streamChatResponse.ts | 125 ++++++------------ .../cli/src/tools/runTerminalCommand.ts | 16 ++- 2 files changed, 52 insertions(+), 89 deletions(-) diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index 2167260ebd8..f55e6713f47 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -2,10 +2,7 @@ import { ModelConfig } from "@continuedev/config-yaml"; import { BaseLlmApi } from "@continuedev/openai-adapters"; import type { ChatHistoryItem } from "core/index.js"; import { compileChatMessages } from "core/llm/countTokens.js"; -import { - convertFromUnifiedHistoryWithSystemMessage, - convertToUnifiedHistory, -} from "core/util/messageConversion.js"; +import { convertFromUnifiedHistoryWithSystemMessage } from "core/util/messageConversion.js"; import * as dotenv from "dotenv"; import type { ChatCompletionMessageParam, @@ -22,7 +19,6 @@ import { withExponentialBackoff, } from "../util/exponentialBackoff.js"; import { logger } from "../util/logger.js"; -import { validateContextLength } from "../util/tokenizer.js"; import { getAllTools, handleToolCalls } from "./handleToolCalls.js"; import { handleAutoCompaction } from "./streamChatResponse.autoCompaction.js"; @@ -152,91 +148,56 @@ export async function processStreamingResponse( tools, } = options; - // Validate context length before making the request - const validation = validateContextLength(chatHistory, model); - - // Get fresh system message and inject it + // Get fresh system message const systemMessage = await services.systemMessage.getSystemMessage( 
services.toolPermissions.getState().currentMode,
   );
 
-  let openaiChatHistory: ChatCompletionMessageParam[];
-  let chatHistoryToUse = chatHistory;
-
-  // If validation fails, try to prune using compileChatMessages
-  if (!validation.isValid) {
-    logger.warn(
-      "Context length validation failed, attempting to prune messages",
-      {
-        error: validation.error,
-        historyLength: chatHistory.length,
-      },
-    );
-
-    try {
-      // Convert to ChatMessage format for pruning
-      const openaiMessages = convertFromUnifiedHistoryWithSystemMessage(
-        chatHistory,
-        systemMessage,
-      ) as ChatCompletionMessageParam[];
-
-      // Use compileChatMessages to prune
-      const contextLength = model.contextLength || 4096;
-      const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024;
-
-      const result = compileChatMessages({
-        modelName: model.model,
-        msgs: openaiMessages.map((msg) => ({
-          role: msg.role,
-          content: msg.content || "",
-          ...("tool_calls" in msg && msg.tool_calls
-            ? { toolCalls: msg.tool_calls }
-            : {}),
-          ...("tool_call_id" in msg && msg.tool_call_id
-            ? { toolCallId: msg.tool_call_id }
-            : {}),
-        })),
-        knownContextLength: contextLength,
-        maxTokens,
-        supportsImages: false,
-        tools,
-      });
+  // Convert unified history to OpenAI message format, injecting the fresh system message
+  const openaiMessages = convertFromUnifiedHistoryWithSystemMessage(
+    chatHistory,
+    systemMessage,
+  ) as ChatCompletionMessageParam[];
 
-      if (result.didPrune) {
-        logger.info("Successfully pruned chat history to fit context length", {
-          originalLength: chatHistory.length,
-          prunedLength: result.compiledChatMessages.length,
-          contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`,
-        });
+  // Map to core ChatMessage shape so compileChatMessages can prune to fit the context window
+  const contextLength = model.contextLength || 4096;
+  const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024;
+
+  const chatMessages = openaiMessages.map((msg) => ({
+    role: msg.role,
+    content: msg.content || "",
+    ...("tool_calls" in msg && msg.tool_calls
+      ? { toolCalls: msg.tool_calls }
+      : {}),
+    ...("tool_call_id" in msg && msg.tool_call_id
+      ? { toolCallId: msg.tool_call_id }
+      : {}),
+  }));
+
+  const result = compileChatMessages({
+    modelName: model.model,
+    msgs: chatMessages,
+    knownContextLength: contextLength,
+    maxTokens,
+    supportsImages: false,
+    tools,
+  });
 
-        // Convert pruned messages back to ChatHistoryItem format
-        const prunedOpenaiMessages = result.compiledChatMessages.map(
-          (msg: any) => ({
-            role: msg.role,
-            content: msg.content,
-            ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}),
-            ...(msg.toolCallId ? 
{ tool_call_id: msg.toolCallId } : {}), - }), - ) as ChatCompletionMessageParam[]; - - // Remove system message from the pruned messages to avoid duplication - const messagesWithoutSystem = prunedOpenaiMessages.filter( - (msg) => msg.role !== "system", - ); - chatHistoryToUse = convertToUnifiedHistory(messagesWithoutSystem); - } - } catch (pruneError: any) { - logger.error("Failed to prune chat history", { error: pruneError }); - throw new Error( - `Context length validation failed and pruning failed: ${pruneError.message}`, - ); - } + if (result.didPrune) { + logger.info("Chat history pruned to fit context length", { + originalLength: chatHistory.length, + prunedLength: result.compiledChatMessages.length, + contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`, + }); } - openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( - chatHistoryToUse, - systemMessage, - ) as ChatCompletionMessageParam[]; + // Convert back to OpenAI format + const openaiChatHistory = result.compiledChatMessages.map((msg: any) => ({ + role: msg.role, + content: msg.content, + ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}), + ...(msg.toolCallId ? { tool_call_id: msg.toolCallId } : {}), + })) as ChatCompletionMessageParam[]; const requestStartTime = Date.now(); diff --git a/extensions/cli/src/tools/runTerminalCommand.ts b/extensions/cli/src/tools/runTerminalCommand.ts index 9e49ab8b9a0..7f9d2336881 100644 --- a/extensions/cli/src/tools/runTerminalCommand.ts +++ b/extensions/cli/src/tools/runTerminalCommand.ts @@ -18,23 +18,25 @@ const MAX_OUTPUT_LINES = 5000; const MAX_OUTPUT_CHARS = 200000; // Helper function to truncate command output by both lines and characters +// Keeps the LAST X lines/chars to capture test/install outcomes function truncateOutput(output: string): string { const lines = output.split("\n"); let truncated = output; let truncationMsg = ""; - // First check character limit + // First check character limit - keep last X characters if (output.length > MAX_OUTPUT_CHARS) { - truncated = output.substring(0, MAX_OUTPUT_CHARS); - truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_CHARS} characters of ${output.length} total]`; + const startIndex = output.length - MAX_OUTPUT_CHARS; + truncated = output.substring(startIndex); + truncationMsg = `[Output truncated: showing last ${MAX_OUTPUT_CHARS} characters of ${output.length} total]\n\n`; } - // Then check line limit (only if not already truncated by characters) + // Then check line limit (only if not already truncated by characters) - keep last X lines else if (lines.length > MAX_OUTPUT_LINES) { - truncated = lines.slice(0, MAX_OUTPUT_LINES).join("\n"); - truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_LINES} lines of ${lines.length} total]`; + truncated = lines.slice(-MAX_OUTPUT_LINES).join("\n"); + truncationMsg = `[Output truncated: showing last ${MAX_OUTPUT_LINES} lines of ${lines.length} total]\n\n`; } - return truncationMsg ? truncated + truncationMsg : truncated; + return truncationMsg ? truncationMsg + truncated : truncated; } // Helper function to use login shell on Unix/macOS and PowerShell on Windows
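
Reviewer note (illustrative, not part of the patch): a minimal standalone
sketch of the keep-last truncation behavior introduced above. It mirrors the
patched truncateOutput helper; the constants and truncation-message wording
are copied from the diff, while the sample log and the two console checks
are hypothetical.

    // Standalone TypeScript sketch mirroring the patched truncateOutput.
    const MAX_OUTPUT_LINES = 5000;
    const MAX_OUTPUT_CHARS = 200000;

    function truncateOutput(output: string): string {
      const lines = output.split("\n");
      // The character limit is checked first; the tail is kept so the final
      // lines of the output survive truncation.
      if (output.length > MAX_OUTPUT_CHARS) {
        const tail = output.substring(output.length - MAX_OUTPUT_CHARS);
        return `[Output truncated: showing last ${MAX_OUTPUT_CHARS} characters of ${output.length} total]\n\n${tail}`;
      }
      // Otherwise the line limit applies, again keeping the tail.
      if (lines.length > MAX_OUTPUT_LINES) {
        const tail = lines.slice(-MAX_OUTPUT_LINES).join("\n");
        return `[Output truncated: showing last ${MAX_OUTPUT_LINES} lines of ${lines.length} total]\n\n${tail}`;
      }
      return output;
    }

    // A 6000-line build log keeps its last 5000 lines, so closing results
    // ("Tests passed", "npm ERR!", etc.) are what the model actually sees.
    const log = Array.from({ length: 6000 }, (_, i) => `line ${i}`).join("\n");
    const out = truncateOutput(log);
    console.log(out.startsWith("[Output truncated: showing last 5000 lines of 6000 total]")); // true
    console.log(out.endsWith("line 5999")); // true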