From 7a62835b7a0eea6700cc74486871e3e311feb418 Mon Sep 17 00:00:00 2001 From: Continue Agent Date: Tue, 7 Oct 2025 18:02:45 +0000 Subject: [PATCH 1/2] Add pruning fallback and character truncation to handle large context - Add compileChatMessages pruning as fallback when context length validation fails - Truncate Bash tool output by both lines (5000) and characters (200000) - Reuse existing pruning logic from core/llm/countTokens.js - Log pruning details when it occurs Fixes CON-4274 Generated with [Continue](https://continue.dev) Co-Authored-By: Continue Co-authored-by: nate --- .../cli/src/stream/streamChatResponse.ts | 88 +++++++++++++++++-- .../cli/src/tools/runTerminalCommand.ts | 54 +++++++----- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index 2c7ef956611..2167260ebd8 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -1,7 +1,11 @@ import { ModelConfig } from "@continuedev/config-yaml"; import { BaseLlmApi } from "@continuedev/openai-adapters"; import type { ChatHistoryItem } from "core/index.js"; -import { convertFromUnifiedHistoryWithSystemMessage } from "core/util/messageConversion.js"; +import { compileChatMessages } from "core/llm/countTokens.js"; +import { + convertFromUnifiedHistoryWithSystemMessage, + convertToUnifiedHistory, +} from "core/util/messageConversion.js"; import * as dotenv from "dotenv"; import type { ChatCompletionMessageParam, @@ -150,18 +154,90 @@ export async function processStreamingResponse( // Validate context length before making the request const validation = validateContextLength(chatHistory, model); - if (!validation.isValid) { - throw new Error(`Context length validation failed: ${validation.error}`); - } // Get fresh system message and inject it const systemMessage = await services.systemMessage.getSystemMessage( services.toolPermissions.getState().currentMode, ); - const openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( - chatHistory, + + let openaiChatHistory: ChatCompletionMessageParam[]; + let chatHistoryToUse = chatHistory; + + // If validation fails, try to prune using compileChatMessages + if (!validation.isValid) { + logger.warn( + "Context length validation failed, attempting to prune messages", + { + error: validation.error, + historyLength: chatHistory.length, + }, + ); + + try { + // Convert to ChatMessage format for pruning + const openaiMessages = convertFromUnifiedHistoryWithSystemMessage( + chatHistory, + systemMessage, + ) as ChatCompletionMessageParam[]; + + // Use compileChatMessages to prune + const contextLength = model.contextLength || 4096; + const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024; + + const result = compileChatMessages({ + modelName: model.model, + msgs: openaiMessages.map((msg) => ({ + role: msg.role, + content: msg.content || "", + ...("tool_calls" in msg && msg.tool_calls + ? { toolCalls: msg.tool_calls } + : {}), + ...("tool_call_id" in msg && msg.tool_call_id + ? 
{ toolCallId: msg.tool_call_id } + : {}), + })), + knownContextLength: contextLength, + maxTokens, + supportsImages: false, + tools, + }); + + if (result.didPrune) { + logger.info("Successfully pruned chat history to fit context length", { + originalLength: chatHistory.length, + prunedLength: result.compiledChatMessages.length, + contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`, + }); + + // Convert pruned messages back to ChatHistoryItem format + const prunedOpenaiMessages = result.compiledChatMessages.map( + (msg: any) => ({ + role: msg.role, + content: msg.content, + ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}), + ...(msg.toolCallId ? { tool_call_id: msg.toolCallId } : {}), + }), + ) as ChatCompletionMessageParam[]; + + // Remove system message from the pruned messages to avoid duplication + const messagesWithoutSystem = prunedOpenaiMessages.filter( + (msg) => msg.role !== "system", + ); + chatHistoryToUse = convertToUnifiedHistory(messagesWithoutSystem); + } + } catch (pruneError: any) { + logger.error("Failed to prune chat history", { error: pruneError }); + throw new Error( + `Context length validation failed and pruning failed: ${pruneError.message}`, + ); + } + } + + openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( + chatHistoryToUse, systemMessage, ) as ChatCompletionMessageParam[]; + const requestStartTime = Date.now(); const streamFactory = async (retryAbortSignal: AbortSignal) => { diff --git a/extensions/cli/src/tools/runTerminalCommand.ts b/extensions/cli/src/tools/runTerminalCommand.ts index 00204d55bd1..9e49ab8b9a0 100644 --- a/extensions/cli/src/tools/runTerminalCommand.ts +++ b/extensions/cli/src/tools/runTerminalCommand.ts @@ -13,6 +13,30 @@ import { import { Tool } from "./types.js"; +// Maximum number of lines and characters to return from command output +const MAX_OUTPUT_LINES = 5000; +const MAX_OUTPUT_CHARS = 200000; + +// Helper function to truncate command output by both lines and characters +function truncateOutput(output: string): string { + const lines = output.split("\n"); + let truncated = output; + let truncationMsg = ""; + + // First check character limit + if (output.length > MAX_OUTPUT_CHARS) { + truncated = output.substring(0, MAX_OUTPUT_CHARS); + truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_CHARS} characters of ${output.length} total]`; + } + // Then check line limit (only if not already truncated by characters) + else if (lines.length > MAX_OUTPUT_LINES) { + truncated = lines.slice(0, MAX_OUTPUT_LINES).join("\n"); + truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_LINES} lines of ${lines.length} total]`; + } + + return truncationMsg ? truncated + truncationMsg : truncated; +} + // Helper function to use login shell on Unix/macOS and PowerShell on Windows function getShellCommand(command: string): { shell: string; args: string[] } { if (process.platform === "win32") { @@ -99,18 +123,9 @@ Commands are automatically executed from the current working directory (${proces let output = stdout + (stderr ? 
`\nStderr: ${stderr}` : ""); output += `\n\n[Command timed out after ${TIMEOUT_MS / 1000} seconds of no output]`; - // Truncate output if it has too many lines - const lines = output.split("\n"); - if (lines.length > 5000) { - const truncatedOutput = lines.slice(0, 5000).join("\n"); - resolve( - truncatedOutput + - `\n\n[Output truncated to first 5000 lines of ${lines.length} total]`, - ); - return; - } - - resolve(output); + // Truncate output by both lines and characters + const truncatedOutput = truncateOutput(output); + resolve(truncatedOutput); }, TIMEOUT_MS); }; @@ -155,18 +170,9 @@ Commands are automatically executed from the current working directory (${proces output = stdout + `\nStderr: ${stderr}`; } - // Truncate output if it has too many lines - const lines = output.split("\n"); - if (lines.length > 5000) { - const truncatedOutput = lines.slice(0, 5000).join("\n"); - resolve( - truncatedOutput + - `\n\n[Output truncated to first 5000 lines of ${lines.length} total]`, - ); - return; - } - - resolve(output); + // Truncate output by both lines and characters + const truncatedOutput = truncateOutput(output); + resolve(truncatedOutput); }); child.on("error", (error) => { From fdcdcb6a4a918f229c47a8d4fce47e820638751d Mon Sep 17 00:00:00 2001 From: Continue Date: Tue, 2 Dec 2025 21:35:50 +0000 Subject: [PATCH 2/2] Address PR feedback: simplify context handling and improve bash truncation - Remove duplicate token validation logic; use compileChatMessages directly - Eliminate unnecessary conversions between unified history and ChatMessage formats - Change bash tool truncation to keep LAST X lines/chars instead of first X - Better captures test/install outcomes at the end of command output - Simplify pruning flow: always use compileChatMessages for context management Addresses feedback from @RomneyDa and @chezsmithy Co-authored-by: dallin --- .../cli/src/stream/streamChatResponse.ts | 125 ++++++------------ .../cli/src/tools/runTerminalCommand.ts | 16 ++- 2 files changed, 52 insertions(+), 89 deletions(-) diff --git a/extensions/cli/src/stream/streamChatResponse.ts b/extensions/cli/src/stream/streamChatResponse.ts index 2167260ebd8..f55e6713f47 100644 --- a/extensions/cli/src/stream/streamChatResponse.ts +++ b/extensions/cli/src/stream/streamChatResponse.ts @@ -2,10 +2,7 @@ import { ModelConfig } from "@continuedev/config-yaml"; import { BaseLlmApi } from "@continuedev/openai-adapters"; import type { ChatHistoryItem } from "core/index.js"; import { compileChatMessages } from "core/llm/countTokens.js"; -import { - convertFromUnifiedHistoryWithSystemMessage, - convertToUnifiedHistory, -} from "core/util/messageConversion.js"; +import { convertFromUnifiedHistoryWithSystemMessage } from "core/util/messageConversion.js"; import * as dotenv from "dotenv"; import type { ChatCompletionMessageParam, @@ -22,7 +19,6 @@ import { withExponentialBackoff, } from "../util/exponentialBackoff.js"; import { logger } from "../util/logger.js"; -import { validateContextLength } from "../util/tokenizer.js"; import { getAllTools, handleToolCalls } from "./handleToolCalls.js"; import { handleAutoCompaction } from "./streamChatResponse.autoCompaction.js"; @@ -152,91 +148,56 @@ export async function processStreamingResponse( tools, } = options; - // Validate context length before making the request - const validation = validateContextLength(chatHistory, model); - - // Get fresh system message and inject it + // Get fresh system message const systemMessage = await services.systemMessage.getSystemMessage( 
services.toolPermissions.getState().currentMode,
   );
 
-  let openaiChatHistory: ChatCompletionMessageParam[];
-  let chatHistoryToUse = chatHistory;
-
-  // If validation fails, try to prune using compileChatMessages
-  if (!validation.isValid) {
-    logger.warn(
-      "Context length validation failed, attempting to prune messages",
-      {
-        error: validation.error,
-        historyLength: chatHistory.length,
-      },
-    );
-
-    try {
-      // Convert to ChatMessage format for pruning
-      const openaiMessages = convertFromUnifiedHistoryWithSystemMessage(
-        chatHistory,
-        systemMessage,
-      ) as ChatCompletionMessageParam[];
-
-      // Use compileChatMessages to prune
-      const contextLength = model.contextLength || 4096;
-      const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024;
-
-      const result = compileChatMessages({
-        modelName: model.model,
-        msgs: openaiMessages.map((msg) => ({
-          role: msg.role,
-          content: msg.content || "",
-          ...("tool_calls" in msg && msg.tool_calls
-            ? { toolCalls: msg.tool_calls }
-            : {}),
-          ...("tool_call_id" in msg && msg.tool_call_id
-            ? { toolCallId: msg.tool_call_id }
-            : {}),
-        })),
-        knownContextLength: contextLength,
-        maxTokens,
-        supportsImages: false,
-        tools,
-      });
+  // Convert unified history to OpenAI message format, injecting the fresh system message
+  const openaiMessages = convertFromUnifiedHistoryWithSystemMessage(
+    chatHistory,
+    systemMessage,
+  ) as ChatCompletionMessageParam[];
 
-      if (result.didPrune) {
-        logger.info("Successfully pruned chat history to fit context length", {
-          originalLength: chatHistory.length,
-          prunedLength: result.compiledChatMessages.length,
-          contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`,
-        });
+  // Map to core ChatMessage shape so compileChatMessages can prune to fit the context window
+  const contextLength = model.contextLength || 4096;
+  const maxTokens = model.defaultCompletionOptions?.maxTokens || 1024;
+
+  const chatMessages = openaiMessages.map((msg) => ({
+    role: msg.role,
+    content: msg.content || "",
+    ...("tool_calls" in msg && msg.tool_calls
+      ? { toolCalls: msg.tool_calls }
+      : {}),
+    ...("tool_call_id" in msg && msg.tool_call_id
+      ? { toolCallId: msg.tool_call_id }
+      : {}),
+  }));
+
+  const result = compileChatMessages({
+    modelName: model.model,
+    msgs: chatMessages,
+    knownContextLength: contextLength,
+    maxTokens,
+    supportsImages: false,
+    tools,
+  });
 
-        // Convert pruned messages back to ChatHistoryItem format
-        const prunedOpenaiMessages = result.compiledChatMessages.map(
-          (msg: any) => ({
-            role: msg.role,
-            content: msg.content,
-            ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}),
-            ...(msg.toolCallId ? 
{ tool_call_id: msg.toolCallId } : {}), - }), - ) as ChatCompletionMessageParam[]; - - // Remove system message from the pruned messages to avoid duplication - const messagesWithoutSystem = prunedOpenaiMessages.filter( - (msg) => msg.role !== "system", - ); - chatHistoryToUse = convertToUnifiedHistory(messagesWithoutSystem); - } - } catch (pruneError: any) { - logger.error("Failed to prune chat history", { error: pruneError }); - throw new Error( - `Context length validation failed and pruning failed: ${pruneError.message}`, - ); - } + if (result.didPrune) { + logger.info("Chat history pruned to fit context length", { + originalLength: chatHistory.length, + prunedLength: result.compiledChatMessages.length, + contextPercentage: `${(result.contextPercentage * 100).toFixed(1)}%`, + }); } - openaiChatHistory = convertFromUnifiedHistoryWithSystemMessage( - chatHistoryToUse, - systemMessage, - ) as ChatCompletionMessageParam[]; + // Convert back to OpenAI format + const openaiChatHistory = result.compiledChatMessages.map((msg: any) => ({ + role: msg.role, + content: msg.content, + ...(msg.toolCalls ? { tool_calls: msg.toolCalls } : {}), + ...(msg.toolCallId ? { tool_call_id: msg.toolCallId } : {}), + })) as ChatCompletionMessageParam[]; const requestStartTime = Date.now(); diff --git a/extensions/cli/src/tools/runTerminalCommand.ts b/extensions/cli/src/tools/runTerminalCommand.ts index 9e49ab8b9a0..7f9d2336881 100644 --- a/extensions/cli/src/tools/runTerminalCommand.ts +++ b/extensions/cli/src/tools/runTerminalCommand.ts @@ -18,23 +18,25 @@ const MAX_OUTPUT_LINES = 5000; const MAX_OUTPUT_CHARS = 200000; // Helper function to truncate command output by both lines and characters +// Keeps the LAST X lines/chars to capture test/install outcomes function truncateOutput(output: string): string { const lines = output.split("\n"); let truncated = output; let truncationMsg = ""; - // First check character limit + // First check character limit - keep last X characters if (output.length > MAX_OUTPUT_CHARS) { - truncated = output.substring(0, MAX_OUTPUT_CHARS); - truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_CHARS} characters of ${output.length} total]`; + const startIndex = output.length - MAX_OUTPUT_CHARS; + truncated = output.substring(startIndex); + truncationMsg = `[Output truncated: showing last ${MAX_OUTPUT_CHARS} characters of ${output.length} total]\n\n`; } - // Then check line limit (only if not already truncated by characters) + // Then check line limit (only if not already truncated by characters) - keep last X lines else if (lines.length > MAX_OUTPUT_LINES) { - truncated = lines.slice(0, MAX_OUTPUT_LINES).join("\n"); - truncationMsg = `\n\n[Output truncated to first ${MAX_OUTPUT_LINES} lines of ${lines.length} total]`; + truncated = lines.slice(-MAX_OUTPUT_LINES).join("\n"); + truncationMsg = `[Output truncated: showing last ${MAX_OUTPUT_LINES} lines of ${lines.length} total]\n\n`; } - return truncationMsg ? truncated + truncationMsg : truncated; + return truncationMsg ? truncationMsg + truncated : truncated; } // Helper function to use login shell on Unix/macOS and PowerShell on Windows
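
Reviewer note (illustrative, not part of the patch): a minimal standalone
sketch of the keep-last truncation behavior introduced above. It mirrors the
patched truncateOutput helper; the constants and truncation-message wording
are copied from the diff, while the sample log and the two console checks
are hypothetical.

    // Standalone TypeScript sketch mirroring the patched truncateOutput.
    const MAX_OUTPUT_LINES = 5000;
    const MAX_OUTPUT_CHARS = 200000;

    function truncateOutput(output: string): string {
      const lines = output.split("\n");
      // The character limit is checked first; the tail is kept so the final
      // lines of the output survive truncation.
      if (output.length > MAX_OUTPUT_CHARS) {
        const tail = output.substring(output.length - MAX_OUTPUT_CHARS);
        return `[Output truncated: showing last ${MAX_OUTPUT_CHARS} characters of ${output.length} total]\n\n${tail}`;
      }
      // Otherwise the line limit applies, again keeping the tail.
      if (lines.length > MAX_OUTPUT_LINES) {
        const tail = lines.slice(-MAX_OUTPUT_LINES).join("\n");
        return `[Output truncated: showing last ${MAX_OUTPUT_LINES} lines of ${lines.length} total]\n\n${tail}`;
      }
      return output;
    }

    // A 6000-line build log keeps its last 5000 lines, so closing results
    // ("Tests passed", "npm ERR!", etc.) are what the model actually sees.
    const log = Array.from({ length: 6000 }, (_, i) => `line ${i}`).join("\n");
    const out = truncateOutput(log);
    console.log(out.startsWith("[Output truncated: showing last 5000 lines of 6000 total]")); // true
    console.log(out.endsWith("line 5999")); // true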