Advertise full context, not just prompt (#3674)

lramos15 · web-flow · commit f8d1854598d9 · 2026-02-11T21:59:02.000Z
diff --git a/src/extension/intents/node/toolCallingLoop.ts b/src/extension/intents/node/toolCallingLoop.ts
@@ -922,6 +922,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 			messages: buildPromptResult.messages,
 			tokenizer,
 			tools: availableTools,
+			maxOutputTokens: endpoint.maxOutputTokens,
 		});
 		fetchStreamSource?.resolve();
 		const chatResult = await processResponsePromise ?? undefined;
diff --git a/src/extension/prompts/node/agent/summarizedConversationHistory.tsx b/src/extension/prompts/node/agent/summarizedConversationHistory.tsx
@@ -604,6 +604,7 @@ class ConversationHistorySummarizer {
 			tokenizer,
 			tools: this.props.tools ?? undefined,
 			totalPromptTokens: summaryResponse.type === ChatFetchResponseType.Success ? summaryResponse.usage?.prompt_tokens : undefined,
+			maxOutputTokens: endpoint.maxOutputTokens,
 		});
 
 		return {
diff --git a/src/platform/tokenizer/node/promptTokenDetails.ts b/src/platform/tokenizer/node/promptTokenDetails.ts
@@ -22,6 +22,7 @@ export const PromptTokenLabel = {
 	// System category
 	SystemInstructions: 'System Instructions',
 	Tools: 'Tool Definitions',
+	ReservedOutput: 'Reserved Output',
 
 	// User Context category
 	Messages: 'Messages',
@@ -222,6 +223,8 @@ export interface IPromptTokenDetailOptions {
 	totalPromptTokens?: number;
 	/** The tools available to the model */
 	tools?: readonly LanguageModelToolInformation[];
+	/** The maximum output tokens for the model, shown as reserved output in the breakdown */
+	maxOutputTokens?: number;
 }
 
 /**
@@ -353,6 +356,11 @@ export async function computePromptTokenDetails(
 		counts[PromptTokenCategory.System][PromptTokenLabel.Tools] = toolTokens;
 	}
 
+	// Count reserved output tokens
+	if (options.maxOutputTokens && options.maxOutputTokens > 0) {
+		counts[PromptTokenCategory.System][PromptTokenLabel.ReservedOutput] = options.maxOutputTokens;
+	}
+
 	// Calculate total tokens
 	let totalTokens = options.totalPromptTokens;
 	if (totalTokens === undefined) {
@@ -361,6 +369,9 @@ export async function computePromptTokenDetails(
 			totalTokens += await tokenizer.countToolTokens(tools);
 		}
 	}
+	if (options.maxOutputTokens && options.maxOutputTokens > 0) {
+		totalTokens += options.maxOutputTokens;
+	}
 
 	// Convert counts to percentages
 	const details: ChatResultPromptTokenDetail[] = [];