
Show token use against available context window on Chat message hover #469


Open · wants to merge 6 commits into main
10 changes: 10 additions & 0 deletions src/extension/prompt/common/conversation.ts
@@ -328,6 +328,16 @@ export interface IResultMetadata {
toolCallResults?: Record<string, LanguageModelToolResult>;
maxToolCallsExceeded?: boolean;
summary?: { toolCallRoundId: string; text: string };

/**
* Token usage information for the response
*/
tokenUsage?: {
promptTokens: number;
completionTokens?: number;
totalTokens: number;
contextWindow: number;
};
}

/** There may be no metadata for results coming from old persisted messages, or from messages that are currently in progress (TODO, try to handle this case) */
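As a sketch of how a consumer might render the new `tokenUsage` field into hover or footer text (the `formatTokenUsage` helper and the percentage display are illustrative, not part of this PR):

// Hypothetical consumer of IResultMetadata.tokenUsage; not part of this change.
function formatTokenUsage(usage: { totalTokens: number; contextWindow: number } | undefined): string | undefined {
	if (!usage || usage.contextWindow <= 0) {
		return undefined; // old persisted messages may carry no token metadata
	}
	const pct = Math.round((usage.totalTokens / usage.contextWindow) * 100);
	return `${usage.totalTokens}/${usage.contextWindow} tokens (${pct}%)`;
}
// formatTokenUsage({ totalTokens: 12000, contextWindow: 128000 }) === "12000/128000 tokens (9%)"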
14 changes: 12 additions & 2 deletions src/extension/prompt/node/chatParticipantRequestHandler.ts
@@ -256,21 +256,31 @@ export class ChatParticipantRequestHandler {

result = await chatResult;
const endpoint = await this._endpointProvider.getChatEndpoint(this.request);
result.details = `${endpoint.name} • ${endpoint.multiplier ?? 0}x`;

// Build details string with model info and token usage if available
let details = `${endpoint.name} • ${endpoint.multiplier ?? 0}x`;
if (result.metadata?.tokenUsage) {
const { totalTokens, contextWindow } = result.metadata.tokenUsage;
details += ` • ${totalTokens}/${contextWindow} tokens`;
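// Illustrative output, e.g. "GPT-4.1 • 1x • 12345/128000 tokens" (model name and numbers hypothetical)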
}
result.details = details;
}

this._conversationStore.addConversation(this.turn.id, this.conversation);

// mixin fixed metadata shape into result. Modified in place because the object is already
// cached in the conversation store and we want the full information when looking this up
// later
// Merge in required metadata fields. Preserve any existing metadata (e.g. tokenCount/contextWindow injected downstream).
const existingMeta: any = (result as ICopilotChatResult).metadata ?? {};
mixin(result, {
metadata: {
...existingMeta,
modelMessageId: this.turn.responseId ?? '',
responseId: this.turn.id,
sessionId: this.conversation.sessionId,
agentId: this.chatAgentArgs.agentId,
command: this.request.command
command: this.request.command,
}
} satisfies ICopilotChatResult, true);

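A minimal sketch of the merge semantics above, assuming standard object spread (all values illustrative): keys injected downstream, such as `tokenUsage`, survive because `existingMeta` is spread first, while the fixed fields written afterwards win on conflict.

// Sketch only; mirrors the spread-then-override pattern in the mixin call above.
const existingMeta = { tokenUsage: { promptTokens: 12000, totalTokens: 12000, contextWindow: 128000 }, responseId: 'stale' };
const merged = {
	...existingMeta,
	responseId: 'turn-1', // fixed field overrides the stale value
	sessionId: 'session-1',
};
// merged.tokenUsage is preserved; merged.responseId === 'turn-1'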
32 changes: 32 additions & 0 deletions src/extension/prompt/node/defaultIntentRequestHandler.ts
@@ -136,6 +136,38 @@ export class DefaultIntentRequestHandler {
mixin(chatResult, { metadata: metadataFragment }, true);
const baseModelTelemetry = createTelemetryWithId();
chatResult = await this.processResult(resultDetails.response, responseMessage, chatResult, metadataFragment, baseModelTelemetry, resultDetails.toolCallRounds);

// --- augment metadata with token usage/context window ---------------------------------
// We want to surface the number of tokens used in the final request that produced this
// response, along with the effective model context window. This enables lightweight UI
// affordances (e.g. inline footer text) to visualize prompt size vs available budget.
//
// IMPORTANT: Do NOT change Auto model behavior. We simply read the already-resolved
// endpoint (which may itself be the resolved backing model for Auto) and record numbers.
// No additional resolution or overrides are performed here.
try {
// Count tokens in the LAST request that was sent to the model (post tool-calling loop).
// This corresponds to the messages array used in the final fetch that yielded the model reply.
const ep = intentInvocation.endpoint;
const tokenizer = ep.acquireTokenizer();
const lastMsgs = resultDetails.lastRequestMessages ?? [];
const tokenCount = await tokenizer.countMessagesTokens(lastMsgs);
const contextWindow = ep.modelMaxPromptTokens;

// Merge into metadata with structured token usage information
const existingMeta: any = chatResult.metadata ?? {};
const tokenUsage = typeof tokenCount === 'number' && typeof contextWindow === 'number' ? {
promptTokens: tokenCount,
totalTokens: tokenCount, // For now, only tracking prompt tokens
contextWindow: contextWindow
} : undefined;

// chatResult.metadata is read-only in the ChatResult type, so build a new object.
chatResult = { ...chatResult, metadata: { ...existingMeta, tokenCount, contextWindow, tokenUsage } };
} catch {
// ignore token counting failures; metadata will simply omit token info
}
// -----------------------------------------------------------------------------------------
if (chatResult.errorDetails && intentInvocation.modifyErrorDetails) {
chatResult.errorDetails = intentInvocation.modifyErrorDetails(chatResult.errorDetails, resultDetails.response);
}
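Condensed, the token-accounting step in this hunk reduces to the shape below. The `acquireTokenizer`, `countMessagesTokens`, and `modelMaxPromptTokens` names come from the diff above; the wrapper function and its types are illustrative.

// Sketch of the counting logic, using the endpoint/tokenizer APIs shown in the diff.
interface ChatEndpointLike {
	acquireTokenizer(): { countMessagesTokens(msgs: unknown[]): Promise<number> };
	modelMaxPromptTokens: number;
}

async function computeTokenUsage(endpoint: ChatEndpointLike, lastRequestMessages: unknown[]) {
	const tokenizer = endpoint.acquireTokenizer();
	const promptTokens = await tokenizer.countMessagesTokens(lastRequestMessages);
	return {
		promptTokens,
		totalTokens: promptTokens, // completion tokens are not tracked yet
		contextWindow: endpoint.modelMaxPromptTokens,
	};
}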