-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Implement extension tokenization #2763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,119 @@ | ||||||||||||
| /*--------------------------------------------------------------------------------------------- | ||||||||||||
| * Copyright (c) Microsoft Corporation. All rights reserved. | ||||||||||||
| * Licensed under the MIT License. See License.txt in the project root for license information. | ||||||||||||
| *--------------------------------------------------------------------------------------------*/ | ||||||||||||
|
|
||||||||||||
| import { OutputMode, Raw } from '@vscode/prompt-tsx'; | ||||||||||||
| import { LanguageModelChat, LanguageModelChatTool } from 'vscode'; | ||||||||||||
| import { ITokenizer } from '../../../util/common/tokenizer'; | ||||||||||||
| import { assertNever } from '../../../util/vs/base/common/assert'; | ||||||||||||
| import { calculateImageTokenCost } from '../../tokenizer/node/tokenizer'; | ||||||||||||
| import { convertToApiChatMessage } from './extChatEndpoint'; | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * BaseTokensPerCompletion is the minimum tokens for a completion request. | ||||||||||||
| * Replies are primed with <|im_start|>assistant<|message|>, so these tokens represent the | ||||||||||||
| * special token and the role name. | ||||||||||||
| */ | ||||||||||||
| const BaseTokensPerCompletion = 3; | ||||||||||||
|
|
||||||||||||
| /* | ||||||||||||
| * Each GPT 3.5 / GPT 4 message comes with 3 tokens per message due to special characters | ||||||||||||
| */ | ||||||||||||
| const BaseTokensPerMessage = 3; | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| export class ExtensionContributedChatTokenizer implements ITokenizer { | ||||||||||||
| public readonly mode = OutputMode.Raw; | ||||||||||||
|
|
||||||||||||
| constructor(private readonly languageModel: LanguageModelChat) { } | ||||||||||||
|
|
||||||||||||
| async tokenLength(text: string | Raw.ChatCompletionContentPart): Promise<number> { | ||||||||||||
| if (typeof text === 'string') { | ||||||||||||
| return this._textTokenLength(text); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| switch (text.type) { | ||||||||||||
| case Raw.ChatCompletionContentPartKind.Text: | ||||||||||||
| return this._textTokenLength(text.text); | ||||||||||||
| case Raw.ChatCompletionContentPartKind.Opaque: | ||||||||||||
| return text.tokenUsage || 0; | ||||||||||||
| case Raw.ChatCompletionContentPartKind.Image: | ||||||||||||
| if (text.imageUrl.url.startsWith('data:image/')) { | ||||||||||||
| try { | ||||||||||||
| return calculateImageTokenCost(text.imageUrl.url, text.imageUrl.detail); | ||||||||||||
| } catch { | ||||||||||||
| return this._textTokenLength(text.imageUrl.url); | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
| return this._textTokenLength(text.imageUrl.url); | ||||||||||||
| case Raw.ChatCompletionContentPartKind.CacheBreakpoint: | ||||||||||||
| return 0; | ||||||||||||
| default: | ||||||||||||
| assertNever(text, `unknown content part (${JSON.stringify(text)})`); | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| private async _textTokenLength(text: string): Promise<number> { | ||||||||||||
| if (!text) { | ||||||||||||
| return 0; | ||||||||||||
| } | ||||||||||||
| // Use the VS Code language model API to count tokens | ||||||||||||
| return this.languageModel.countTokens(text); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| async countMessageTokens(message: Raw.ChatMessage): Promise<number> { | ||||||||||||
| // Convert to VS Code message format and use the language model's countTokens | ||||||||||||
| const apiMessages = convertToApiChatMessage([message]); | ||||||||||||
| if (apiMessages.length === 0) { | ||||||||||||
| return 0; | ||||||||||||
|
||||||||||||
| return 0; | |
| // Edge case: convertToApiChatMessage can return an empty array when the message | |
| // only contains filtered content (e.g., non-base64 images). In that case, we still | |
| // account for the base per-message token overhead. | |
| return BaseTokensPerMessage; |
Copilot
AI
Jan 9, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The check if (!value) on line 105 will skip over falsy values including 0, false, empty strings, etc. While this may be intentional for null/undefined, it could incorrectly skip legitimate values. For example, a tool parameter with a default value of 0 or false would not contribute to the token count. Consider using a more explicit check like if (value === null || value === undefined) to only skip truly absent values.
This issue also appears in the following locations of the same file:
- line 112
| if (!value) { | |
| if (value === null || value === undefined) { |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,227 @@ | ||
| /*--------------------------------------------------------------------------------------------- | ||
| * Copyright (c) Microsoft Corporation. All rights reserved. | ||
| * Licensed under the MIT License. See License.txt in the project root for license information. | ||
| *--------------------------------------------------------------------------------------------*/ | ||
|
|
||
| import { Raw } from '@vscode/prompt-tsx'; | ||
| import { beforeEach, describe, expect, it, vi } from 'vitest'; | ||
| import type { LanguageModelChat, LanguageModelChatMessage, LanguageModelChatMessage2 } from 'vscode'; | ||
| import { ExtensionContributedChatTokenizer } from '../extChatTokenizer'; | ||
|
|
||
| /** | ||
| * Mock implementation of LanguageModelChat for testing purposes. | ||
| * Simulates token counting with a configurable strategy. | ||
| */ | ||
| class MockLanguageModelChat implements Partial<LanguageModelChat> { | ||
| private readonly _tokenCountFn: (input: string | LanguageModelChatMessage | LanguageModelChatMessage2) => number; | ||
|
|
||
| constructor(tokenCountFn?: (input: string | LanguageModelChatMessage | LanguageModelChatMessage2) => number) { | ||
| // Default: approximate token count as words (split by whitespace) | ||
| this._tokenCountFn = tokenCountFn ?? ((input) => { | ||
| if (typeof input === 'string') { | ||
| return input.split(/\s+/).filter(Boolean).length || 0; | ||
| } | ||
| // For messages, count tokens in all text content parts | ||
| let total = 0; | ||
| for (const part of input.content) { | ||
| if ('value' in part && typeof part.value === 'string') { | ||
| total += part.value.split(/\s+/).filter(Boolean).length || 0; | ||
| } | ||
| } | ||
| return total; | ||
| }); | ||
| } | ||
|
|
||
| countTokens(input: string | LanguageModelChatMessage | LanguageModelChatMessage2): Thenable<number> { | ||
| return Promise.resolve(this._tokenCountFn(input)); | ||
| } | ||
| } | ||
|
|
||
| describe('ExtensionContributedChatTokenizer', () => { | ||
| let tokenizer: ExtensionContributedChatTokenizer; | ||
| let mockLanguageModel: MockLanguageModelChat; | ||
|
|
||
| beforeEach(() => { | ||
| mockLanguageModel = new MockLanguageModelChat(); | ||
| tokenizer = new ExtensionContributedChatTokenizer(mockLanguageModel as unknown as LanguageModelChat); | ||
| }); | ||
|
|
||
| describe('tokenLength', () => { | ||
| it('should count tokens for a simple string', async () => { | ||
| const result = await tokenizer.tokenLength('Hello world'); | ||
| expect(result).toBe(2); // "Hello" and "world" | ||
| }); | ||
|
|
||
| it('should return 0 for an empty string', async () => { | ||
| const result = await tokenizer.tokenLength(''); | ||
| expect(result).toBe(0); | ||
| }); | ||
|
|
||
| it('should count tokens for a text content part', async () => { | ||
| const textPart: Raw.ChatCompletionContentPart = { | ||
| type: Raw.ChatCompletionContentPartKind.Text, | ||
| text: 'This is a test message' | ||
| }; | ||
| const result = await tokenizer.tokenLength(textPart); | ||
| expect(result).toBe(5); // 5 words | ||
| }); | ||
|
|
||
| it('should return tokenUsage for opaque content parts', async () => { | ||
| const opaquePart: Raw.ChatCompletionContentPart = { | ||
| type: Raw.ChatCompletionContentPartKind.Opaque, | ||
| value: { some: 'data' }, | ||
| tokenUsage: 42 | ||
| }; | ||
| const result = await tokenizer.tokenLength(opaquePart); | ||
| expect(result).toBe(42); | ||
| }); | ||
|
|
||
| it('should return 0 for opaque content parts without tokenUsage', async () => { | ||
| const opaquePart: Raw.ChatCompletionContentPart = { | ||
| type: Raw.ChatCompletionContentPartKind.Opaque, | ||
| value: { some: 'data' } | ||
| }; | ||
| const result = await tokenizer.tokenLength(opaquePart); | ||
| expect(result).toBe(0); | ||
| }); | ||
|
|
||
| it('should return 0 for cache breakpoint content parts', async () => { | ||
| const cacheBreakpoint: Raw.ChatCompletionContentPart = { | ||
| type: Raw.ChatCompletionContentPartKind.CacheBreakpoint | ||
| }; | ||
| const result = await tokenizer.tokenLength(cacheBreakpoint); | ||
| expect(result).toBe(0); | ||
| }); | ||
|
Comment on lines +88 to +94
|
||
| }); | ||
|
|
||
| describe('countMessageTokens', () => { | ||
| it('should count tokens for a user message', async () => { | ||
| const message: Raw.ChatMessage = { | ||
| role: Raw.ChatRole.User, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello there' }] | ||
| }; | ||
| const result = await tokenizer.countMessageTokens(message); | ||
| // BaseTokensPerMessage (3) + message content tokens | ||
| expect(result).toBeGreaterThanOrEqual(3); | ||
| }); | ||
|
|
||
| it('should count tokens for an assistant message', async () => { | ||
| const message: Raw.ChatMessage = { | ||
| role: Raw.ChatRole.Assistant, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'I can help with that' }] | ||
| }; | ||
| const result = await tokenizer.countMessageTokens(message); | ||
| expect(result).toBeGreaterThanOrEqual(3); | ||
| }); | ||
|
|
||
| it('should count tokens for a system message', async () => { | ||
| const message: Raw.ChatMessage = { | ||
| role: Raw.ChatRole.System, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'You are a helpful assistant' }] | ||
| }; | ||
| const result = await tokenizer.countMessageTokens(message); | ||
| expect(result).toBeGreaterThanOrEqual(3); | ||
| }); | ||
| }); | ||
|
|
||
| describe('countMessagesTokens', () => { | ||
| it('should count tokens for multiple messages', async () => { | ||
| const messages: Raw.ChatMessage[] = [ | ||
| { | ||
| role: Raw.ChatRole.System, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'You are helpful' }] | ||
| }, | ||
| { | ||
| role: Raw.ChatRole.User, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hi' }] | ||
| }, | ||
| { | ||
| role: Raw.ChatRole.Assistant, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello' }] | ||
| } | ||
| ]; | ||
| const result = await tokenizer.countMessagesTokens(messages); | ||
| // BaseTokensPerCompletion (3) + 3 messages * BaseTokensPerMessage (3) + content tokens | ||
| expect(result).toBeGreaterThanOrEqual(12); | ||
| }); | ||
|
|
||
| it('should return base tokens for empty messages array', async () => { | ||
| const result = await tokenizer.countMessagesTokens([]); | ||
| expect(result).toBe(3); // BaseTokensPerCompletion | ||
| }); | ||
| }); | ||
|
|
||
| describe('countToolTokens', () => { | ||
| it('should count tokens for a single tool', async () => { | ||
| const tools = [{ | ||
| name: 'get_weather', | ||
| description: 'Get the current weather', | ||
| inputSchema: { | ||
| type: 'object', | ||
| properties: { | ||
| location: { type: 'string' } | ||
| } | ||
| } | ||
| }]; | ||
| const result = await tokenizer.countToolTokens(tools); | ||
| // baseToolTokens (16) + baseTokensPerTool (8) + object tokens * 1.1 | ||
| expect(result).toBeGreaterThan(24); | ||
| }); | ||
|
|
||
| it('should count tokens for multiple tools', async () => { | ||
| const tools = [ | ||
| { | ||
| name: 'get_weather', | ||
| description: 'Get weather info', | ||
| inputSchema: { type: 'object' } | ||
| }, | ||
| { | ||
| name: 'search', | ||
| description: 'Search the web', | ||
| inputSchema: { type: 'object' } | ||
| } | ||
| ]; | ||
| const result = await tokenizer.countToolTokens(tools); | ||
| // baseToolTokens (16) + 2 * baseTokensPerTool (8) + object tokens | ||
| expect(result).toBeGreaterThan(32); | ||
| }); | ||
|
|
||
| it('should return 0 for empty tools array', async () => { | ||
| const result = await tokenizer.countToolTokens([]); | ||
| expect(result).toBe(0); | ||
| }); | ||
| }); | ||
|
Comment on lines +154 to +193
|
||
|
|
||
| describe('with custom token counting', () => { | ||
| it('should use the language model countTokens method', async () => { | ||
| const countTokensSpy = vi.fn().mockResolvedValue(10); | ||
| const customMock = { | ||
| countTokens: countTokensSpy | ||
| } as unknown as LanguageModelChat; | ||
|
|
||
| const customTokenizer = new ExtensionContributedChatTokenizer(customMock); | ||
| const result = await customTokenizer.tokenLength('test string'); | ||
|
|
||
| expect(countTokensSpy).toHaveBeenCalledWith('test string'); | ||
| expect(result).toBe(10); | ||
| }); | ||
|
|
||
| it('should delegate message token counting to language model', async () => { | ||
| const countTokensSpy = vi.fn().mockResolvedValue(15); | ||
| const customMock = { | ||
| countTokens: countTokensSpy | ||
| } as unknown as LanguageModelChat; | ||
|
|
||
| const customTokenizer = new ExtensionContributedChatTokenizer(customMock); | ||
| const message: Raw.ChatMessage = { | ||
| role: Raw.ChatRole.User, | ||
| content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'Hello' }] | ||
| }; | ||
|
|
||
| const result = await customTokenizer.countMessageTokens(message); | ||
| // BaseTokensPerMessage (3) + 15 from language model | ||
| expect(result).toBe(18); | ||
| expect(countTokensSpy).toHaveBeenCalled(); | ||
| }); | ||
| }); | ||
| }); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no null safety check for `text.imageUrl` before accessing `text.imageUrl.url`. If the `imageUrl` property is undefined or null, this will throw a runtime error. Consider adding a check like `if (text.imageUrl?.url.startsWith('data:image/'))`, or handling the case where `imageUrl` might be missing.