
Commit 506cce1

bhavyaus and Copilot authored
Add CAPI text-3-small endpoint support for embeddings (#1037)
* Add CAPI text-3-small endpoint support for embeddings
* Update src/platform/endpoint/common/endpointProvider.ts

Co-authored-by: Copilot <[email protected]>

* Remove todo comment
* Update methods
* Remove unused options parameter from fetchResponseWithBatches call

---------

Co-authored-by: Copilot <[email protected]>
1 parent 014f797 commit 506cce1


15 files changed: +328 -15 lines changed


src/extension/context/node/resolvers/extensionApi.tsx

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ export class VSCodeAPIContextElement extends PromptElement<VSCodeAPIContextProps
 			return [];
 		}
 
-		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [this.props.query], {}, new TelemetryCorrelationId('VSCodeAPIContextElement::getSnippets'), token);
+		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [this.props.query], { endpointType: 'capi' }, new TelemetryCorrelationId('VSCodeAPIContextElement::getSnippets'), token);
 		return this.apiEmbeddingsIndex.nClosestValues(embeddingResult.values[0], 5);
 	}

src/extension/prompt/vscode-node/endpointProviderImpl.ts

Lines changed: 22 additions & 2 deletions
@@ -9,15 +9,16 @@ import { ConfigKey, IConfigurationService } from '../../../platform/configuratio
 import { AutoChatEndpoint } from '../../../platform/endpoint/common/autoChatEndpoint';
 import { IAutomodeService } from '../../../platform/endpoint/common/automodeService';
 import { ICAPIClientService } from '../../../platform/endpoint/common/capiClient';
-import { ChatEndpointFamily, IChatModelInformation, ICompletionModelInformation, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
+import { ChatEndpointFamily, EmbeddingsEndpointFamily, IChatModelInformation, ICompletionModelInformation, IEmbeddingModelInformation, IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
 import { CopilotChatEndpoint } from '../../../platform/endpoint/node/copilotChatEndpoint';
+import { EmbeddingEndpoint } from '../../../platform/endpoint/node/embeddingsEndpoint';
 import { IModelMetadataFetcher, ModelMetadataFetcher } from '../../../platform/endpoint/node/modelMetadataFetcher';
 import { applyExperimentModifications, ExperimentConfig, getCustomDefaultModelExperimentConfig, ProxyExperimentEndpoint } from '../../../platform/endpoint/node/proxyExperimentEndpoint';
 import { ExtensionContributedChatEndpoint } from '../../../platform/endpoint/vscode-node/extChatEndpoint';
 import { IEnvService } from '../../../platform/env/common/envService';
 import { ILogService } from '../../../platform/log/common/logService';
 import { IFetcherService } from '../../../platform/networking/common/fetcherService';
-import { IChatEndpoint } from '../../../platform/networking/common/networking';
+import { IChatEndpoint, IEmbeddingsEndpoint } from '../../../platform/networking/common/networking';
 import { IRequestLogger } from '../../../platform/requestLogger/node/requestLogger';
 import { IExperimentationService } from '../../../platform/telemetry/common/nullExperimentationService';
 import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
@@ -30,6 +31,7 @@ export class ProductionEndpointProvider implements IEndpointProvider {
 	declare readonly _serviceBrand: undefined;
 
 	private _chatEndpoints: Map<string, IChatEndpoint> = new Map();
+	private _embeddingEndpoints: Map<string, IEmbeddingsEndpoint> = new Map();
 	private readonly _modelFetcher: IModelMetadataFetcher;
 
 	constructor(
@@ -144,6 +146,24 @@ export class ProductionEndpointProvider implements IEndpointProvider {
 		return endpoint;
 	}
 
+	async getEmbeddingsEndpoint(family?: EmbeddingsEndpointFamily): Promise<IEmbeddingsEndpoint> {
+		this._logService.trace(`Resolving embedding model`);
+		const modelMetadata = await this._modelFetcher.getEmbeddingsModel('text-embedding-3-small');
+		const model = await this.getOrCreateEmbeddingEndpointInstance(modelMetadata);
+		this._logService.trace(`Resolved embedding model`);
+		return model;
+	}
+
+	private async getOrCreateEmbeddingEndpointInstance(modelMetadata: IEmbeddingModelInformation): Promise<IEmbeddingsEndpoint> {
+		const modelId = 'text-embedding-3-small';
+		let embeddingEndpoint = this._embeddingEndpoints.get(modelId);
+		if (!embeddingEndpoint) {
+			embeddingEndpoint = this._instantiationService.createInstance(EmbeddingEndpoint, modelMetadata);
+			this._embeddingEndpoints.set(modelId, embeddingEndpoint);
+		}
+		return embeddingEndpoint;
+	}
+
 	async getAllCompletionModels(forceRefresh?: boolean): Promise<ICompletionModelInformation[]> {
 		return this._modelFetcher.getAllCompletionModels(forceRefresh ?? false);
 	}
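The provider resolves the CAPI embeddings model lazily and caches one `EmbeddingEndpoint` per model id, so repeated lookups reuse the same instance. Below is a minimal consumer sketch under that assumption; the consumer class itself is hypothetical, and only `getEmbeddingsEndpoint` and the endpoint properties it reads come from this change.

```ts
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';

// Hypothetical consumer: resolves the embeddings endpoint once per model id and
// reads the limits that the batching logic in RemoteEmbeddingsComputer relies on.
class EmbeddingsEndpointInspector {
	constructor(@IEndpointProvider private readonly endpointProvider: IEndpointProvider) { }

	async describe(): Promise<string> {
		// Repeated calls return the cached EmbeddingEndpoint for 'text-embedding-3-small'.
		const endpoint = await this.endpointProvider.getEmbeddingsEndpoint('text3small');
		return `batch=${endpoint.maxBatchSize}, maxPromptTokens=${endpoint.modelMaxPromptTokens}`;
	}
}
```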

src/extension/prompt/vscode-node/settingsEditorSearchServiceImpl.ts

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ export class SettingsEditorSearchServiceImpl implements ISettingsEditorSearchSer
 
 		let embeddingResult: Embeddings;
 		try {
-			embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [query], {}, new TelemetryCorrelationId('SettingsEditorSearchServiceImpl::provideSettingsSearchResults'), token);
+			embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [query], { endpointType: 'capi' }, new TelemetryCorrelationId('SettingsEditorSearchServiceImpl::provideSettingsSearchResults'), token);
 		} catch {
 			if (token.isCancellationRequested) {
 				progress.report(canceledBundle);

src/extension/prompts/node/panel/newWorkspace/newWorkspace.tsx

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ export class NewWorkspacePrompt extends PromptElement<NewWorkspacePromptProps, N
 		}
 		else if (instruction.intent === 'Project') {
 			if (this.props.useTemplates) {
-				const result = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [instruction.question], {}, undefined);
+				const result = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [instruction.question], { endpointType: 'capi' }, undefined);
 				progress.report(new ChatResponseProgressPart(l10n.t('Searching project template index...')));
 				const similarProjects = await this.projectTemplatesIndex.nClosestValues(result.values[0], 1);
 				if (similarProjects.length > 0) {

src/extension/prompts/node/panel/vscode.tsx

Lines changed: 1 addition & 1 deletion
@@ -136,7 +136,7 @@ export class VscodePrompt extends PromptElement<VscodePromptProps, VscodePromptS
 			return { settings: [], commands: [], query: userQuery };
 		}
 
-		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [userQuery], {}, undefined);
+		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [userQuery], { endpointType: 'capi' }, undefined);
 		if (token.isCancellationRequested) {
 			return { settings: [], commands: [], query: userQuery };
 		}

src/extension/test/vscode-node/endpoints.test.ts

Lines changed: 18 additions & 1 deletion
@@ -7,7 +7,7 @@ import assert from 'assert';
 import { SinonSandbox, createSandbox } from 'sinon';
 import { LanguageModelChat } from 'vscode';
 import { CHAT_MODEL } from '../../../platform/configuration/common/configurationService';
-import { IChatModelInformation, ICompletionModelInformation } from '../../../platform/endpoint/common/endpointProvider';
+import { IChatModelInformation, ICompletionModelInformation, IEmbeddingModelInformation } from '../../../platform/endpoint/common/endpointProvider';
 import { IModelMetadataFetcher } from '../../../platform/endpoint/node/modelMetadataFetcher';
 import { ITestingServicesAccessor } from '../../../platform/test/node/services';
 import { TokenizerType } from '../../../util/common/tokenizer';
@@ -43,6 +43,23 @@ class FakeModelMetadataFetcher implements IModelMetadataFetcher {
 			}
 		};
 	}
+
+	async getEmbeddingsModel(): Promise<IEmbeddingModelInformation> {
+		return {
+			id: 'text-embedding-3-small',
+			name: 'fake-name',
+			version: 'fake-version',
+			model_picker_enabled: false,
+			is_chat_default: false,
+			is_chat_fallback: false,
+			capabilities: {
+				type: 'embeddings',
+				tokenizer: TokenizerType.O200K,
+				family: 'text-embedding-3-small',
+				limits: { max_inputs: 256 }
+			}
+		};
+	}
 }
 
 suite('Endpoint Class Test', function () {
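A hedged sketch of a test that could sit in this suite, assuming `FakeModelMetadataFetcher` can be constructed without arguments (its constructor is not shown in this diff); only the metadata shape comes from the hunk above.

```ts
test('fake metadata fetcher exposes the text-embedding-3-small model', async function () {
	// Assumption: the fake requires no constructor arguments.
	const fetcher = new FakeModelMetadataFetcher();
	const model = await fetcher.getEmbeddingsModel();
	assert.strictEqual(model.id, 'text-embedding-3-small');
	assert.strictEqual(model.capabilities.family, 'text-embedding-3-small');
	assert.strictEqual(model.capabilities.limits?.max_inputs, 256);
});
```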

src/platform/embeddings/common/embeddingsComputer.ts

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ export const IEmbeddingsComputer = createServiceIdentifier<IEmbeddingsComputer>(
 
 export type ComputeEmbeddingsOptions = {
 	readonly inputType?: 'document' | 'query';
+	readonly endpointType?: 'capi' | 'github';
 };
 
 export interface IEmbeddingsComputer {
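The new `endpointType` option is how call sites opt into the CAPI path; omitting it (or passing `{}`) keeps the existing GitHub embeddings behavior, as the routing check in `RemoteEmbeddingsComputer.computeEmbeddings` below shows. A minimal call-site sketch; `computer`, `index`, `query`, and `token` are assumed bindings in the caller, not part of this change.

```ts
// Route this request through the CAPI text-embedding-3-small endpoint.
// Leaving endpointType out (or passing {}) keeps the default GitHub path.
const result = await computer.computeEmbeddings(
	EmbeddingType.text3small_512,
	[query],
	{ endpointType: 'capi' },
	new TelemetryCorrelationId('Example::computeEmbeddings'), // hypothetical call site
	token
);
// Rank stored embeddings against the query, as the updated call sites above do.
const closest = index.nClosestValues(result.values[0], 5);
```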

src/platform/embeddings/common/remoteEmbeddingsComputer.ts

Lines changed: 176 additions & 2 deletions
@@ -7,19 +7,29 @@ import { RequestType } from '@vscode/copilot-api';
 import type { CancellationToken } from 'vscode';
 import { createRequestHMAC } from '../../../util/common/crypto';
 import { CallTracker, TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
+import { Limiter } from '../../../util/vs/base/common/async';
 import { env } from '../../../util/vs/base/common/process';
 import { generateUuid } from '../../../util/vs/base/common/uuid';
 import { IAuthenticationService } from '../../authentication/common/authentication';
 import { getGithubMetadataHeaders } from '../../chunking/common/chunkingEndpointClientImpl';
 import { ICAPIClientService } from '../../endpoint/common/capiClient';
+import { IEndpointProvider } from '../../endpoint/common/endpointProvider';
 import { IEnvService } from '../../env/common/envService';
 import { logExecTime } from '../../log/common/logExecTime';
 import { ILogService } from '../../log/common/logService';
 import { IFetcherService } from '../../networking/common/fetcherService';
-import { postRequest } from '../../networking/common/networking';
+import { IEmbeddingsEndpoint, postRequest } from '../../networking/common/networking';
 import { ITelemetryService } from '../../telemetry/common/telemetry';
-import { ComputeEmbeddingsOptions, Embedding, EmbeddingType, Embeddings, IEmbeddingsComputer } from './embeddingsComputer';
+import { ComputeEmbeddingsOptions, Embedding, EmbeddingType, EmbeddingTypeInfo, EmbeddingVector, Embeddings, IEmbeddingsComputer, getWellKnownEmbeddingTypeInfo } from './embeddingsComputer';
 
+interface CAPIEmbeddingResults {
+	readonly type: 'success';
+	readonly embeddings: EmbeddingVector[];
+}
+interface CAPIEmbeddingError {
+	readonly type: 'failed';
+	readonly reason: string;
+}
 
 export class RemoteEmbeddingsComputer implements IEmbeddingsComputer {
 
@@ -34,6 +44,7 @@ export class RemoteEmbeddingsComputer implements IEmbeddingsComputer {
 		@IFetcherService private readonly _fetcherService: IFetcherService,
 		@ILogService private readonly _logService: ILogService,
 		@ITelemetryService private readonly _telemetryService: ITelemetryService,
+		@IEndpointProvider private readonly _endpointProvider: IEndpointProvider,
 	) { }
 
 	public async computeEmbeddings(
@@ -44,6 +55,12 @@ export class RemoteEmbeddingsComputer implements IEmbeddingsComputer {
 		cancellationToken?: CancellationToken,
 	): Promise<Embeddings> {
 		return logExecTime(this._logService, 'RemoteEmbeddingsComputer::computeEmbeddings', async () => {
+
+			if (options?.endpointType === 'capi') {
+				const embeddings = await this.computeCAPIEmbeddings(inputs, options, cancellationToken);
+				return embeddings ?? { type: embeddingType, values: [] };
+			}
+
 			const token = (await this._authService.getAnyGitHubSession({ silent: true }))?.accessToken;
 			if (!token) {
 				throw new Error('No authentication token available');
@@ -127,4 +144,161 @@ export class RemoteEmbeddingsComputer implements IEmbeddingsComputer {
 			return { type: embeddingType, values: embeddingsOut };
 		});
 	}
+
+	private async computeCAPIEmbeddings(
+		inputs: readonly string[],
+		options?: ComputeEmbeddingsOptions,
+		cancellationToken?: CancellationToken,
+	) {
+		const typeInfo = getWellKnownEmbeddingTypeInfo(EmbeddingType.text3small_512);
+		if (!typeInfo) {
+			throw new Error(`Embeddings type info not found: ${EmbeddingType.text3small_512}`);
+		}
+		const endpoint = await this._endpointProvider.getEmbeddingsEndpoint('text3small');
+		const batchSize = endpoint.maxBatchSize;
+		// Open AI seems to allow 1 less than max tokens for the model requests. So if the max tokens is 8192, we can only send 8191 tokens.
+		const maxTokens = endpoint.modelMaxPromptTokens - 1;
+		return this.fetchResponseWithBatches(typeInfo, endpoint, inputs, cancellationToken, maxTokens, batchSize);
+	}
+
+	/**
+	 * A recursive helper that drives the public `fetchResponse` function. This allows accepting a batch and supports backing off the endpoint.
+	 * @param inputs The inputs to get embeddings for
+	 * @param cancellationToken A cancellation token to allow cancelling the requests
+	 * @param batchSize The batch size to calculate
+	 * @returns The embeddings
+	 */
+	private async fetchResponseWithBatches(
+		type: EmbeddingTypeInfo,
+		endpoint: IEmbeddingsEndpoint,
+		inputs: readonly string[],
+		cancellationToken: CancellationToken | undefined,
+		maxTokens: number,
+		batchSize: number,
+		parallelism = 1,
+	): Promise<Embeddings | undefined> {
+		// First we loop through all inputs and count their token length, if one exceeds max tokens then we fail
+		for (const input of inputs) {
+			const inputTokenLength = await endpoint.acquireTokenizer().tokenLength(input);
+			if (inputTokenLength > maxTokens) {
+				return undefined;
+			}
+		}
+
+		let embeddings: EmbeddingVector[] = [];
+		const promises: Promise<CAPIEmbeddingResults | undefined>[] = [];
+		const limiter = new Limiter<CAPIEmbeddingResults | undefined>(parallelism);
+		try {
+			for (let i = 0; i < inputs.length; i += batchSize) {
+				const currentBatch = inputs.slice(i, i + batchSize);
+				promises.push(limiter.queue(async () => {
+					if (cancellationToken?.isCancellationRequested) {
+						return;
+					}
+
+					const r = await this.rawEmbeddingsFetchWithTelemetry(type, endpoint, generateUuid(), currentBatch, cancellationToken);
+					if (r.type === 'failed') {
+						throw new Error('Embeddings request failed ' + r.reason);
+					}
+					return r;
+				}));
+			}
+
+			embeddings = (await Promise.all(promises)).flatMap(response => response?.embeddings ?? []);
+		} catch (e) {
+			return undefined;
+		} finally {
+			limiter.dispose();
+		}
+
+		if (cancellationToken?.isCancellationRequested) {
+			return undefined;
+		}
+
+		// If there are no embeddings, return undefined
+		if (embeddings.length === 0) {
+			return undefined;
+		}
+		return { type: EmbeddingType.text3small_512, values: embeddings.map((value): Embedding => ({ type: EmbeddingType.text3small_512, value })) };
+	}
+
+	private async rawEmbeddingsFetchWithTelemetry(
+		type: EmbeddingTypeInfo,
+		endpoint: IEmbeddingsEndpoint,
+		requestId: string,
+		inputs: readonly string[],
+		cancellationToken: CancellationToken | undefined
+	) {
+		const startTime = Date.now();
+		const rawRequest = await this.rawEmbeddingsFetch(type, endpoint, requestId, inputs, cancellationToken);
+		if (rawRequest.type === 'failed') {
+			this._telemetryService.sendMSFTTelemetryErrorEvent('embedding.error', {
+				type: rawRequest.type,
+				reason: rawRequest.reason
+			});
+			return rawRequest;
+		}
+
+		const tokenizer = endpoint.acquireTokenizer();
+		const tokenCounts = await Promise.all(inputs.map(input => tokenizer.tokenLength(input)));
+		const inputTokenCount = tokenCounts.reduce((acc, count) => acc + count, 0);
+		this._telemetryService.sendMSFTTelemetryEvent('embedding.success', {}, {
+			batchSize: inputs.length,
+			inputTokenCount,
+			timeToComplete: Date.now() - startTime
+		});
+		return rawRequest;
+	}
+
+	/**
+	 * The function which actually makes the request to the API and handles failures.
+	 * This is separated out from fetchResponse as fetchResponse does some manipulation to the input and handles errors differently
+	 */
+	public async rawEmbeddingsFetch(
+		type: EmbeddingTypeInfo,
+		endpoint: IEmbeddingsEndpoint,
+		requestId: string,
+		inputs: readonly string[],
+		cancellationToken: CancellationToken | undefined
+	): Promise<CAPIEmbeddingResults | CAPIEmbeddingError> {
+		try {
+			const token = await this._authService.getCopilotToken();
+
+			const body = { input: inputs, model: type.model, dimensions: type.dimensions };
+			endpoint.interceptBody?.(body);
+			const response = await postRequest(
+				this._fetcherService,
+				this._telemetryService,
+				this._capiClientService,
+				endpoint,
+				token.token,
+				await createRequestHMAC(env.HMAC_SECRET),
+				'copilot-panel',
+				requestId,
+				body,
+				undefined,
+				cancellationToken
+			);
+			const jsonResponse = response.status === 200 ? await response.json() : await response.text();

			type EmbeddingResponse = {
				object: string;
				index: number;
				embedding: number[];
			};
			if (response.status === 200 && jsonResponse.data) {
				return { type: 'success', embeddings: jsonResponse.data.map((d: EmbeddingResponse) => d.embedding) };
			} else {
				return { type: 'failed', reason: jsonResponse.error };
			}
		} catch (e) {
			let errorMessage = (e as Error)?.message ?? 'Unknown error';
			// Timeouts = JSON parse errors because the response is incomplete
			if (errorMessage.match(/Unexpected.*JSON/i)) {
				errorMessage = 'timeout';
			}
			return { type: 'failed', reason: errorMessage };

		}
	}
 }
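`fetchResponseWithBatches` slices the inputs into `maxBatchSize` chunks and pushes each chunk through a `Limiter` with a default parallelism of 1, failing the whole computation if any single input exceeds the token budget or any batch request fails. A standalone sketch of that slicing pattern, independent of the repo's `Limiter` utility; all names here are illustrative.

```ts
// Illustrative batching sketch: slice inputs into fixed-size batches and fetch
// them sequentially (parallelism = 1), mirroring fetchResponseWithBatches above.
async function embedInBatches(
	inputs: readonly string[],
	batchSize: number,
	fetchBatch: (batch: readonly string[]) => Promise<number[][]>
): Promise<number[][] | undefined> {
	const out: number[][] = [];
	for (let i = 0; i < inputs.length; i += batchSize) {
		const batch = inputs.slice(i, i + batchSize);
		try {
			// Each batch becomes one POST to the embeddings endpoint; in the real
			// code a failed batch makes the whole computation return undefined.
			out.push(...await fetchBatch(batch));
		} catch {
			return undefined;
		}
	}
	return out.length > 0 ? out : undefined;
}
```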

src/platform/embeddings/common/vscodeIndex.ts

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ abstract class RelatedInformationProviderEmbeddingsIndex<V extends { key: string
 			return [];
 		}
 		const startOfEmbeddingRequest = Date.now();
-		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [query], {}, new TelemetryCorrelationId('RelatedInformationProviderEmbeddingsIndex::provideRelatedInformation'), token);
+		const embeddingResult = await this.embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [query], { endpointType: 'capi' }, new TelemetryCorrelationId('RelatedInformationProviderEmbeddingsIndex::provideRelatedInformation'), token);
 		this._logService.debug(`Related Information: Remote similarly request took ${Date.now() - startOfEmbeddingRequest}ms`);
 		if (token.isCancellationRequested) {
 			// return an array of 0s the same length as comparisons
