Skip to content

Commit ea0662a

Browse files
authored
Update fetch tool to use metis (#1192)
* Update fetch tool to use metis microsoft/vscode#268956 * Add service dep for tests too * Use mock service for tests * Fix lazy
1 parent cd1487b commit ea0662a

File tree

9 files changed

+58
-19
lines changed

9 files changed

+58
-19
lines changed

src/extension/extension/vscode-node/services.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ import { IWorkspaceListenerService } from '../../workspaceRecorder/common/worksp
103103
import { WorkspacListenerService } from '../../workspaceRecorder/vscode-node/workspaceListenerService';
104104
import { registerServices as registerCommonServices } from '../vscode/services';
105105
import { NativeEnvServiceImpl } from '../../../platform/env/vscode-node/nativeEnvServiceImpl';
106+
import { GithubAvailableEmbeddingTypesService, IGithubAvailableEmbeddingTypesService } from '../../../platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
106107

107108
// ###########################################################################################
108109
// ### ###
@@ -195,6 +196,7 @@ export function registerServices(builder: IInstantiationServiceBuilder, extensio
195196
builder.define(IWorkspaceListenerService, new SyncDescriptor(WorkspacListenerService));
196197
builder.define(ICodeSearchAuthenticationService, new SyncDescriptor(VsCodeCodeSearchAuthenticationService));
197198
builder.define(ITodoListContextProvider, new SyncDescriptor(TodoListContextProvider));
199+
builder.define(IGithubAvailableEmbeddingTypesService, new SyncDescriptor(GithubAvailableEmbeddingTypesService));
198200
}
199201

200202
function setupMSFTExperimentationService(builder: IInstantiationServiceBuilder, extensionContext: ExtensionContext) {

src/extension/test/node/services.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import { SimulationAlternativeNotebookContentService, SimulationNotebookService,
3030
import { NullTestProvider } from '../../../platform/testing/common/nullTestProvider';
3131
import { TestLogService } from '../../../platform/testing/common/testLogService';
3232
import { ITestProvider } from '../../../platform/testing/common/testProvider';
33+
import { IGithubAvailableEmbeddingTypesService, MockGithubAvailableEmbeddingTypesService } from '../../../platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
3334
import { IWorkspaceChunkSearchService, NullWorkspaceChunkSearchService } from '../../../platform/workspaceChunkSearch/node/workspaceChunkSearchService';
3435
import { DisposableStore } from '../../../util/vs/base/common/lifecycle';
3536
import { SyncDescriptor } from '../../../util/vs/platform/instantiation/common/descriptors';
@@ -102,5 +103,6 @@ export function createExtensionUnitTestingServices(disposables: Pick<DisposableS
102103
testingServiceCollection.define(ITodoListContextProvider, new SyncDescriptor(TodoListContextProvider));
103104
testingServiceCollection.define(ILanguageModelServer, new SyncDescriptor(MockLanguageModelServer));
104105
testingServiceCollection.define(IEditToolLearningService, new SyncDescriptor(EditToolLearningService));
106+
testingServiceCollection.define(IGithubAvailableEmbeddingTypesService, new SyncDescriptor(MockGithubAvailableEmbeddingTypesService));
105107
return testingServiceCollection;
106108
}

src/extension/test/vscode-node/services.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ import { TestProvider } from '../../../platform/testing/vscode/testProviderImpl'
8080
import { ITokenizerProvider, TokenizerProvider } from '../../../platform/tokenizer/node/tokenizer';
8181
import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';
8282
import { ExtensionTextDocumentManager } from '../../../platform/workspace/vscode/workspaceServiceImpl';
83+
import { GithubAvailableEmbeddingTypesService, IGithubAvailableEmbeddingTypesService } from '../../../platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
8384
import { SyncDescriptor } from '../../../util/vs/platform/instantiation/common/descriptors';
8485
import { CommandServiceImpl, ICommandService } from '../../commands/node/commandService';
8586
import { IPromptWorkspaceLabels, PromptWorkspaceLabels } from '../../context/node/resolvers/promptWorkspaceLabels';
@@ -180,6 +181,7 @@ export function createExtensionTestingServices(): TestingServiceCollection {
180181
testingServiceCollection.define(IToolGroupingCache, new SyncDescriptor(ToolGroupingCache));
181182
testingServiceCollection.define(IToolGroupingService, new SyncDescriptor(ToolGroupingService));
182183
testingServiceCollection.define(ITodoListContextProvider, new SyncDescriptor(TodoListContextProvider));
184+
testingServiceCollection.define(IGithubAvailableEmbeddingTypesService, new SyncDescriptor(GithubAvailableEmbeddingTypesService));
183185

184186
return testingServiceCollection;
185187
}

src/extension/tools/node/githubRepoTool.tsx

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@ import { GithubRepoId, toGithubNwo } from '../../../platform/git/common/gitServi
1212
import { IGithubCodeSearchService } from '../../../platform/remoteCodeSearch/common/githubCodeSearchService';
1313
import { RemoteCodeSearchIndexStatus } from '../../../platform/remoteCodeSearch/common/remoteCodeSearch';
1414
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
15-
import { GithubAvailableEmbeddingTypesManager } from '../../../platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
15+
import { GithubAvailableEmbeddingTypesService, IGithubAvailableEmbeddingTypesService } from '../../../platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
1616
import { Result } from '../../../util/common/result';
1717
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
1818
import { isLocation, isUri } from '../../../util/common/types';
1919
import { raceCancellationError, timeout } from '../../../util/vs/base/common/async';
2020
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
21-
import { Lazy } from '../../../util/vs/base/common/lazy';
2221
import { URI } from '../../../util/vs/base/common/uri';
2322
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
2423
import { ExtendedLanguageModelToolResult, LanguageModelPromptTsxPart, MarkdownString } from '../../../vscodeTypes';
@@ -42,12 +41,12 @@ interface PrepareError {
4241
export class GithubRepoTool implements ICopilotTool<GithubRepoToolParams> {
4342
public static readonly toolName = ToolName.GithubRepo;
4443

45-
private readonly _availableEmbeddingTypesManager = new Lazy<GithubAvailableEmbeddingTypesManager>(() => this._instantiationService.createInstance(GithubAvailableEmbeddingTypesManager));
4644

4745
/**
 * All dependencies are supplied through the decorated service identifiers.
 *
 * `_availableEmbeddingTypesManager` is typed by the service interface rather than by the
 * concrete `GithubAvailableEmbeddingTypesService` class: whatever implementation is
 * registered under `IGithubAvailableEmbeddingTypesService` (including a mock) is injected
 * here, and this tool only calls `getPreferredType` on it.
 */
constructor(
4846
@IRunCommandExecutionService _commandService: IRunCommandExecutionService,
4947
@IInstantiationService private readonly _instantiationService: IInstantiationService,
5048
@IGithubCodeSearchService private readonly _githubCodeSearch: IGithubCodeSearchService,
49+
@IGithubAvailableEmbeddingTypesService private readonly _availableEmbeddingTypesManager: IGithubAvailableEmbeddingTypesService,
5150
@ITelemetryService private readonly _telemetryService: ITelemetryService,
5251
) { }
5352

@@ -57,7 +56,7 @@ export class GithubRepoTool implements ICopilotTool<GithubRepoToolParams> {
5756
throw new Error('Invalid input. Could not parse repo');
5857
}
5958

60-
const embeddingType = await this._availableEmbeddingTypesManager.value.getPreferredType(false);
59+
const embeddingType = await this._availableEmbeddingTypesManager.getPreferredType(false);
6160
if (!embeddingType) {
6261
throw new Error('No embedding models available');
6362
}

src/platform/embeddings/common/embeddingsComputer.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,10 @@ export interface EmbeddingDistance {
8989

9090
export const IEmbeddingsComputer = createServiceIdentifier<IEmbeddingsComputer>('IEmbeddingsComputer');
9191

92+
/**
 * Kind of input being embedded: either `'document'` or `'query'`.
 * Shared by {@link ComputeEmbeddingsOptions} and by callers that forward an input type.
 */
export type EmbeddingInputType = 'document' | 'query';
93+
9294
/** Optional settings passed along with a compute-embeddings request. */
export type ComputeEmbeddingsOptions = {
93-
readonly inputType?: 'document' | 'query';
95+
readonly inputType?: EmbeddingInputType;
9496
};
9597

9698
export interface IEmbeddingsComputer {

src/platform/urlChunkSearch/node/urlChunkEmbeddingsIndex.ts

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ import { URI } from '../../../util/vs/base/common/uri';
1313
import { IAuthenticationService } from '../../authentication/common/authentication';
1414
import { FileChunkAndScore, FileChunkWithEmbedding } from '../../chunking/common/chunk';
1515
import { ChunkableContent, ComputeBatchInfo, EmbeddingsComputeQos, IChunkingEndpointClient } from '../../chunking/common/chunkingEndpointClient';
16-
import { distance, Embedding, EmbeddingType, IEmbeddingsComputer } from '../../embeddings/common/embeddingsComputer';
16+
import { distance, Embedding, EmbeddingInputType, EmbeddingType, IEmbeddingsComputer } from '../../embeddings/common/embeddingsComputer';
1717
import { ILogService } from '../../log/common/logService';
18+
import { IGithubAvailableEmbeddingTypesService } from '../../workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
1819

1920
/**
2021
* The maximum content length to send to the chunking endpoint.
@@ -51,6 +52,7 @@ export class UrlChunkEmbeddingsIndex extends Disposable {
5152
@ILogService private readonly _logService: ILogService,
5253
@IEmbeddingsComputer private readonly _embeddingsComputer: IEmbeddingsComputer,
5354
@IChunkingEndpointClient private readonly _chunkingEndpointClient: IChunkingEndpointClient,
55+
@IGithubAvailableEmbeddingTypesService private readonly _availableEmbeddingTypesService: IGithubAvailableEmbeddingTypesService,
5456
) {
5557
super();
5658
}
@@ -60,20 +62,25 @@ export class UrlChunkEmbeddingsIndex extends Disposable {
6062
query: string,
6163
token: CancellationToken,
6264
): Promise<FileChunkAndScore[][]> {
65+
const embeddingType = await raceCancellationError(this._availableEmbeddingTypesService.getPreferredType(/*silent*/ false), token);
66+
if (!embeddingType) {
67+
throw new Error('No embedding types available');
68+
}
69+
6370
const [queryEmbedding, fileChunksAndEmbeddings] = await raceCancellationError(Promise.all([
64-
this.computeEmbeddings(query, token),
65-
this.getEmbeddingsForFiles(files.map(file => new UrlContent(file.uri, file.content)), EmbeddingsComputeQos.Batch, token)
71+
this.computeEmbeddings(embeddingType, query, 'query', token),
72+
this.getEmbeddingsForFiles(embeddingType, files.map(file => new UrlContent(file.uri, file.content)), EmbeddingsComputeQos.Batch, token)
6673
]), token);
6774

6875
return this.computeChunkScores(fileChunksAndEmbeddings, queryEmbedding);
6976
}
7077

71-
private async computeEmbeddings(str: string, token: CancellationToken): Promise<Embedding> {
72-
const embeddings = await this._embeddingsComputer.computeEmbeddings(EmbeddingType.text3small_512, [str], {}, new TelemetryCorrelationId('UrlChunkEmbeddingsIndex::computeEmbeddings'), token);
78+
/**
 * Computes the embedding of a single string via the injected embeddings computer.
 *
 * @param embeddingType Embedding type to request.
 * @param str Text to embed; it is sent as a one-element batch.
 * @param inputType Forwarded as the `inputType` option of the request.
 * @param token Cancellation token forwarded to the embeddings computer.
 * @returns The first entry of the returned `values`.
 */
private async computeEmbeddings(embeddingType: EmbeddingType, str: string, inputType: EmbeddingInputType, token: CancellationToken): Promise<Embedding> {
79+
const embeddings = await this._embeddingsComputer.computeEmbeddings(embeddingType, [str], { inputType }, new TelemetryCorrelationId('UrlChunkEmbeddingsIndex::computeEmbeddings'), token);
7380
return embeddings.values[0];
7481
}
7582

76-
private async getEmbeddingsForFiles(files: readonly UrlContent[], qos: EmbeddingsComputeQos, token: CancellationToken): Promise<(readonly FileChunkWithEmbedding[])[]> {
83+
private async getEmbeddingsForFiles(embeddingType: EmbeddingType, files: readonly UrlContent[], qos: EmbeddingsComputeQos, token: CancellationToken): Promise<(readonly FileChunkWithEmbedding[])[]> {
7784
if (!files.length) {
7885
return [];
7986
}
@@ -88,7 +95,7 @@ export class UrlChunkEmbeddingsIndex extends Disposable {
8895
}
8996

9097
const result = await Promise.all(files.map(async file => {
91-
const result = await this.getChunksAndEmbeddings(authToken, file, batchInfo, qos, token);
98+
const result = await this.getChunksAndEmbeddings(authToken, embeddingType, file, batchInfo, qos, token);
9299
if (!result) {
93100
return [];
94101
}
@@ -107,13 +114,13 @@ export class UrlChunkEmbeddingsIndex extends Disposable {
107114
);
108115
}
109116

110-
private async getChunksAndEmbeddings(authToken: string, content: UrlContent, batchInfo: ComputeBatchInfo, qos: EmbeddingsComputeQos, token: CancellationToken): Promise<readonly FileChunkWithEmbedding[] | undefined> {
117+
private async getChunksAndEmbeddings(authToken: string, embeddingType: EmbeddingType, content: UrlContent, batchInfo: ComputeBatchInfo, qos: EmbeddingsComputeQos, token: CancellationToken): Promise<readonly FileChunkWithEmbedding[] | undefined> {
111118
const existing = await raceCancellationError(this._cache.get(content), token);
112119
if (existing) {
113120
return existing;
114121
}
115122

116-
const chunksAndEmbeddings = await raceCancellationError(this._chunkingEndpointClient.computeChunksAndEmbeddings(authToken, EmbeddingType.text3small_512, content, batchInfo, qos, new Map(), new CallTracker('UrlChunkEmbeddingsIndex::getChunksAndEmbeddings'), token), token);
123+
const chunksAndEmbeddings = await raceCancellationError(this._chunkingEndpointClient.computeChunksAndEmbeddings(authToken, embeddingType, content, batchInfo, qos, new Map(), new CallTracker('UrlChunkEmbeddingsIndex::getChunksAndEmbeddings'), token), token);
117124
if (chunksAndEmbeddings) {
118125
this._cache.set(content, chunksAndEmbeddings);
119126
}

src/platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import { RequestType } from '@vscode/copilot-api';
77
import { createRequestHMAC } from '../../../util/common/crypto';
88
import { Result } from '../../../util/common/result';
9+
import { createServiceIdentifier } from '../../../util/common/services';
910
import { CallTracker } from '../../../util/common/telemetryCorrelationId';
1011
import { env } from '../../../util/vs/base/common/process';
1112
import { generateUuid } from '../../../util/vs/base/common/uuid';
@@ -35,7 +36,22 @@ type GetAvailableTypesError =
3536

3637
type GetAvailableTypesResult = Result<AvailableEmbeddingTypes, GetAvailableTypesError>;
3738

38-
export class GithubAvailableEmbeddingTypesManager {
39+
/** Service identifier under which an {@link IGithubAvailableEmbeddingTypesService} implementation is registered and injected. */
export const IGithubAvailableEmbeddingTypesService = createServiceIdentifier<IGithubAvailableEmbeddingTypesService>('IGithubAvailableEmbeddingTypesService');
40+
41+
/** Contract for resolving which embedding type consumers should request. */
export interface IGithubAvailableEmbeddingTypesService {
42+
readonly _serviceBrand: undefined;
43+
44+
/**
45+
* Gets the preferred embedding type based on available types and user configuration.
46+
* @param silent Whether to silently handle authentication errors
47+
* @returns The preferred embedding type or undefined if none available
48+
*/
49+
getPreferredType(silent: boolean): Promise<EmbeddingType | undefined>;
50+
}
51+
52+
export class GithubAvailableEmbeddingTypesService implements IGithubAvailableEmbeddingTypesService {
53+
54+
readonly _serviceBrand: undefined;
3955

4056
private _cached?: Promise<GetAvailableTypesResult>;
4157

@@ -213,3 +229,12 @@ export class GithubAvailableEmbeddingTypesManager {
213229
return all.primary.at(0) ?? all.deprecated.at(0);
214230
}
215231
}
232+
233+
234+
/**
 * Test double for {@link IGithubAvailableEmbeddingTypesService} that always reports
 * `EmbeddingType.metis_1024_I16_Binary` as the preferred embedding type.
 */
export class MockGithubAvailableEmbeddingTypesService implements IGithubAvailableEmbeddingTypesService {
235+
declare readonly _serviceBrand: undefined;
236+
237+
// `_silent` is ignored: the mock resolves to a fixed embedding type.
async getPreferredType(_silent: boolean): Promise<EmbeddingType | undefined> {
238+
return EmbeddingType.metis_1024_I16_Binary;
239+
}
240+
}

src/platform/workspaceChunkSearch/node/workspaceChunkSearchService.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import { ISimulationTestContext } from '../../simulationTestContext/common/simul
3434
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
3535
import { ITelemetryService } from '../../telemetry/common/telemetry';
3636
import { getWorkspaceFileDisplayPath, IWorkspaceService } from '../../workspace/common/workspaceService';
37-
import { GithubAvailableEmbeddingTypesManager } from '../common/githubAvailableEmbeddingTypes';
37+
import { IGithubAvailableEmbeddingTypesService } from '../common/githubAvailableEmbeddingTypes';
3838
import { IWorkspaceChunkSearchStrategy, StrategySearchResult, StrategySearchSizing, WorkspaceChunkQuery, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions, WorkspaceChunkSearchStrategyId, WorkspaceSearchAlert } from '../common/workspaceChunkSearch';
3939
import { CodeSearchChunkSearch, CodeSearchRemoteIndexState } from './codeSearchChunkSearch';
4040
import { EmbeddingsChunkSearch, LocalEmbeddingsIndexState, LocalEmbeddingsIndexStatus } from './embeddingsChunkSearch';
@@ -115,17 +115,15 @@ export class WorkspaceChunkSearchService extends Disposable implements IWorkspac
115115
readonly onDidChangeIndexState = this._onDidChangeIndexState.event;
116116

117117
private _impl: WorkspaceChunkSearchServiceImpl | undefined;
118-
private readonly _availableEmbeddingTypes: GithubAvailableEmbeddingTypesManager;
119118

120119
/**
 * Receives its collaborators through the decorated service identifiers, then
 * immediately calls `tryInit(true)`.
 */
constructor(
121120
@IInstantiationService private readonly _instantiationService: IInstantiationService,
122121
@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
122+
@IGithubAvailableEmbeddingTypesService private readonly _availableEmbeddingTypes: IGithubAvailableEmbeddingTypesService,
123123
@ILogService private readonly _logService: ILogService,
124124
) {
125125
super();
126126

127-
this._availableEmbeddingTypes = _instantiationService.createInstance(GithubAvailableEmbeddingTypesManager);
128-
129127
this.tryInit(true);
130128
}
131129

test/base/simulationContext.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import { SimulationReviewService } from '../../src/platform/test/node/simulation
4040
import { NullTestProvider } from '../../src/platform/testing/common/nullTestProvider';
4141
import { ITestProvider } from '../../src/platform/testing/common/testProvider';
4242
import { ITokenizerProvider, TokenizerProvider } from '../../src/platform/tokenizer/node/tokenizer';
43+
import { GithubAvailableEmbeddingTypesService, IGithubAvailableEmbeddingTypesService } from '../../src/platform/workspaceChunkSearch/common/githubAvailableEmbeddingTypes';
4344
import { IWorkspaceChunkSearchService, WorkspaceChunkSearchService } from '../../src/platform/workspaceChunkSearch/node/workspaceChunkSearchService';
4445
import { IWorkspaceFileIndex, WorkspaceFileIndex } from '../../src/platform/workspaceChunkSearch/node/workspaceFileIndex';
4546
import { createServiceIdentifier } from '../../src/util/common/services';
@@ -292,6 +293,7 @@ export async function createSimulationAccessor(
292293
testingServiceCollection.define(IGitExtensionService, new SyncDescriptor(NullGitExtensionService));
293294
testingServiceCollection.define(IReleaseNotesService, new SyncDescriptor(ReleaseNotesService));
294295
testingServiceCollection.define(IWorkspaceFileIndex, new SyncDescriptor(WorkspaceFileIndex));
296+
testingServiceCollection.define(IGithubAvailableEmbeddingTypesService, new SyncDescriptor(GithubAvailableEmbeddingTypesService));
295297

296298
if (opts.useExperimentalCodeSearchService) {
297299
testingServiceCollection.define(IWorkspaceChunkSearchService, new SyncDescriptor(SimulationCodeSearchChunkSearchService, []));

0 commit comments

Comments
 (0)